2026年3月30日 星期一

[HackerRank] Smallest Substring Containing All Patterns

Smallest Substring Containing All Patterns

Difficulty: Hard

Given a string S and an array of patterns, find the smallest substring window [l, r] such that each pattern appears at least once within S[l..r]. Return the pair of indices or [-1, -1] if no such window exists.

Example

Input

S = xyzabcabczyx
patterns = ['abc', 'zyx']

Output

[6,11]

Explanation

- Identify occurrences of 'abc' at indices [3..5] and [6..8], and 'zyx' at [9..11]. 
- Combining the second 'abc' ([6..8]) with 'zyx' ([9..11]) yields the window [6,11] of length 6, which is shorter than the alternative [3,11], so the result is [6,11].

Input Format

  • The first line contains an integer n denoting the length of array patterns
  • The next n lines contains the elements of the array.
  • The last line contains the string S.

Example

2
abc
zyx
xyzabcabczyx

Constraints

  • 1 <= |S| <= 10^6
  • 1 <= m <= 10^5
  • 1 <= |patterns[i]| <= 10^5 for each 1 <= i <= m
  • all patterns[i] are non-empty strings
  • patterns may contain duplicate entries

Output Format

  • Returns an integer array of length 2

Sample Input 0

1
a
a

Sample Output 0

0 
0

Sample Input 1

1
b
a

Sample Output 1

-1
-1

Consideration:

1. Sort the patterns and remove the sub-string patterns. (one pattern is another pattern's sub-string.): This way, we may reduce the number of patterns to reduce finding/searching time.

2. Then find one pattern's matched location in given string S.

3. Find another pattern's matched location in given string S.

4. Combined 2 & 3. It's union (min(start), max(end)) of the matched location of 2 different patterns.

5. Remove the larger range which is completely covered the smaller range.

6. Find next pattern's matched location in given string S. Combined with the range vector in step5. Remove the larger range again.

7. Repeat step6 till all patterns are checked.

8. When every the result is {-1,-1} or {0, n-1}, return immediately.

9. Return the smallest the range vector.

10. Timeout at test case#13 before we do "patterns2 = removeSubstrings(patterns);"

Code:

#include <bits/stdc++.h>

using namespace std;

string ltrim(const string &);
string rtrim(const string &);


#include <algorithm>

/*
 * Complete the 'findSmallestSubstringWindow' function below.
 *
 * The function is expected to return an INTEGER_ARRAY.
 * The function accepts following parameters:
 *  1. STRING_ARRAY patterns
 *  2. STRING S
 */

int findPatternInString(const string& myPattern, const string& myString, const int& offset);
vector<int> findSmallestSubString(const vector<vector<int>>& my2DVector);
vector<vector<int>> combinedV1V2(const vector<vector<int>>& my2DV1, const vector<vector<int>>& my2DV2);
vector<string> removeSubstrings(vector<string> patterns);

vector<int> findSmallestSubstringWindow(vector<string> patterns, string S) {
    vector<vector<int>> myVector1;
    vector<vector<int>> myVector2;
    vector<vector<int>> myVector3;
    vector<string> patterns2;
    int pos1=0;
    int pattenIndex=0;
    int n = S.size();

    //cout << "n = " << n << "\n";

    std::sort(patterns.begin(), patterns.end(), [](const std::string& a, const std::string& b) {
        // longer string length is in front of the others.
        if (a.size() != b.size()) {
            return a.size() > b.size();
        }
        // if the size is the same, let anyone put in front.
        return a > b;
    });
    patterns2 = removeSubstrings(patterns);
    int m = patterns2.size();
    //cout << "m = " << m << "\n";
    //patterns2=patterns;

    while (1) {
        pos1 = findPatternInString(patterns2[pattenIndex], S, pos1);
        //cout << "pos1 = " << pos1 << "\n";
        if (pos1==-1) {
            break;
        }
        else {
            myVector1.push_back({pos1, pos1+(int)(patterns2[pattenIndex].size())-1});
            //myVector1.push_back({pos1, pos1+1});    
        }
        if ((pos1+1)>=n) break;
        pos1++;
    }
    if ((pattenIndex+1)<m) {
        pattenIndex++;
    }
    else {
        return findSmallestSubString(myVector1);
    }
    if (myVector1.empty()) return {-1,-1};
    if ((myVector1.size()==1)&&(myVector1[0][1]==0)&&(myVector1[0][1]==n-1)) return {0,n-1};
    pos1=0;
    while (1) {

        pos1 = findPatternInString(patterns2[pattenIndex], S, pos1);
        //cout << "pos1 = " << pos1 << "\n";
        if (pos1==-1) {
            break;
        }
        else {
            myVector2.push_back({pos1, pos1+(int)(patterns2[pattenIndex].size())-1});
            //myVector1.push_back({pos1, pos1+1});    
        }
        if ((pos1+1)>=n) break;
        pos1++;
       
    }
    //cout << "myVector1 = " << myVector1 << "\n";
    if (myVector2.empty()) return {-1,-1};
    if ((myVector2.size()==1)&&(myVector2[0][1]==0)&&(myVector2[0][1]==n-1)) return {0,n-1};
   
    myVector3 = combinedV1V2(myVector1, myVector2);
    if (myVector3.empty()) return {-1,-1};
    if ((myVector3.size()==1)&&(myVector3[0][1]==0)&&(myVector3[0][1]==n-1)) return {0,n-1};
    while (++pattenIndex<m)
    {
        myVector1 = myVector3;
        myVector2.clear();
        pos1=0;

        while (1) {

            pos1 = findPatternInString(patterns2[pattenIndex], S, pos1);
            //cout << "pos1 = " << pos1 << "\n";
            if (pos1==-1) {
                break;
            }
            else {
                myVector2.push_back({pos1, pos1+(int)(patterns2[pattenIndex].size())-1});
                //myVector1.push_back({pos1, pos1+1});    
            }
            if ((pos1+1)>=n) break;
            pos1++;
            if ((myVector2.size()==1)&&(myVector2[0][1]==0)&&(myVector2[0][1]==n-1)) return {0,n-1};      
        }
        //cout << "myVector1 = " << myVector1 << "\n";
        if (myVector2.empty()) return {-1,-1};
       
        myVector3 = combinedV1V2(myVector1, myVector2);
        if (myVector3.empty()) return {-1,-1};
        if ((myVector3.size()==1)&&(myVector3[0][1]==0)&&(myVector3[0][1]==n-1)) return {0,n-1};
    }
   
    //if (myVector3.empty()) return {-1,-1};
    return findSmallestSubString(myVector3);
}


vector<string> removeSubstrings(vector<string> patterns)
{
    vector<string> result;
    result.push_back(patterns[0]);
    for (int i=1; i<patterns.size(); i++) {
        bool isSub = false;
        for (const auto& s: result) {
            if ( s.find(patterns[i]) != std::string::npos) {
                isSub = true;
                break;
            }
        }
        if (isSub==false) {
            result.push_back(patterns[i]);
        }
    }
    return result;
}


vector<vector<int>> combinedV1V2(const vector<vector<int>>& my2DV1, const vector<vector<int>>& my2DV2)
{
    vector<vector<int>> my2DV3;
    for (auto v1: my2DV1) {
        for (auto v2: my2DV2) {
            my2DV3.push_back({std::min(v1[0], v2[0]), std::max(v1[1], v2[1])});
        }
    }
    if (my2DV3.empty()) return {{-1,-1}};
    // remove the larger covering range elements.
    // 1. Sort the start_index first
    sort(my2DV3.begin(), my2DV3.end(), [](const vector<int>& a, const vector<int>& b) {
        if (a[0] != b[0]) return a[0] < b[0];
        return a[1] > b[1];
    });
    // 2. Remove the larger range which fully covered the smaller range
    vector<vector<int>> result;
    for (const auto& current : my2DV3) {
        // if result is not empty, and the range is covered by the last result
        while (!result.empty() && current[0] >= result.back()[0] && current[1] <= result.back()[1]) {
            result.pop_back(); // remove that larger range
        }
        result.push_back(current);
    }
   
    return result;
}

vector<int> findSmallestSubString(const vector<vector<int>>& my2DVector)
{
    vector<int> myVector;
    int min_size;
    if (my2DVector.empty()) return {-1,-1};
    min_size = my2DVector[0][1] - my2DVector[0][0] + 1;
    myVector = my2DVector[0];
    for (const auto& vector_a: my2DVector) {
        if ((vector_a[1]-vector_a[0]+1)<min_size) {
            myVector=vector_a;
            min_size = vector_a[1]-vector_a[0]+1;
        }
    }
    return myVector;
}

int findPatternInString(const string& myPattern, const string& myString, const int& offset)
{
    int pos;
    string mySubString = myString.substr(offset);
    pos = mySubString.find(myPattern);
    if (string::npos==pos) return -1;
    return (pos+offset);
}