2026年3月30日 星期一

[HackerRank] Smallest Substring Containing All Patterns

Smallest Substring Containing All Patterns

Difficulty: Hard

Given a string S and an array of patterns, find the smallest substring window [l, r] such that each pattern appears at least once within S[l..r]. Return the pair of indices or [-1, -1] if no such window exists.

Example

Input

S = xyzabcabczyx
patterns = ['abc', 'zyx']

Output

[6,11]

Explanation

- Identify occurrences of 'abc' at indices [3..5] and [6..8], and 'zyx' at [9..11]. 
- Combining the second 'abc' ([6..8]) with 'zyx' ([9..11]) yields the window [6,11] of length 6, which is shorter than the alternative [3,11], so the result is [6,11].

Input Format

  • The first line contains an integer n denoting the length of array patterns
  • The next n lines contains the elements of the array.
  • The last line contains the string S.

Example

2
abc
zyx
xyzabcabczyx

Constraints

  • 1 <= |S| <= 10^6
  • 1 <= m <= 10^5
  • 1 <= |patterns[i]| <= 10^5 for each 1 <= i <= m
  • all patterns[i] are non-empty strings
  • patterns may contain duplicate entries

Output Format

  • Returns an integer array of length 2

Sample Input 0

1
a
a

Sample Output 0

0 
0

Sample Input 1

1
b
a

Sample Output 1

-1
-1

Consideration:

1. Sort the patterns and remove the sub-string patterns. (one pattern is another pattern's sub-string.): This way, we may reduce the number of patterns to reduce finding/searching time.

2. Then find one pattern's matched location in given string S.

3. Find another pattern's matched location in given string S.

4. Combined 2 & 3. It's union (min(start), max(end)) of the matched location of 2 different patterns.

5. Remove the larger range which is completely covered the smaller range.

6. Find next pattern's matched location in given string S. Combined with the range vector in step5. Remove the larger range again.

7. Repeat step6 till all patterns are checked.

8. When every the result is {-1,-1} or {0, n-1}, return immediately.

9. Return the smallest the range vector.

10. Timeout at test case#13 before we do "patterns2 = removeSubstrings(patterns);"

Code:

#include <bits/stdc++.h>

using namespace std;

string ltrim(const string &);
string rtrim(const string &);


#include <algorithm>

/*
 * Complete the 'findSmallestSubstringWindow' function below.
 *
 * The function is expected to return an INTEGER_ARRAY.
 * The function accepts following parameters:
 *  1. STRING_ARRAY patterns
 *  2. STRING S
 */

int findPatternInString(const string& myPattern, const string& myString, const int& offset);
vector<int> findSmallestSubString(const vector<vector<int>>& my2DVector);
vector<vector<int>> combinedV1V2(const vector<vector<int>>& my2DV1, const vector<vector<int>>& my2DV2);
vector<string> removeSubstrings(vector<string> patterns);

vector<int> findSmallestSubstringWindow(vector<string> patterns, string S) {
    vector<vector<int>> myVector1;
    vector<vector<int>> myVector2;
    vector<vector<int>> myVector3;
    vector<string> patterns2;
    int pos1=0;
    int pattenIndex=0;
    int n = S.size();

    //cout << "n = " << n << "\n";

    std::sort(patterns.begin(), patterns.end(), [](const std::string& a, const std::string& b) {
        // longer string length is in front of the others.
        if (a.size() != b.size()) {
            return a.size() > b.size();
        }
        // if the size is the same, let anyone put in front.
        return a > b;
    });
    patterns2 = removeSubstrings(patterns);
    int m = patterns2.size();
    //cout << "m = " << m << "\n";
    //patterns2=patterns;

    while (1) {
        pos1 = findPatternInString(patterns2[pattenIndex], S, pos1);
        //cout << "pos1 = " << pos1 << "\n";
        if (pos1==-1) {
            break;
        }
        else {
            myVector1.push_back({pos1, pos1+(int)(patterns2[pattenIndex].size())-1});
            //myVector1.push_back({pos1, pos1+1});    
        }
        if ((pos1+1)>=n) break;
        pos1++;
    }
    if ((pattenIndex+1)<m) {
        pattenIndex++;
    }
    else {
        return findSmallestSubString(myVector1);
    }
    if (myVector1.empty()) return {-1,-1};
    if ((myVector1.size()==1)&&(myVector1[0][1]==0)&&(myVector1[0][1]==n-1)) return {0,n-1};
    pos1=0;
    while (1) {

        pos1 = findPatternInString(patterns2[pattenIndex], S, pos1);
        //cout << "pos1 = " << pos1 << "\n";
        if (pos1==-1) {
            break;
        }
        else {
            myVector2.push_back({pos1, pos1+(int)(patterns2[pattenIndex].size())-1});
            //myVector1.push_back({pos1, pos1+1});    
        }
        if ((pos1+1)>=n) break;
        pos1++;
       
    }
    //cout << "myVector1 = " << myVector1 << "\n";
    if (myVector2.empty()) return {-1,-1};
    if ((myVector2.size()==1)&&(myVector2[0][1]==0)&&(myVector2[0][1]==n-1)) return {0,n-1};
   
    myVector3 = combinedV1V2(myVector1, myVector2);
    if (myVector3.empty()) return {-1,-1};
    if ((myVector3.size()==1)&&(myVector3[0][1]==0)&&(myVector3[0][1]==n-1)) return {0,n-1};
    while (++pattenIndex<m)
    {
        myVector1 = myVector3;
        myVector2.clear();
        pos1=0;

        while (1) {

            pos1 = findPatternInString(patterns2[pattenIndex], S, pos1);
            //cout << "pos1 = " << pos1 << "\n";
            if (pos1==-1) {
                break;
            }
            else {
                myVector2.push_back({pos1, pos1+(int)(patterns2[pattenIndex].size())-1});
                //myVector1.push_back({pos1, pos1+1});    
            }
            if ((pos1+1)>=n) break;
            pos1++;
            if ((myVector2.size()==1)&&(myVector2[0][1]==0)&&(myVector2[0][1]==n-1)) return {0,n-1};      
        }
        //cout << "myVector1 = " << myVector1 << "\n";
        if (myVector2.empty()) return {-1,-1};
       
        myVector3 = combinedV1V2(myVector1, myVector2);
        if (myVector3.empty()) return {-1,-1};
        if ((myVector3.size()==1)&&(myVector3[0][1]==0)&&(myVector3[0][1]==n-1)) return {0,n-1};
    }
   
    //if (myVector3.empty()) return {-1,-1};
    return findSmallestSubString(myVector3);
}


vector<string> removeSubstrings(vector<string> patterns)
{
    vector<string> result;
    result.push_back(patterns[0]);
    for (int i=1; i<patterns.size(); i++) {
        bool isSub = false;
        for (const auto& s: result) {
            if ( s.find(patterns[i]) != std::string::npos) {
                isSub = true;
                break;
            }
        }
        if (isSub==false) {
            result.push_back(patterns[i]);
        }
    }
    return result;
}


vector<vector<int>> combinedV1V2(const vector<vector<int>>& my2DV1, const vector<vector<int>>& my2DV2)
{
    vector<vector<int>> my2DV3;
    for (auto v1: my2DV1) {
        for (auto v2: my2DV2) {
            my2DV3.push_back({std::min(v1[0], v2[0]), std::max(v1[1], v2[1])});
        }
    }
    if (my2DV3.empty()) return {{-1,-1}};
    // remove the larger covering range elements.
    // 1. Sort the start_index first
    sort(my2DV3.begin(), my2DV3.end(), [](const vector<int>& a, const vector<int>& b) {
        if (a[0] != b[0]) return a[0] < b[0];
        return a[1] > b[1];
    });
    // 2. Remove the larger range which fully covered the smaller range
    vector<vector<int>> result;
    for (const auto& current : my2DV3) {
        // if result is not empty, and the range is covered by the last result
        while (!result.empty() && current[0] >= result.back()[0] && current[1] <= result.back()[1]) {
            result.pop_back(); // remove that larger range
        }
        result.push_back(current);
    }
   
    return result;
}

vector<int> findSmallestSubString(const vector<vector<int>>& my2DVector)
{
    vector<int> myVector;
    int min_size;
    if (my2DVector.empty()) return {-1,-1};
    min_size = my2DVector[0][1] - my2DVector[0][0] + 1;
    myVector = my2DVector[0];
    for (const auto& vector_a: my2DVector) {
        if ((vector_a[1]-vector_a[0]+1)<min_size) {
            myVector=vector_a;
            min_size = vector_a[1]-vector_a[0]+1;
        }
    }
    return myVector;
}

int findPatternInString(const string& myPattern, const string& myString, const int& offset)
{
    int pos;
    string mySubString = myString.substr(offset);
    pos = mySubString.find(myPattern);
    if (string::npos==pos) return -1;
    return (pos+offset);
}



[HankerRank] Max Unique Substring Length in a Session

Max Unique Substring Length in a Session

Difficulty: Medium

Given a string of lowercase letters with sessions separated by '' characters, find the maximum length of a substring with all distinct letters within any single session.

Example

Input

sessionString = abcabcbb

Output

3

Explanation

- There is only one session: "abcabcbb". 
- Scanning with a sliding window for distinct letters, the longest substrings without repeats are "abc", "bca" and so on, each of length 3. 
- Therefore, the result is 3.

Input Format

  • A single line containing the string sessionString.

Constraints

  • 0 <= S.length <= 100000
  • For all i in [0, S.length]: S[i] is either '*' or a lowercase letter 'a' to 'z'
  • Each session is defined as a maximal contiguous substring of S without '*' characters
  • Number of sessions (segments between '*') is at most S.length + 1

Output Format

  • A single integer denoting the maximum length among all substrings. If sessionString is empty or contains only '*', output 0.

Sample Input 0

*

Sample Output 0

0

Sample Input 1

aa

Sample Output 1

1 


Consideration:

1. Set function "mySet.count" could check if the coming character was appeared in the substring already.

2. If this character is still unique, add to mySet by mySet.insert function.

3. If this character was appeared in the substring, find the index which had the same character as current end_index points to. Remove those characters before this index from mySet by mySet.erase function. The new start_index will be that index + 1.

4. When there is '*', stop operation -> calculate current max unique substring length -> clear mySet by mySet.clear function -> move to check the substring starting after this '*'.

5. Whenever end_index reaches to the end of the input string, calculate current max unique substring length and return this value.


Code:

/*
 * Complete the 'maxDistinctSubstringLengthInSessions' function below.
 *
 * The function is expected to return an INTEGER.
 * The function accepts STRING sessionString as parameter.
 */

int maxDistinctSubstringLengthInSessions(string sessionString) {
    set<char> mySet;
    int n = sessionString.size();
    //cout << "n = " << n << "\n";
    int start_index=0, end_index=0;
    int maxLength=0, curLength;
    if (n==0) return 0;

    while (1) {
        if (sessionString[end_index]=='*') {
            curLength = end_index - start_index;
            if (curLength>maxLength) {
                maxLength = curLength;
            }
            if ((end_index+1) < n) {
                start_index = end_index + 1;
                end_index = start_index;
            }
            else {
                return maxLength;
            }
            mySet.clear();
        }
        else if (!(mySet.count(sessionString[end_index]))) {
            mySet.insert(sessionString[end_index]);
            if ((end_index+1)<n) {
                end_index++;
            }
            else {
                curLength = mySet.size();
                if (curLength>maxLength) {
                    maxLength = curLength;
                }
                return maxLength;
            }
        }
        else {
            curLength = mySet.size();
            if (curLength>maxLength) {
                maxLength = curLength;
            }
            while (sessionString[start_index]!=sessionString[end_index]) {
                mySet.erase(sessionString[start_index]);
                start_index++;
            }
            start_index++;
            if ((end_index+1) < n) {
                end_index++;
            }      
            else {
                return maxLength;
            }
        }
    }

    return maxLength;
}