Smallest Substring Containing All Patterns
Difficulty: Hard
Given a string S and an array of patterns, find the smallest substring window [l, r] such that each pattern appears at least once within S[l..r]. Return the pair of indices or [-1, -1] if no such window exists.
Example
Input
S = xyzabcabczyx
patterns = ['abc', 'zyx']
Output
[6,11]
Explanation
- Identify occurrences of 'abc' at indices [3..5] and [6..8], and 'zyx' at [9..11].
- Combining the second 'abc' ([6..8]) with 'zyx' ([9..11]) yields the window [6,11] of length 6, which is shorter than the alternative [3,11], so the result is [6,11].
Input Format
- The first line contains an integer n denoting the length of array patterns
- The next n lines contains the elements of the array.
- The last line contains the string S.
Example
2
abc
zyx
xyzabcabczyx
Constraints
- 1 <= |S| <= 10^6
- 1 <= m <= 10^5
- 1 <= |patterns[i]| <= 10^5 for each 1 <= i <= m
- all patterns[i] are non-empty strings
- patterns may contain duplicate entries
Output Format
- Returns an integer array of length 2
Sample Input 0
1
a
a
Sample Output 0
0
0
Sample Input 1
1
b
a
Sample Output 1
-1
-1Consideration:
1. Sort the pattern and then find
2. Combined
3. When every the result is {-1,-1} or {0, n-1}, return immediatelly.
4. I pass 14/15 test case. Timeout at test case#13.
Code:
#include <bits/stdc++.h>
using namespace std;
string ltrim(const string &);
string rtrim(const string &);
#include <algorithm>
using namespace std;
/*
* Complete the 'findSmallestSubstringWindow' function below.
*
* The function is expected to return an INTEGER_ARRAY.
* The function accepts following parameters:
* 1. STRING_ARRAY patterns
* 2. STRING S
*/
int findPatternInString(const string& myPattern, const string& myString, const int& offset);
vector<int> findSmallestSubString(const vector<vector<int>>& my2DVector);
vector<vector<int>> combinedV1V2(const vector<vector<int>>& my2DV1, const vector<vector<int>>& my2DV2);
vector<int> findSmallestSubstringWindow(vector<string> patterns, string S) {
vector<vector<int>> myVector1;
vector<vector<int>> myVector2;
vector<vector<int>> myVector3;
int pos1=0;
int pattenIndex=0;
int n = S.size();
int m = patterns.size();
//cout << "n = " << n << "\n";
//cout << "m = " << m << "\n";
std::sort(patterns.begin(), patterns.end(), [](const std::string& a, const std::string& b) {
// longer string length is in front of the others.
if (a.size() != b.size()) {
return a.size() > b.size();
}
// if the size is the same, let anyone put in front.
return a > b;
});
while (1) {
pos1 = findPatternInString(patterns[pattenIndex], S, pos1);
//cout << "pos1 = " << pos1 << "\n";
if (pos1==-1) {
break;
}
else {
myVector1.push_back({pos1, pos1+patterns[pattenIndex].size()-1});
//myVector1.push_back({pos1, pos1+1});
}
if ((pos1+1)>=n) break;
pos1++;
}
if ((pattenIndex+1)<m) {
pattenIndex++;
}
else {
return findSmallestSubString(myVector1);
}
if (myVector1.empty()) return {-1,-1};
if ((myVector1.size()==1)&&(myVector1[0][1]==0)&&(myVector1[0][1]==n-1)) return {0,n-1};
pos1=0;
while (1) {
pos1 = findPatternInString(patterns[pattenIndex], S, pos1);
//cout << "pos1 = " << pos1 << "\n";
if (pos1==-1) {
break;
}
else {
myVector2.push_back({pos1, pos1+patterns[pattenIndex].size()-1});
//myVector1.push_back({pos1, pos1+1});
}
if ((pos1+1)>=n) break;
pos1++;
}
//cout << "myVector1 = " << myVector1 << "\n";
if (myVector2.empty()) return {-1,-1};
if ((myVector2.size()==1)&&(myVector2[0][1]==0)&&(myVector2[0][1]==n-1)) return {0,n-1};
myVector3 = combinedV1V2(myVector1, myVector2);
if (myVector3.empty()) return {-1,-1};
if ((myVector3.size()==1)&&(myVector3[0][1]==0)&&(myVector3[0][1]==n-1)) return {0,n-1};
while (++pattenIndex<m)
{
myVector1 = myVector3;
myVector2.clear();
pos1=0;
while (1) {
pos1 = findPatternInString(patterns[pattenIndex], S, pos1);
//cout << "pos1 = " << pos1 << "\n";
if (pos1==-1) {
break;
}
else {
myVector2.push_back({pos1, pos1+patterns[pattenIndex].size()-1});
//myVector1.push_back({pos1, pos1+1});
}
if ((pos1+1)>=n) break;
pos1++;
if ((myVector2.size()==1)&&(myVector2[0][1]==0)&&(myVector2[0][1]==n-1)) return {0,n-1};
}
//cout << "myVector1 = " << myVector1 << "\n";
if (myVector2.empty()) return {-1,-1};
myVector3 = combinedV1V2(myVector1, myVector2);
if (myVector3.empty()) return {-1,-1};
if ((myVector3.size()==1)&&(myVector3[0][1]==0)&&(myVector3[0][1]==n-1)) return {0,n-1};
}
//if (myVector3.empty()) return {-1,-1};
return findSmallestSubString(myVector3);
}
vector<vector<int>> combinedV1V2(const vector<vector<int>>& my2DV1, const vector<vector<int>>& my2DV2)
{
vector<vector<int>> my2DV3;
for (auto v1: my2DV1) {
for (auto v2: my2DV2) {
my2DV3.push_back({std::min(v1[0], v2[0]), std::max(v1[1], v2[1])});
}
}
if (my2DV3.empty()) return {{-1,-1}};
// remove the larger covering range elements.
// 1. Sort the start_index first
sort(my2DV3.begin(), my2DV3.end(), [](const vector<int>& a, const vector<int>& b) {
if (a[0] != b[0]) return a[0] < b[0];
return a[1] > b[1];
});
// 2. Remove the larger range which fully covered the smaller range
vector<vector<int>> result;
for (const auto& current : my2DV3) {
// if result is not empty, and the range is covered by the last result
while (!result.empty() && current[0] >= result.back()[0] && current[1] <= result.back()[1]) {
result.pop_back(); // remove that larger range
}
result.push_back(current);
}
return result;
}
vector<int> findSmallestSubString(const vector<vector<int>>& my2DVector)
{
vector<int> myVector;
int min_size;
if (my2DVector.empty()) return {-1,-1};
min_size = my2DVector[0][1] - my2DVector[0][0] + 1;
myVector = my2DVector[0];
for (const auto& vector_a: my2DVector) {
if ((vector_a[1]-vector_a[0]+1)<min_size) {
myVector=vector_a;
min_size = vector_a[1]-vector_a[0]+1;
}
}
return myVector;
}
int findPatternInString(const string& myPattern, const string& myString, const int& offset)
{
int pos;
string mySubString = myString.substr(offset);
pos = mySubString.find(myPattern);
if (string::npos==pos) return -1;
return (pos+offset);
}