I need to find the longest non-overlapping repeated substring in a String. I have the suffix tree and suffix array of the string available.
When overlapping is allowed,
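Generate the suffix array by sorting the suffixes, which takes O(n log n) comparisons. P.S.: there are more efficient construction algorithms, such as DC3 for suffix arrays and Ukkonen's algorithm for suffix trees. Example: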
String : ababc
Suffix array:
start-index of substring | substring
0 - ababc
2 - abc
1 - babc
3 - bc
4 - c
Compare each pair of consecutive suffixes and take their common prefix, subject to the following constraint:
Say i1 is index of substring "ababc": 0
say i2 is index of substring "abc":2
common prefix is "ab" , length of common prefix is len
abs(i1-i2) >= len // avoid overlap: the two start positions must be at least len apart (here |0-2| = 2 >= 2)
Go through the suffix array applying this check to every adjacent pair, and for "ababc" you will get the result "ab".
The whole solution runs in O(n log n).
However, there is a special case, such as "aaaaa", that this solution cannot handle completely.
You are welcome to discuss and come up with a solution that runs in O(n log n) rather than O(n^2).
Complete code:
#include <bits/stdc++.h>
using namespace std;
/// Length of the longest common prefix of two strings.
///
/// Both arguments are taken by const reference so that a call does not copy
/// the strings; callers that pass temporaries or string literals still work.
///
/// @param a first string
/// @param b second string
/// @return number of leading characters on which `a` and `b` agree
///         (0 when either string is empty or the first characters differ).
int cplen(const std::string& a, const std::string& b){
    // Only the part both strings have can match.
    const std::size_t limit = std::min(a.size(), b.size());
    std::size_t same = 0;
    while (same < limit && a[same] == b[same]) {
        ++same;
    }
    return static_cast<int>(same);
}
// Strict-weak ordering on suffix start positions: position a sorts before
// position b when the suffix starting at a is lexicographically smaller.
// Comparing in place avoids materialising every suffix as its own string.
struct SuffixLess {
    const std::string& text;
    explicit SuffixLess(const std::string& t) : text(t) {}
    bool operator()(int a, int b) const {
        return text.compare(a, std::string::npos, text, b, std::string::npos) < 0;
    }
};

// Returns true when some substring of length `width` (width >= 1) occurs at
// two start positions that are at least `width` apart, i.e. without overlap.
// `order` is the suffix array; `lcp[k]` is the common-prefix length of the
// suffixes at order[k-1] and order[k].
static bool hasDisjointRepeat(const std::vector<int>& order,
                              const std::vector<int>& lcp,
                              int width){
    // A maximal run of adjacent suffixes with lcp >= width all share the same
    // first `width` characters; the run contains a non-overlapping pair exactly
    // when its smallest and largest start positions are >= width apart.
    int lowest = order[0];
    int highest = order[0];
    for (std::size_t k = 1; k < order.size(); ++k) {
        if (lcp[k] >= width) {
            lowest = std::min(lowest, order[k]);
            highest = std::max(highest, order[k]);
            if (highest - lowest >= width) {
                return true;
            }
        } else {
            // Run broken: start a new one at this suffix.
            lowest = order[k];
            highest = order[k];
        }
    }
    return false;
}

// Length of the longest substring of `text` that occurs at least twice
// without the two occurrences overlapping (0 when there is none).
static int longestDisjointRepeat(const std::string& text){
    const int n = static_cast<int>(text.size());
    if (n < 2) {
        return 0;
    }

    // Suffix array via comparison sort on start positions.
    std::vector<int> order(n);
    for (int i = 0; i < n; ++i) {
        order[i] = i;
    }
    std::sort(order.begin(), order.end(), SuffixLess(text));

    // Kasai's algorithm: LCP of each suffix with its predecessor in sorted
    // order, reusing the previous match length minus one.
    std::vector<int> where(n);
    for (int k = 0; k < n; ++k) {
        where[order[k]] = k;
    }
    std::vector<int> lcp(n, 0);
    int matched = 0;
    for (int i = 0; i < n; ++i) {
        if (where[i] == 0) {
            matched = 0;
            continue;
        }
        const int j = order[where[i] - 1];
        while (i + matched < n && j + matched < n &&
               text[i + matched] == text[j + matched]) {
            ++matched;
        }
        lcp[where[i]] = matched;
        if (matched > 0) {
            --matched;
        }
    }

    // Feasibility is monotone (a disjoint repeat of length L implies one of
    // length L-1), so binary-search the largest feasible length. Two disjoint
    // copies cannot be longer than n/2 each.
    int lo = 0;
    int hi = n / 2;
    while (lo < hi) {
        const int mid = lo + (hi - lo + 1) / 2;
        if (hasDisjointRepeat(order, lcp, mid)) {
            lo = mid;
        } else {
            hi = mid - 1;
        }
    }
    return lo;
}

// Reads one whitespace-delimited string from stdin and prints the length of
// its longest non-overlapping repeated substring, followed by a newline.
// If nothing can be read the string stays empty and 0 is printed.
int main(){
    std::string str;
    std::cin >> str;
    std::printf("%d\n", longestDisjointRepeat(str));
    return 0;
}
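Complexity: the sort performs O(n*log(n)) suffix comparisons, but each comparison can itself examine O(n) characters, so the worst-case running time is O(n^2*log(n)).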