string string string
Time Limit: 2000/1000 MS (Java/Others) | Memory Limit: 32768/32768 K (Java/Others) | Total Submission(s): 989 | Accepted Submission(s): 285
Given a string s, we define a substring that occurs exactly k times as an important string. You need to find out how many distinct substrings are important strings.
For each test case, there are two lines:
the first line contains an integer k ( k≥1) which is described above;
the second line contains a string s ($\text{length}(s) \le 10^5$).
It's guaranteed that $\sum \text{length}(s) \le 2 \times 10^6$.
题意: 给出一个字符串,询问恰好出现k次的子串的种数。
后缀数组+ST表
对于给定字符串,跑出sa[]和height数组,然后预处理出ST表。
然后类似于Two Pointers的做法,维护一个长度为k-1的height数组的区间,依次从k到length(s)推一遍。
ans += st.query_min(i - k + 2, i) - max(st.query_min(i - k + 2, i+1), st.query_min(i - k + 2 - 1, i));
这里维护的是k个后缀的最长公共前缀,然后减去max(st.query_min(i - k + 2, i+1), st.query_min(i - k + 2 - 1, i)),剩下的就是刚好出现k次的那部分。
比如lcp(i-k+2, i) == xabc, 而 max(lcp(i-k+2, i+1), lcp(i-k+1, i)) == x, 则 xa、xab、xabc 就是恰好出现k次的子串。
然后这里要注意,1、对于k == 1时要特殊处理,答案为 $\sum_{i=1}^{n} \bigl(n - sa[i] - \max(height[i],\ height[i+1])\bigr)$。例如字符串 aba 的后缀按字典序排列为:
a
aba
ba
当 i = 2时,x = a, 则 xba、xb为恰好出现1次的子串
2、虽然height数组不用重置,但算法中用到height[n+1]所以要每次把height[n+1]手动置零
同样,虽然ST表不用整体重置,但ST表里对应的这个位置也要重置,所以建表时传入 n+2: st.st_prepare(n+2, height);
时间复杂度 O(nlogn)
空间复杂度 O(nlogn)
#include <iostream>
#include <cstdio>
#include <string>
#include <cstring>
#include <vector>
#include <map>
#include <algorithm>
using namespace std;

// 64-bit integer type; main() accumulates the answer in it.
typedef long long LL;

// Capacity (in elements) shared by every global work array below.
const int MAXN = 200000 + 8;

// Results produced by get_sa().
int sa[MAXN];      // sa[r]: start index of the suffix with rank r
int height[MAXN];  // height[r]: common-prefix length of suffixes sa[r-1] and sa[r]
int _rank[MAXN];   // _rank[i]: rank of the suffix starting at index i

// Scratch space used by get_sa().
int t1[MAXN];      // first key buffer
int t2[MAXN];      // second key buffer
int c[MAXN];       // counting-sort buckets

// Text of the current test case: read in main(), consumed by get_sa() and print().
string s;
inline void get_sa(const int &n, int m)
{
int i, k, *x = t1, *y = t2, p, j;
for(i = 0; i < m; i++) c[i] = 0;
for(i = 0; i < n; i++) ++ c[x[i] = s[i]];
for(i = 1; i < m; i++) c[i] += c[i - 1];
for(i = n - 1; i >= 0; i--) sa[-- c[x[i]]] = i;
for(k = 1; k <= n; k <<= 1){
p = 0;
for(i = n - k; i < n; i++) y[p ++] = i;
for(i = 0; i < n; i++) if(sa[i] >= k) y[p ++] = sa[i] - k;
for(i = 0; i < m; i++) c[i] = 0;
for(i = 0; i < n; i++) ++ c[x[y[i]]];
for(i = 1; i < m; i++) c[i] += c[i - 1];
for(i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];
swap(x, y), p = 1, x[sa[0]] = 0;
for(i = 1; i < n; i++)
x[sa[i]] = (y[sa[i-1]] == y[sa[i]] && y[sa[i-1]+k] == y[sa[i]+k]) ? p - 1 : p ++;
if(p >= n) break;
m = p;
}
k = 0;
for(i = 0; i < n; i++) _rank[sa[i]] = i;
for(i = 0; i < n; i++){
if(k) --k; if(!_rank[i]) continue;
j = sa[_rank[i] - 1];
while(s[i + k] == s[j + k]) k++;
height[_rank[i]] = k;
}
}
// Debug helper: for every rank r in 1..n prints "r : _rank[sa[r]] sa[r]" on one
// line, then the characters s[sa[r]] .. s[n-1] on the next line, and finishes
// with an empty line.
inline void print(const int &n)
{
    int r = 1;
    while (r <= n) {
        const int start = sa[r];
        std::cout << r << " : " << _rank[start] << " " << start << std::endl;
        for (int pos = start; pos < n; ++pos) {
            std::cout << s[pos];
        }
        std::cout << std::endl;
        ++r;
    }
    std::cout << std::endl;
}
/**
 * Sparse table for range-minimum queries over a static int array.
 *
 * Usage: call init() once to fill the log table, st_prepare() whenever the
 * underlying array changes, then query_min() for inclusive ranges.
 */
struct ST_list{
    // Number of power-of-two levels stored per position. Level j covers 2^j
    // elements, and st_prepare() only writes level j when 2^j <= n <= MAXN,
    // so 18 levels are enough while MAXN < 2^18.
    enum { LEVELS = 18 };
    // Compile-time guard (C++98 style): the array size turns negative, and the
    // build fails, if MAXN ever grows beyond what LEVELS can cover.
    typedef char levels_cover_MAXN[(MAXN < (1 << LEVELS)) ? 1 : -1];

    // stTable[i][j]: minimum of arr[i .. i + 2^j - 1].
    // preLog2[x]:    floor(log2(x)) for 1 <= x <= n as passed to init().
    int stTable[MAXN][LEVELS], preLog2[MAXN];

    // Fills preLog2[1 .. n]; n must be smaller than MAXN.
    void init(int n){
        preLog2[1] = 0;
        for(int i = 2; i <= n; i++){
            preLog2[i] = preLog2[i-1];
            // The logarithm grows by one exactly when i is a power of two.
            if((1 << (preLog2[i] + 1)) == i){
                preLog2[i]++;
            }
        }
    }

    // Builds the table over arr[0 .. n-1]. Entries left over from an earlier,
    // longer array are not cleared; they are simply never read by in-range queries.
    void st_prepare(int n, int arr[]){
        for(int i = n - 1; i >= 0; i--){
            stTable[i][0] = arr[i];
            for(int j = 1; (i + (1 << j) - 1) < n; j++){
                // Combine the two half-length blocks starting at i and i + 2^(j-1).
                stTable[i][j] = std::min(stTable[i][j - 1], stTable[i + (1 << (j - 1))][j - 1]);
            }
        }
    }

    // Returns min(arr[l .. r]), both ends inclusive. Requires l <= r, the range
    // to lie inside the prepared array, and r - l + 1 to be within the n given
    // to init().
    int query_min(int l, int r){
        const int len = r - l + 1;
        const int k = preLog2[len];
        // Two overlapping blocks of length 2^k cover [l, r].
        return std::min(stTable[l][k], stTable[r - (1 << k) + 1][k]);
    }
}st;
// Reads T test cases (an integer k and a string s each) and prints, for every
// case, the sum computed from the suffix array, the height array and the
// sparse table as described in the loops below.
int main()
{
#ifdef LOCAL
    freopen("a.txt", "r", stdin);
    //freopen("a.out", "w", stdout);
#endif // LOCAL
    std::ios::sync_with_stdio(false);
    std::cin.tie(0);

    int T, len, k;
    LL ans;

    // The log table is built once and reused for every test case.
    st.init(100000 + 2);

    std::cin >> T;
    for (; T != 0; --T) {
        std::cin >> k;
        std::cin >> s;
        len = s.size();

        // get_sa() does not write height[len + 1]; clear whatever an earlier
        // case left there before the table is rebuilt.
        height[len + 1] = 0;
        get_sa(len + 1, 256);
        // The queries below reach index len + 1. st_prepare() covers indices
        // 0 .. size-1, so the size passed must be len + 2; otherwise
        // height[len + 1] = 0 would not enter the sparse table.
        st.st_prepare(len + 2, height);
        //print(len);

        ans = 0;
        if (k < 2) {
            // k == 1: for each rank, suffix length minus the longer of the two
            // neighbouring common prefixes.
            for (int r = 1; r <= len; r++) {
                const int shared = std::max(height[r], height[r + 1]);
                ans += (len - sa[r]) - shared;
            }
        } else {
            // k >= 2: slide a window of k consecutive ranks ending at `hi`.
            for (int hi = k; hi <= len; hi++) {
                const int lo = hi - k + 2;
                const int inWindow = st.query_min(lo, hi);
                const int oneMoreRight = st.query_min(lo, hi + 1);
                const int oneMoreLeft = st.query_min(lo - 1, hi);
                ans += inWindow - std::max(oneMoreRight, oneMoreLeft);
            }
        }
        std::cout << ans << "\n";
    }
    return 0;
}
Thank you!
------from ProLights
来源:oschina
链接:https://my.oschina.net/u/4355012/blog/4462808