GRE Words

Problem Description

Recently George is preparing for the Graduate Record Examinations (GRE for short). Obviously the most important thing is reciting the words.
Now George is working on a word list containing N words.
He has so poor a memory that it is too hard for him to remember all of the words on the list. But he does find a way to help him to remember. He finds that if a sequence of words has a property that for all pairs of neighboring words, the previous one is a substring of the next one, then the sequence of words is easy to remember.
So he decides to eliminate some words from the word list first to make the list easier for him. Meantime, he doesn't want to miss the important words. He gives each word an importance, which is represented by an integer ranging from -1000 to 1000, then he wants to know which words to eliminate to maximize the sum of the importance of remaining words. Negative importance just means that George thought it useless and is a waste of time to recite the word.
Note that although he can eliminate any number of words from the word list, he can never change the order between words. In other words, the order in which the words appear on the word list is consistent with the order in the input. In addition, a word may have different meanings, so it can appear on the list more than once, and it may have different importance in each occurrence.

Input

The first line contains an integer T(1 <= T <= 50), indicating the number of test cases.
Each test case contains several lines.
The first line contains an integer N(1 <= N <= 2 * 10⁴), indicating the number of words.
Then N lines follow, each containing a string S_i and an integer W_i, representing the word and its importance. S_i contains only lowercase letters.
You can assume that the total length of all words will not exceed 3 * 10⁵.

Output

For each test case in the input, print one line: "Case #X: Y", where X is the test case number (starting with 1) and Y is the largest importance of the remaining sequence of words.

Sample Input

1
5
a 1
ab 2
abb 3
baba 5
abbab 8

Sample Output

Case #1: 14

Source

2011 Asia ChengDu Regional Contest

定义dp[i]表示，前i个串，选第i个的最大收益。max{dp[i]} (i∈[1, n])即为所求。

有转移方程dp[i] = max(0, max{dp[j]}) + w[i] (j∈[1, i) 且串j是串i的子串)，其中取0表示串i作为所选序列的第一个串（即不存在合法的j，或者所有合法的dp[j]都为负）。

看到多串时，首先考虑AC自动机。首先介绍一个AC自动机性质。方便描述，我们定义“节点对应的字符串”为——字典树的根到这个节点形成的字符串。这个性质就是：fail[x]对应的串一定是x对应的串的后缀！而字典树上的节点对应的字符串，一定是整个串的一个前缀。这就好办了。子串就是前缀的后缀呀！

初步想法：在AC自动机上，字符串i的每个节点（即每个前缀）都不停跑fail指针（前缀的后缀即为子串），如果这个节点代表某一个整串，那就试试能不能更新。但是这样会TLE。

AC自动机中，一个常见套路就是把所有fail都反过来，就能构成一棵以字典树root为根的fail树。其中祖先对应的串是其子孙对应的串的后缀（因此也是子串），即一个点的dp值可以被它的所有祖先更新，而一个点可以更新它的整棵子树（如果它的值足够大来更新的话）。

于是考虑求dfs序，然后用线段树维护。

#include <cstdio>
#include <cstring>
#include <algorithm>
#include <queue>
#define N 300005
#define M 20005
using namespace std;

// Solution sketch
// ---------------
// f[i] = best total importance of a valid sequence that ends with word i.
// Word j may precede word i (j < i) when word j is a substring of word i,
// i.e. a suffix of some prefix of word i.  In the fail tree of the
// Aho-Corasick automaton the string of every ancestor is a suffix of the
// string of its descendants, so once f[j] is known it is pushed onto the whole
// fail-tree subtree of word j's terminal node (range "max" update over the DFS
// interval).  f[i] is then obtained by point-querying every prefix node of
// word i.

queue<int> Q;              // BFS queue used while building fail links
int n, T, tot;             // word count, test count, characters consumed in p
char p[N];                 // all words of one test case, concatenated, 1-based
int w[M], f[M], length[M]; // importance, dp value and length of each word

struct AC_Automaton {
	int c[N][26], fail[N], cnt, head[N]; // trie / goto table, fail links, last node id, adjacency heads
	struct edge {int to, next;} e[N]; int ecnt; // fail-tree edges (parent -> child), 1-based
	int dfn, st[N], ed[N];               // DFS clock and the [st, ed] interval of every subtree
	int s[N<<2], tag[N<<2];              // segment tree: subtree maximum and pending "max" tag
	int cur[N];                          // per-node edge cursor used by the iterative dfs

	// Resets the automaton for a new test case.  Node 0 is the root.
	inline void init() {
		dfn = 0;
		memset(c[0], 0, sizeof(c[0]));
		memset(s, 0, sizeof(s));
		memset(tag, 0, sizeof(tag));
		// The root (node 0) already exists, so the last used id is 0.  Starting
		// at -1 made the first newnode() hand out id 0 again, which merged the
		// first character of the first word into the root.
		cnt = 0;
		ecnt = 0;
		memset(fail, 0, sizeof(fail));
		memset(head, 0, sizeof(head));
	}

	// Adds the directed fail-tree edge x -> y.
	inline void add(int x, int y) {
		e[++ecnt].to = y;
		e[ecnt].next = head[x];
		head[x] = ecnt;
	}

	// Allocates a fresh trie node with no children and returns its id.
	inline int newnode() {
		++cnt;
		memset(c[cnt], 0, sizeof(c[cnt]));
		return cnt;
	}

	// Inserts the word stored at buf[tot+1 ..] (NUL-terminated) as word `id`,
	// records its length and advances the global cursor `tot` past it.
	inline void ins(char *buf, int id) {
		int len = strlen(buf + tot + 1);
		int now = 0;
		for (int i = tot + 1; i <= tot + len; ++i) {
			int v = buf[i] - 'a';
			if (!c[now][v]) c[now][v] = newnode();
			now = c[now][v];
		}
		length[id] = len;
		tot += len;
	}

	// Computes fail links by BFS and completes c[][] into a full goto table.
	inline void build() {
		for (int i = 0; i < 26; ++i)
			if (c[0][i] > 0) {
				fail[c[0][i]] = 0;
				Q.push(c[0][i]);
			}
		while (!Q.empty()) {
			int u = Q.front(); Q.pop();
			for (int i = 0; i < 26; ++i) {
				if (c[u][i]) {
					fail[c[u][i]] = c[fail[u]][i];
					Q.push(c[u][i]);
				} else {
					c[u][i] = c[fail[u]][i];
				}
			}
		}
	}

	// Builds the fail tree: fail[i] is the parent of i.
	inline void set_tree() {
		for (int i = 1; i <= cnt; ++i) add(fail[i], i);
	}

	// Assigns DFS intervals [st, ed] to the fail-tree subtree rooted at x.
	//
	// Implemented iteratively: a word such as "aaaa...a" produces a fail chain
	// as deep as the total input length, which can overflow the call stack when
	// done recursively.  fail[] doubles as the parent pointer for walking back
	// up, so this must run after build() and set_tree().  Children are visited
	// in adjacency-list order, giving the same numbering as a recursive walk.
	// `f` (the parent of x) is kept for interface compatibility only; edges
	// point from parent to child, so a child can never equal its parent.
	void dfs(int x, int f) {
		(void)f;
		const int root = x;
		st[root] = ++dfn;
		cur[root] = head[root];
		int u = root;
		for (;;) {
			const int id = cur[u];
			if (id) {
				// Descend into the next unvisited child of u.
				cur[u] = e[id].next;
				const int y = e[id].to;
				st[y] = ++dfn;
				cur[y] = head[y];
				u = y;
			} else {
				// All children done: close the interval and climb to the parent.
				ed[u] = dfn;
				if (u == root) break;
				u = fail[u];
			}
		}
	}

	// Pushes the pending "max" tag of node p to both children.  A tag of 0
	// means "nothing pending"; stored values are never negative.
	inline void pushdown(int p) {
		if (tag[p]) {
			tag[p<<1] = max(tag[p], tag[p<<1]);
			tag[p<<1|1] = max(tag[p], tag[p<<1|1]);
			s[p<<1] = max(tag[p], s[p<<1]);
			s[p<<1|1] = max(tag[p], s[p<<1|1]);
			tag[p] = 0;
		}
	}

	// Recomputes s[p] from its children.
	inline void update(int p) {
		s[p] = max(s[p<<1], s[p<<1|1]);
	}

	// Point query: value stored at position x, node p covering [l, r].
	int query(int p, int l, int r, int x) {
		if (l == r) return s[p];
		if (tag[p]) pushdown(p);
		int mid = (l + r) >> 1;
		if (x <= mid) return query(p<<1, l, mid, x);
		else return query(p<<1|1, mid + 1, r, x);
	}

	// Range update: a[i] = max(a[i], key) for every i in [x, y].
	void change(int p, int l, int r, int x, int y, int key) {
		if (x <= l && r <= y) {
			tag[p] = max(tag[p], key);
			s[p] = max(s[p], key);
			return;
		}
		pushdown(p);
		int mid = (l + r) >> 1;
		if (x <= mid) change(p<<1, l, mid, x, y, key);
		if (mid + 1 <= y) change(p<<1|1, mid + 1, r, x, y, key);
		update(p);
	}

	// Runs the DP over the words in input order and prints the answer
	// followed by a newline.  Resets and re-walks the global cursor `tot`.
	inline void solve() {
		tot = 0;
		for (int i = 1; i <= n; ++i) {
			f[i] = 0;
			int now = 0;
			for (int j = 1; j <= length[i]; ++j) {
				++tot;
				int v = p[tot] - 'a';
				now = c[now][v];
				// Best f[j] among earlier words that are a suffix of this prefix.
				f[i] = max(f[i], query(1, 1, dfn, st[now]));
			}
			f[i] += w[i];
			f[i] = max(0, f[i]); // a chain with negative total is never worth extending
			change(1, 1, dfn, st[now], ed[now], f[i]);
		}
		int ans = 0; // keeping no word at all is allowed
		for (int i = 1; i <= n; ++i) ans = max(ans, f[i]);
		printf("%d\n", ans);
	}
} AC;

int main() {
	if (scanf("%d", &T) != 1) return 0;
	for (int tc = 1; tc <= T; ++tc) {
		if (scanf("%d", &n) != 1) break;
		AC.init();
		tot = 0;
		bool ok = true;
		for (int i = 1; i <= n; ++i) {
			// Words are stored back to back in p; each read overwrites the
			// previous word's terminating NUL.
			if (scanf("%s%d", p + tot + 1, &w[i]) != 2) { ok = false; break; }
			AC.ins(p, i);
		}
		if (!ok) break; // truncated input: stop without printing a partial line
		AC.build();
		AC.set_tree();
		AC.dfs(0, 0);
		printf("Case #%d: ", tc);
		AC.solve();
	}
	return 0;
}

文章来源: 【Hdu4117】GRE Words――AC自动机+dfs序+带区间add线段树维护最大值+dp

标签

自动机

gre考试