CodeForces 631D Messenger KMP算法
题目链接:http://codeforces.com/problemset/problem/631/D
Each employee of the "Blake Technologies" company uses a special messaging app "Blake Messenger". All the staff like this app and use it constantly. However, some important features are missing. For example, many users want to be able to search through the message history. It was already announced that the new feature will appear in the nearest update, when developers faced some troubles that only you may help them to solve.
All the messages are represented as strings consisting of only lowercase English letters. In order to reduce the network load, strings are represented in a special compressed form. The compression algorithm works as follows: a string is represented as a concatenation of n blocks, each block containing only equal characters. One block may be described as a pair $(l_i, c_i)$, where $l_i$ is the length of the i-th block and $c_i$ is the corresponding letter. Thus, the string s may be written as the sequence of pairs $(l_1, c_1), (l_2, c_2), \ldots, (l_n, c_n)$.
Your task is to write a program that, given two compressed strings t and s, finds all occurrences of s in t. Developers know that there may be many such occurrences, so they only ask you to find the number of them. Note that p is the starting position of some occurrence of s in t if and only if $t_p t_{p+1} \ldots t_{p+|s|-1} = s$, where $t_i$ is the i-th character of string t.
Note that the way to represent the string in compressed form may not be unique. For example, the string "aaaa" may be given as $(4, a)$, $(3, a)(1, a)$, $(2, a)(2, a)$, ...
The first line of the input contains two integers n and m (1 ≤ n, m ≤ 200 000) — the number of blocks in the strings t and s, respectively.
The second line contains the descriptions of n parts of string t in the format "li-ci" (1 ≤ li ≤ 1 000 000) — the length of the i-th part and the corresponding lowercase English letter.
The third line contains the descriptions of m parts of string s in the format "li-ci" (1 ≤ li ≤ 1 000 000) — the length of the i-th part and the corresponding lowercase English letter.
Print a single integer — the number of occurrences of s in t.
5 3
3-a 2-b 4-c 3-a 2-c
2-a 2-b 1-c
1
6 1
3-a 6-b 7-a 4-c 8-e 2-a
3-a
6
5 5
1-h 1-e 1-l 1-l 1-o
1-w 1-o 1-r 1-l 1-d
0
In the first sample, t = "aaabbccccaaacc", and string s = "aabbc". The only occurrence of string s in string t starts at position p = 2.
In the second sample, t = "aaabbbbbbaaaaaaacccceeeeeeeeaa", and s = "aaa". The occurrences of s in t start at positions p = 1,p = 10, p = 11, p = 12, p = 13 and p = 14.
题意:给出一个主串S和模式串T,问T在S中出现了多少次。字符串的给出形式如样例1中3-a 2-b 4-c 3-a 2-c 表示aaabbccccaaacc。
思路:第一想到的就是用KMP算法搞一下,可惜后来时间到了没调出来。
先预处理主串和模式串,比如说含有连续的3-a 4-a,那么就合并成7-a。
然后如果模式串长度 >= 3(长度是指x-c为一个长度,x表示数目,c表示字符),截取除掉头和尾的中间部分,然后用KMP算法找出截取后的字符串在S中出现的位置,再特判一下头尾是否符合条件。因为满足题目要求的情况下,模式串的头尾字符个数可以小于主串。
如果模式串长度=1或=2,直接暴力匹配,并且要用long long。
#include <cstddef>
#include <iostream>
#include <vector>

namespace {

// One run of a run-length-encoded string: `len` copies of the letter `ch`.
struct Block {
    long long len;
    char ch;
};

// True when both blocks have the same letter and exactly the same length.
// Interior blocks of the pattern must match the text this way.
bool sameBlock(const Block& a, const Block& b) {
    return a.ch == b.ch && a.len == b.len;
}

// True when `textBlock` has the same letter as `patternBlock` and is at least
// as long. The first and last pattern blocks only need this weaker condition,
// because an occurrence may start or end in the middle of a text block.
bool covers(const Block& textBlock, const Block& patternBlock) {
    return textBlock.ch == patternBlock.ch && textBlock.len >= patternBlock.len;
}

// Reads `count` tokens of the form "<length>-<letter>" from stdin and returns
// them with adjacent equal-letter blocks merged (e.g. 3-a 4-a becomes 7-a).
// Merged lengths are kept in long long since many blocks may be summed.
// Stops early if the input ends or is malformed.
std::vector<Block> readBlocks(int count) {
    std::vector<Block> blocks;
    if (count > 0) {
        blocks.reserve(static_cast<std::size_t>(count));
    }
    for (int i = 0; i < count; ++i) {
        long long len = 0;
        char dash = 0;
        char letter = 0;
        if (!(std::cin >> len >> dash >> letter)) {
            break;
        }
        if (!blocks.empty() && blocks.back().ch == letter) {
            blocks.back().len += len;
        } else {
            blocks.push_back(Block{len, letter});
        }
    }
    return blocks;
}

// Counts the occurrences of `pattern` in `text`; both must already have
// adjacent equal-letter blocks merged.
//
//  * 1 pattern block: every text block with the same letter and length L >= l
//    contributes L - l + 1 starting positions.
//  * 2 pattern blocks: each pair of neighbouring text blocks that covers the
//    two pattern blocks contributes exactly one occurrence.
//  * 3+ pattern blocks: the interior blocks must match text blocks exactly, so
//    KMP is run over (letter, length) pairs for the interior, and each hit is
//    accepted only if the text blocks just before and after it cover the first
//    and last pattern blocks.
long long countOccurrences(const std::vector<Block>& text,
                           const std::vector<Block>& pattern) {
    if (pattern.empty() || text.empty()) {
        return 0;
    }

    long long total = 0;

    if (pattern.size() == 1) {
        for (const Block& b : text) {
            if (covers(b, pattern[0])) {
                total += b.len - pattern[0].len + 1;
            }
        }
        return total;
    }

    if (pattern.size() == 2) {
        // `i + 1 < text.size()` keeps the second comparison inside the data.
        for (std::size_t i = 0; i + 1 < text.size(); ++i) {
            if (covers(text[i], pattern[0]) && covers(text[i + 1], pattern[1])) {
                ++total;
            }
        }
        return total;
    }

    // Interior of the pattern (everything except the first and last block).
    const std::vector<Block> mid(pattern.begin() + 1, pattern.end() - 1);

    // fail[i] = length of the longest proper prefix of mid[0..i] that is also
    // a suffix of it (the standard KMP prefix function).
    std::vector<std::size_t> fail(mid.size(), 0);
    for (std::size_t i = 1, k = 0; i < mid.size(); ++i) {
        while (k > 0 && !sameBlock(mid[i], mid[k])) {
            k = fail[k - 1];
        }
        if (sameBlock(mid[i], mid[k])) {
            ++k;
        }
        fail[i] = k;
    }

    std::size_t matched = 0;
    for (std::size_t i = 0; i < text.size(); ++i) {
        while (matched > 0 && !sameBlock(text[i], mid[matched])) {
            matched = fail[matched - 1];
        }
        if (sameBlock(text[i], mid[matched])) {
            ++matched;
        }
        if (matched == mid.size()) {
            // The interior occupies text[start .. i]; the occurrence also
            // needs a block before `start` and a block after `i`.
            const std::size_t start = i + 1 - mid.size();
            if (start >= 1 && i + 1 < text.size() &&
                covers(text[start - 1], pattern.front()) &&
                covers(text[i + 1], pattern.back())) {
                ++total;
            }
            matched = fail[matched - 1];
        }
    }
    return total;
}

}  // namespace

// Reads test cases until end of input. Each case is: n m, then n blocks of the
// text t, then m blocks of the pattern s. Prints the number of occurrences of
// s in t, one line per case.
int main() {
    std::ios_base::sync_with_stdio(false);
    std::cin.tie(nullptr);

    int n = 0;
    int m = 0;
    while (std::cin >> n >> m) {
        const std::vector<Block> text = readBlocks(n);
        const std::vector<Block> pattern = readBlocks(m);
        std::cout << countOccurrences(text, pattern) << '\n';
    }
    return 0;
}