之前我们给的SAM例题,基本上都是对一个串建SAM就能做的
如果要建多个串的SAM应该怎么做呢
首先看题,bzoj2780
我一开始的想法是SA以前的弄法,把串拼起来,中间加分隔符做SAM
这题确实可以这么做,这样根据SAM能识别所有子串的性质
而且每个节点都代表了唯一的一个串
每个询问串我们都能找到最终转移到哪(找不到就是没出现过)
问询问串在多少个串中出现过,就等价于问:询问串最终转移到的节点ST(s)在parent树上的子树中,出现了多少种不同的权值(权值即该节点对应的前缀属于第几个原串)
这显然可以维护dfs序,用经典的离线做法来搞(更好的写法见文末UPD)
{ BZOJ 2780, concatenation approach.

  All n dictionary strings are joined with a separator character and one
  suffix automaton (SAM) is built over the reversed concatenation.  Every
  state created for a real character is tagged with the index of the string
  that character belongs to (0 for separators and cloned states).  A query
  string is walked through the automaton (also reversed); the number of
  dictionary strings containing it equals the number of distinct non-zero
  tags inside the parent-tree subtree of the state it ends in.  That
  "distinct values in a DFS-order interval" question is answered offline
  with a Fenwick tree, sweeping the left endpoint from 1 to t. }
type
  node = record
    po, next: longint;          // child state, next edge in the adjacency list
  end;

var
  go: array[0..300010, 1..27] of longint;   // SAM transitions; 1..26 = 'a'..'z', 27 = separator
  d, mx, fa, l, r, p, c, wh, w, fir, next: array[0..300010] of longint;
  // d[h]   : tag of the state at DFS position h
  // mx, fa : longest length / suffix link of each state
  // l, r   : DFS entry position and last position inside the subtree
  // p      : head of the parent-tree adjacency list
  // c      : Fenwick tree over DFS positions
  // wh     : owner string of each position of the concatenation
  // w      : tag of each state
  // fir    : first DFS position holding a given tag
  // next   : next DFS position holding the same tag
  ans, a, b: array[0..60010] of longint;    // answers, query end states, query ids
  e: array[0..300010] of node;
  h, t, k, last, len, i, j, n, q, x: longint;
  s, ss: ansistring;

function lowbit(x: longint): longint;
begin
  exit(x and (-x));
end;

{ Orders states by DFS entry position; state 0 (no match) has l = 0 and sorts first. }
function cmp(a, b: longint): boolean;
begin
  exit(l[a] < l[b]);
end;

procedure swap(var a, b: longint);
var
  c: longint;
begin
  c := a;
  a := b;
  b := c;
end;

{ Quicksort of the queries a[l..r] by cmp, keeping the ids in b[] aligned. }
procedure sort(l, r: longint);
var
  i, j, x: longint;
begin
  i := l;
  j := r;
  x := a[(l + r) shr 1];
  repeat
    while cmp(a[i], x) do inc(i);
    while cmp(x, a[j]) do dec(j);
    if not (i > j) then
    begin
      swap(a[i], a[j]);
      swap(b[i], b[j]);
      inc(i);
      dec(j);
    end;
  until i > j;
  if l < j then sort(l, j);
  if i < r then sort(i, r);
end;

{ Adds the parent-tree edge x -> y. }
procedure build(x, y: longint);
begin
  inc(len);
  e[len].po := y;
  e[len].next := p[x];
  p[x] := len;
end;

{ Standard SAM extension by character c; the new state is tagged with x. }
procedure add(c, x: longint);
var
  p, q, np, nq: longint;
begin
  p := last;
  inc(t); last := t; np := t;
  w[np] := x; mx[np] := mx[p] + 1;
  while (p <> 0) and (go[p, c] = 0) do
  begin
    go[p, c] := np;
    p := fa[p];
  end;
  if p = 0 then fa[np] := 1
  else begin
    q := go[p, c];
    if mx[q] = mx[p] + 1 then fa[np] := q
    else begin
      // Split q: the clone nq keeps tag 0 because it is not a prefix state.
      inc(t); nq := t;
      mx[nq] := mx[p] + 1;
      go[nq] := go[q];
      fa[nq] := fa[q];
      fa[q] := nq; fa[np] := nq;
      while go[p, c] = q do
      begin
        go[p, c] := nq;
        p := fa[p];
      end;
    end;
  end;
end;

{ Numbers the parent tree in DFS order and records the tag at each position. }
procedure dfs(x: longint);
var
  i: longint;
begin
  inc(h);
  l[x] := h;
  d[h] := w[x];
  i := p[x];
  while i <> 0 do
  begin
    dfs(e[i].po);
    i := e[i].next;
  end;
  r[x] := h;
end;

{ Fenwick tree: add 1 at position x. }
procedure work(x: longint);
begin
  while x <= t do
  begin
    inc(c[x]);
    x := x + lowbit(x);
  end;
end;

{ Fenwick tree: prefix sum over positions 1..x. }
function ask(x: longint): longint;
begin
  ask := 0;
  while x > 0 do
  begin
    ask := ask + c[x];
    x := x - lowbit(x);
  end;
end;

begin
  readln(n, q);
  for i := 1 to n do
  begin
    readln(ss);
    len := length(ss);
    for j := 1 to len do            // concatenate, remembering the owner of each position
    begin
      s := s + ss[j];
      inc(t); wh[t] := i;
    end;
    if i <> n then
    begin
      s := s + chr(97 + 26);        // separator maps to transition index 27
      inc(t);
    end;
  end;
  len := length(s);
  t := 1; last := 1;
  for i := len downto 1 do
    add(ord(s[i]) - 96, wh[i]);

  len := 0;
  for i := 2 to t do                // build the parent tree
    build(fa[i], i);

  dfs(1);
  for i := 1 to q do
  begin
    readln(s);
    j := 1;
    len := length(s);
    for k := len downto 1 do        // find the state the query ends in (0 = not a substring)
    begin
      x := ord(s[k]) - 96;
      if go[j, x] = 0 then
      begin
        j := 0;
        break;
      end
      else j := go[j, x];
    end;
    a[i] := j;
    b[i] := i;
  end;

  // Offline distinct-count: chain together the positions sharing a tag and
  // start with only the first position of every tag switched on.
  for i := t downto 2 do
  begin
    next[i] := fir[d[i]];
    fir[d[i]] := i;
  end;
  for i := 1 to n do
    if fir[i] <> 0 then work(fir[i]);

  if q > 1 then sort(1, q);
  j := 1;
  // Skip unmatched queries (their answer stays 0).  The bound on j is
  // required: without it the scan runs past a[] when no query matches.
  while (j <= q) and (a[j] = 0) do inc(j);
  for i := 1 to t do
  begin
    while (j <= q) and (l[a[j]] = i) do
    begin
      ans[b[j]] := ask(r[a[j]]) - ask(i - 1);
      inc(j);
    end;
    // Position i leaves the window: switch on the next position with the same tag.
    if d[i] <> 0 then
      if next[i] <> 0 then work(next[i]);
  end;
  for i := 1 to q do
    writeln(ans[i]);
end.
然后我看到了bzoj3277 3473(双倍经验)
用我刚才的做法似乎不好搞,因为这题问每个字符串有多少子串出现在至少k个字符串中
而刚才那种拼接,每个节点可接受的子串会搞出一堆不存在的子串
这时,我膜拜了wyfcyx的构建广义后缀树的做法,显然这里每个串要反序建SAM(这样才能构造出原串的后缀树)
他的做法是建完一个串的SAM后回到根,对下个串s先匹配
如果转移到的节点在SAM中可接受的最长串长度=当前匹配s的长度,那么这个节点可以代表s
否则的话就像SAM一样新开一个节点,感觉和可持久化的思想很像
这样每个节点可能代表了多个串的子串,并且也没有多出来奇怪的子串
而且每个节点可接受串出现在多少个串中依然=parent树的子树中,出现了多少种不同的权值
这样我们可以像刚才一样,求出每个点出现次数,如果大于等于k,
那么根据之前的性质,这个点p可接受串的长度为[max[fa[p]]+1,max[p]]
那么点p能做出的贡献即为max[p]-max[fa[p]],否则贡献为0
由于子串是某个后缀的前缀,所以每个字符串的答案等于所有这个字符串的后缀节点的从根到该节点的权值和
{ BZOJ 3277 / 3473, generalized suffix automaton.

  Every string is inserted reversed into one shared automaton, restarting
  from the root for each string.  If the needed transition already exists,
  `change` reuses (or splits) the target state; otherwise `add` performs
  the usual SAM extension.  For every state the number of distinct input
  strings having a prefix state inside its parent-tree subtree is computed
  offline with a Fenwick tree (sweep by right endpoint, keeping only the
  latest position of each string switched on).  A state occurring in at
  least k strings contributes mx[x] - mx[fa[x]]; contributions are then
  accumulated from the root downwards, and the answer of a string is the
  sum of those accumulated values over all of its prefix states. }
type
  node = record
    po, next: longint;
  end;

var
  e, w, pr: array[0..400010] of node;
  // e  : parent-tree edges
  // w  : per string, list of its prefix states (heads in q[])
  // pr : per state, list of the strings it is a prefix state of (heads in h[])
  go: array[0..400010, 'a'..'z'] of longint;
  sa, r, h, q, p, c, cur, a, b, mx, fa: array[0..400010] of longint;
  // sa[ti] : state at DFS position ti      b[x] : DFS entry position of x
  // r[x]   : last DFS position in x's subtree
  // p      : parent-tree adjacency heads   c    : Fenwick tree
  // cur[y] : DFS position currently switched on for string y
  // a      : states sorted by r[]
  g, ans: array[0..400010] of int64;
  n, k, l, ti, y, f, i, j, len, x, t, last: longint;
  s: ansistring;

function lowbit(x: longint): longint;
begin
  exit(x and (-x));
end;

procedure swap(var a, b: longint);
var
  c: longint;
begin
  c := a;
  a := b;
  b := c;
end;

{ Adds the parent-tree edge x -> y. }
procedure ins(x, y: longint);
begin
  inc(len);
  e[len].po := y;
  e[len].next := p[x];
  p[x] := len;
end;

{ Standard SAM extension by c, used when last has no c-transition yet.
  The second parameter is not used by the body. }
procedure add(c: char; x: longint);
var
  p, q, np, nq: longint;
begin
  p := last;
  inc(t); last := t; np := t;
  mx[np] := mx[p] + 1;
  while (p <> 0) and (go[p, c] = 0) do
  begin
    go[p, c] := np;
    p := fa[p];
  end;
  if p = 0 then fa[np] := 1
  else begin
    q := go[p, c];
    if mx[q] = mx[p] + 1 then fa[np] := q
    else begin
      inc(t); nq := t;
      mx[nq] := mx[p] + 1;
      go[nq] := go[q];
      fa[nq] := fa[q];
      fa[q] := nq; fa[np] := nq;
      while go[p, c] = q do
      begin
        go[p, c] := nq;
        p := fa[p];
      end;
    end;
  end;
end;

{ Extension when last already has a c-transition: reuse the target if its
  longest length matches, otherwise split it and move last to the clone. }
procedure change(c: char);
var
  p, np, q: longint;
begin
  p := go[last, c];
  if mx[p] = mx[last] + 1 then last := p
  else begin
    inc(t); np := t;
    mx[np] := mx[last] + 1;
    go[np] := go[p];
    fa[np] := fa[p];
    fa[p] := np;
    q := last;
    while go[q, c] = p do
    begin
      go[q, c] := np;
      q := fa[q];
    end;
    last := np;
  end;
end;

{ DFS numbering of the parent tree. }
procedure dfs(x: longint);
var
  i: longint;
begin
  inc(ti);
  sa[ti] := x;                      // which state sits at this DFS position
  b[x] := ti;
  i := p[x];
  while i <> 0 do
  begin
    dfs(e[i].po);
    i := e[i].next;
  end;
  r[x] := ti;
end;

{ Turns g[] into root-to-node prefix sums along the parent tree. }
procedure dfss(x: longint);
var
  i: longint;
begin
  g[x] := g[x] + g[fa[x]];
  i := p[x];
  while i <> 0 do
  begin
    dfss(e[i].po);
    i := e[i].next;
  end;
end;

{ Fenwick tree: add w at position x. }
procedure work(x, w: longint);
begin
  while x <= t do
  begin
    inc(c[x], w);
    x := x + lowbit(x);
  end;
end;

{ Fenwick tree: prefix sum over positions 1..x. }
function ask(x: longint): longint;
begin
  ask := 0;
  while x > 0 do
  begin
    ask := ask + c[x];
    x := x - lowbit(x);
  end;
end;

{ Records that state y is a prefix state of string x, in both directions. }
procedure put(x, y: longint);
begin
  inc(len);
  w[len].po := y;                   // states holding the suffixes of string x
  w[len].next := q[x];
  q[x] := len;
  pr[len].po := x;                  // strings whose suffix this state represents
  pr[len].next := h[y];
  h[y] := len;
end;

{ Orders states by the right end of their DFS interval. }
function cmp(a, b: longint): boolean;
begin
  exit(r[a] < r[b]);
end;

procedure sort(l, r: longint);
var
  i, j, x: longint;
begin
  i := l;
  j := r;
  x := a[(l + r) shr 1];
  repeat
    while cmp(a[i], x) do inc(i);
    while cmp(x, a[j]) do dec(j);
    if not (i > j) then
    begin
      swap(a[i], a[j]);
      inc(i);
      dec(j);
    end;
  until i > j;
  if l < j then sort(l, j);
  if i < r then sort(i, r);
end;

begin
  readln(n, k);
  last := 1; t := 1;
  for i := 1 to n do
  begin
    readln(s);
    l := length(s); last := 1;
    for j := l downto 1 do
    begin
      if go[last, s[j]] <> 0 then change(s[j])   // generalized suffix tree step
      else add(s[j], i);
      put(i, last);
    end;
  end;
  len := 0;
  for i := 2 to t do
    ins(fa[i], i);

  dfs(1);
  for i := 1 to t do
    a[i] := i;
  sort(1, t);
  j := 1; x := a[1];
  for i := 1 to t do
  begin
    // A state may belong to several strings, so each of them has to move
    // its single switched-on position to i.
    f := h[sa[i]];
    while f <> 0 do
    begin
      y := pr[f].po;
      if cur[y] <> 0 then work(cur[y], -1);
      cur[y] := i;
      work(i, 1);
      f := pr[f].next;
    end;
    // Answer every state whose DFS interval ends at i.
    while (j <= t) and (r[x] = i) do
    begin
      len := ask(i) - ask(b[x] - 1);
      if len < k then g[x] := 0 else g[x] := mx[x] - mx[fa[x]];
      inc(j); x := a[j];
    end;
    if j = t + 1 then break;
  end;
  dfss(1);
  for i := 1 to n do
  begin
    j := q[i];
    while j <> 0 do
    begin
      x := w[j].po;
      ans[i] := ans[i] + g[x];
      j := w[j].next;
    end;
  end;
  for i := 1 to n do
    write(ans[i], ' ');
  writeln;
end.
bzoj2806 第一道小强和阿米巴的题,解锁新成就
构造出标准作文库的SAM后,L0不难想到二分答案吧
然后我们可以求出以询问串每个位置i为结尾的最长子串长度P[i]
设f[i]表示询问串前i个字符中“熟悉”部分的最大总长度,不难得到 f[i]=max(f[i-1], f[j]+i-j),其中 L0<=i-j<=P[i]
然后这个是明显的单调队列优化吧
{ BZOJ 2806.

  A generalized suffix automaton is built over the m library strings.  For
  every query string, `match` computes v[i], the length of the longest
  library substring ending at position i.  The answer L0 is found by binary
  search; `check(l0)` runs the DP
      f[i] = max(f[i-1], f[j] + i - j)   with  l0 <= i - j <= v[i]
  using a monotone deque keyed on f[j] - j, and accepts when at least 90%
  of the query is covered. }
var
  go: array[0..1200010 * 2, '0'..'1'] of longint;
  q, v, f: array[0..1200010] of longint;   // deque, match lengths, DP values
  fa, mx: array[0..1200010 * 2] of longint;
  ans, mid, i, n, m, last, t, l, r, j: longint;
  s: ansistring;

function max(a, b: longint): longint;
begin
  if a > b then exit(a) else exit(b);
end;

{ Extension when last already has a c-transition: reuse the target if its
  longest length matches, otherwise split it and move last to the clone. }
procedure change(c: char);
var
  q, p, np: longint;
begin
  p := go[last, c];
  if mx[p] = mx[last] + 1 then last := p
  else begin
    inc(t); np := t;
    mx[np] := mx[last] + 1;
    go[np] := go[p];
    fa[np] := fa[p];
    fa[p] := np;
    q := last;
    while go[q, c] = p do
    begin
      go[q, c] := np;
      q := fa[q];
    end;
    last := np;
  end;
end;

{ Standard SAM extension by c, used when last has no c-transition yet. }
procedure add(c: char);
var
  p, q, np, nq: longint;
begin
  p := last;
  inc(t); last := t; np := t;
  mx[np] := mx[p] + 1;
  while (p <> 0) and (go[p, c] = 0) do
  begin
    go[p, c] := np;
    p := fa[p];
  end;
  if p = 0 then fa[np] := 1
  else begin
    q := go[p, c];
    if mx[q] = mx[p] + 1 then fa[np] := q
    else begin
      inc(t); nq := t;
      mx[nq] := mx[p] + 1;
      go[nq] := go[q];
      fa[nq] := fa[q];
      fa[q] := nq; fa[np] := nq;
      while go[p, c] = q do
      begin
        go[p, c] := nq;
        p := fa[p];
      end;
    end;
  end;
end;

{ Fills v[i] with the length of the longest library substring ending at s[i]. }
procedure match;
var
  i, j, l, t: longint;
begin
  j := 1; t := 0;
  l := length(s);
  for i := 1 to l do
  begin
    if go[j, s[i]] <> 0 then
    begin
      inc(t);
      j := go[j, s[i]];
    end
    else begin
      // Fall back along suffix links until the character can be consumed.
      while (j <> 0) and (go[j, s[i]] = 0) do j := fa[j];
      if j = 0 then
      begin
        t := 0;
        j := 1;
      end
      else begin
        t := mx[j] + 1;
        j := go[j, s[i]];
      end;
    end;
    v[i] := t;
  end;
end;

{ Deque ordering: candidate i is worse than j when f[i]-i < f[j]-j. }
function cmp(i, j: longint): boolean;
begin
  exit(f[i] - i < f[j] - j);
end;

{ Returns true when, with minimum segment length l0, at least 90% of s can
  be covered.  Callers must pass l0 >= 1: the loop reads f[i-1] starting at
  i = l0, so l0 = 0 would index f[-1]. }
function check(l0: longint): boolean;
var
  h, t, i, n: longint;
begin
  n := length(s);
  for i := 0 to l0 - 1 do
    f[i] := 0;
  h := 1; t := 0;
  for i := l0 to n do
  begin
    // i - l0 becomes an eligible left end; keep the deque decreasing in f[j]-j.
    while (h <= t) and (cmp(q[t], i - l0)) do dec(t);
    inc(t);
    q[t] := i - l0;
    f[i] := f[i - 1];
    // Drop left ends that are further back than the longest match allows.
    while (h <= t) and (q[h] < i - v[i]) do inc(h);
    if h <= t then f[i] := max(f[i], f[q[h]] + i - q[h]);
  end;
  // Exact integer form of f[n]/n >= 0.9; also avoids dividing by n = 0.
  exit(int64(f[n]) * 10 >= int64(n) * 9);
end;

begin
  readln(n, m);
  t := 1;
  for i := 1 to m do
  begin
    readln(s);
    last := 1;
    l := length(s);
    for j := 1 to l do
      if go[last, s[j]] <> 0 then change(s[j])
      else add(s[j]);
  end;
  for i := 1 to n do
  begin
    readln(s);
    match;
    // Reset per query so a stale answer is never printed; 0 is the result
    // when no positive L0 passes, so the search can start at 1.
    ans := 0;
    l := 1;
    r := length(s);
    while l <= r do
    begin
      mid := (l + r) shr 1;
      if check(mid) then
      begin
        ans := mid;
        l := mid + 1;
      end
      else r := mid - 1;
    end;
    writeln(ans);
  end;
end.
UPD:以前写的广义后缀树有点冗长,最近转c++重新写了一份感觉好多了……
以我之前写的2780为例
#include<iostream>
#include<cstring>
#include<cstdio>
#include<stdlib.h>
#include<algorithm>
#include<vector>

// BZOJ 2780 with a generalized suffix automaton.
//
// Every dictionary string is inserted into one shared automaton, restarting
// from the root each time.  b[state] lists the strings for which `state` is
// a prefix state.  A query is walked from the root; the number of strings
// containing it is the number of distinct string ids found in the
// parent-tree subtree of the state it ends in, which is answered offline
// with a Fenwick tree while sweeping the DFS order from left to right.

using namespace std;
vector<int> b[200010],q[10010];
// b[] : for each state, the strings it is a prefix state of
// q[] : for each string, the DFS positions of its prefix states (ascending)
struct way{int po,next;} e[200010];
struct node{int w,id;} a[60010];          // query end state and query index
int ans[60010],w[200010],go[200010][26],fa[200010],mx[200010],l[200010],r[200010],p[200010],c[200010];
int len,t,last,n,m;
// Shared by dictionary and query lines.
// NOTE(review): sized for query lines of up to 360000 characters -- confirm
// against the problem limits; the previous 100010 only covered the dictionary.
char s[360010];

// Orders queries by the DFS entry position of their end state
// (state 0, meaning "no match", has l == 0 and sorts first).
bool cmp(node a,node b)
{
    return l[a.w]<l[b.w];
}

// Extends the automaton by character c and moves `last` to the resulting
// state.  When the transition already exists no new state is created and
// np is 0, so the writes to fa[np] land in the unused slot fa[0]; main()
// clears fa[0] again before the parent tree is built.
void work(int c)
{
    int np,nq,q,p=last;
    if (!go[last][c])
    {
        np=++t;
        mx[np]=mx[p]+1;
        for (;p&&!go[p][c];p=fa[p]) go[p][c]=np;
    }
    else np=0;
    if (!p) fa[np]=1;
    else {
        q=go[p][c];
        if (mx[q]==mx[p]+1) fa[np]=q;
        else {
            nq=++t;
            mx[nq]=mx[p]+1;
            memcpy(go[nq],go[q],sizeof(go[q]));
            fa[nq]=fa[q]; fa[q]=fa[np]=nq;
            for (;go[p][c]==q;p=fa[p]) go[p][c]=nq;
        }
    }
    last=go[last][c];
}

// Adds the parent-tree edge x -> y.
void build(int x,int y)
{
    e[++len].po=y;
    e[len].next=p[x];
    p[x]=len;
}

// DFS numbering of the parent tree: l/r bound the subtree, w maps a
// position back to its state.
void dfs(int x)
{
    l[x]=++t; w[t]=x;
    for (int i=p[x];i;i=e[i].next)
    {
        dfs(e[i].po);
    }
    r[x]=t;
}

// Fenwick tree: add w at position x.
void add(int x,int w)
{
    for (int i=x;i<=t;i+=i&-i) c[i]+=w;
}

// Fenwick tree: prefix sum over positions 1..x.
int ask(int x)
{
    int s=0;
    for (int i=x;i;i-=i&-i) s+=c[i];
    return s;
}

int main()
{
    scanf("%d%d",&n,&m);
    t=last=1;
    for (int i=1; i<=n; i++)
    {
        scanf("%s",s+1); len=strlen(s+1);
        last=1;
        for (int j=1; j<=len;j++)
        {
            work(s[j]-'a');
            b[last].push_back(i);
        }
    }
    len=fa[0]=0;                       // drop whatever work() parked in fa[0]
    for (int i=2; i<=t; i++) build(fa[i],i);
    t=0; dfs(1);
    for (int i=1; i<=m; i++)
    {
        scanf("%s",s+1); len=strlen(s+1);
        int j=1;
        for (int k=1;k<=len;k++)
        {
            if (!go[j][s[k]-'a']) {j=0;break;}
            j=go[j][s[k]-'a'];
        }
        a[i].w=j; a[i].id=i;
    }
    sort(a+1,a+1+m,cmp);
    vector<int>::iterator k;
    for (int i=1;i<=t;i++)
        for (k=b[w[i]].begin();k!=b[w[i]].end(); k++)
            q[*k].push_back(i);
    // Start with only the first DFS position of every string switched on.
    vector<int>::iterator cur[10010];
    for (int i=1; i<=n; i++)
    {
        cur[i]=q[i].begin();
        if (cur[i]!=q[i].end()) {add(*cur[i],1); cur[i]++;}
    }
    int j=1;
    // Skip unmatched queries (answer stays 0).  Both scans are bounded by m
    // so they cannot walk past the end of a[].
    while (j<=m && !a[j].w) j++;
    for (int i=1; i<=t; i++)
    {
        for (;j<=m && l[a[j].w]==i; j++) ans[a[j].id]=ask(r[a[j].w])-ask(l[a[j].w]-1);
        // Position i leaves the window: switch on the next position of
        // every string that was represented here.
        for (k=b[w[i]].begin();k!=b[w[i]].end(); k++)
        {
            int x=*k;
            if (cur[x]!=q[x].end()) {add(*cur[x],1); cur[x]++;}
        }
    }
    for (int i=1; i<=m; i++) printf("%d\n",ans[i]);
    return 0;
}