[NOIP2020]T2字符串匹配

题外话：在考场上写了$O(n \ln n\log26)$的蠢做法，据说用暴力$O(n*26+n\ln n)$替代树状数组可以拿92分的好成绩。

这里介绍两种$O(n*\log 26)$的做法

首先经思考容易想到先预处理出后缀$C$的奇数次字符数量，然后枚举$AB$的大小（用哈希判断一下就行）。

因为$B$只要至少有一个字符就满足条件，而$A$又是前缀，所以就直接维护$A$中的奇数次字符数量，最多有26个，用树状数组维护就行，复杂度：$O(n \ln n\log26)$（由于查询是一个前缀，所以在更新$A$时直接暴力修改前缀数组就可以去掉$\log 26$）。~~讲道理数据稍微随机一点都能跑过去~~

#include<iostream>
#include<ctime>
#include<climits>
#include<cmath>
#include<cstring>
#include<algorithm>
#include<cstdio>
#define ll long long
#define ull unsigned long long
using namespace std;
// Array capacity and the two polynomial bases used by the double rolling hash.
const ll N=1100000,base1=1021831,base2=96269;
// hashK[i]: hash of the 1-indexed prefix of length i under baseK.
// powK[i]:  baseK raised to the i-th power.
// All arithmetic is unsigned 64-bit, so values wrap modulo 2^64.
ull hash1[N],hash2[N],pow1[N],pow2[N];
// Hash of the substring [l,r] (1-indexed, inclusive) under base1.
ull ff1(ll l,ll r)
{
    const ll len=r-l+1;
    const ull shifted_prefix=hash1[l-1]*pow1[len];
    return hash1[r]-shifted_prefix;
}
// Hash of the substring [l,r] (1-indexed, inclusive) under base2.
ull ff2(ll l,ll r)
{
    const ll len=r-l+1;
    const ull shifted_prefix=hash2[l-1]*pow2[len];
    return hash2[r]-shifted_prefix;
}
// Both hashes of [l,r] bundled into one comparable key.
pair<ull,ull> ff(ll l,ll r)
{
    pair<ull,ull> key;
    key.first=ff1(l,r);
    key.second=ff2(l,r);
    return key;
}
// Fenwick (binary indexed) tree over positions 1..n holding point counts.
// n is assigned by the caller; sum has 30 slots, so n must stay below 30.
struct BIT
{
    ll sum[30],n;
    // Zeroes every node; n is left untouched.
    void clear()
    {
        for(unsigned k=0;k<sizeof(sum)/sizeof(sum[0]);k++) sum[k]=0;
    }
    // Lowest set bit of x.
    inline ll lowbit(ll x) {return x&(-x);}
    // Adds v at position x (x>=1); positions above n are ignored.
    void add(ll x,ll v)
    {
        for(ll pos=x;pos<=n;pos+=lowbit(pos)) sum[pos]+=v;
    }
    // Returns the total stored at positions 1..x (0 when x is 0).
    ll find(ll x)
    {
        ll acc=0;
        for(ll pos=x;pos!=0;pos-=lowbit(pos)) acc+=sum[pos];
        return acc;
    }
}T;
// Per-test-case working state.
//   n        length of the current string
//   ans      answer accumulated for the current string
//   tot[c]   parity (0/1) of letter c in the part scanned so far
//   cnt      number of letters whose parity is currently odd
//   _tot[i]  number of odd-count letters in the suffix ch[i..n] (filled for i>=3)
ll n,ans,tot[30],cnt,_tot[N];
char ch[N]; // current string, stored from index 1
/*
 * Reads Q strings and prints one count per string.
 *
 * For every block length i (the length of AB) and every repetition count j
 * such that the prefix of length i*j consists of j copies of ch[1..i] and
 * leaves a non-empty suffix C=ch[i*j+1..n], the answer grows by the number
 * of prefixes A of length 1..i-1 whose odd-letter count does not exceed the
 * odd-letter count of C.
 */
int main()
{
    ll Q,i,j;
    pair<ull,ull> tmp;
    scanf("%lld",&Q);
    // The odd-letter count c lies in 0..26 and is stored at tree position
    // c+1, so the tree has to span 1..27. With the former bound of 26,
    // add(27,1) was a silent no-op and a prefix whose 26 letters all occur an
    // odd number of times was never counted.
    T.n=27;
    while(Q--)
    {
        T.clear();
        memset(tot,0,sizeof(tot));
        memset(_tot,0,sizeof(_tot));
        cnt=ans=0;
        scanf("%s",ch+1);
        n=strlen(ch+1);
        // Prefix hashes and base powers for O(1) substring comparison.
        hash1[0]=hash2[0]=0,pow1[0]=pow2[0]=1;
        for(i=1;i<=n;i++)
        {
            hash1[i]=hash1[i-1]*base1+ch[i],hash2[i]=hash2[i-1]*base2+ch[i];
            pow1[i]=pow1[i-1]*base1,pow2[i]=pow2[i-1]*base2;
        }
        // Odd-letter count of every candidate suffix C (C starts at index >=3).
        for(i=n;i>=3;i--)
        {
            tot[ch[i]-'a']^=1;
            if(tot[ch[i]-'a']==0) cnt--;
            else cnt++;
            _tot[i]=cnt;
        }
        memset(tot,0,sizeof(tot));
        cnt=0;
        for(i=2;i<n;i++)
        {
            // Register the prefix of length i-1 as a new candidate for A.
            tot[ch[i-1]-'a']^=1;
            if(tot[ch[i-1]-'a']==0) cnt--;
            else cnt++;
            T.add(cnt+1,1);
            tmp=ff(1,i);
            // Extend the repetition one block at a time until it breaks.
            for(j=1;i*j<n;j++)
            {
                if(ff(i*j-i+1,i*j)!=tmp) break;
                ans+=T.find(_tot[i*j+1]+1);
            }
        }
        printf("%lld\n",ans);
    }
    return 0;
}

刚才的做法瓶颈在于要枚举$AB$的循环次数

从数据范围里还没有用到的特殊性质入手分析一下：在只有一种字符或只有两种字符的情况下可以发现，$AB$的大小确定后，$(AB)^i$的奇数次字符数量随$i$是循环的（周期为2）。

所以$S_1=AB$与$S_2=ABABAB$的奇数次字符数量是一样的！而且这样的话$S_1$与$S_2$对应的$C$的奇数次字符数量也是一样的！换言之，当我们知道$AB$的循环次数时就可以$O(1)$求出答案。

（$AB$循环最多的那种并不需要单独计算，因为$AB$中出现偶数次的没有影响，而奇数次的字符变成0可以当偶数来看）

下面介绍两种预处理次数的方法

法一：

令$num[x]$为$[1,x]$为$AB$时的最多重复次数

若$[1,x],[x+1,2x]$相同时$num[x]$至少为$num[2x]*2$，不超过这个数+1。只需再判断一下$[1,x],[num[2x]*2x+1,num[2x]*2x+x]$就行

#include<iostream>
#include<ctime>
#include<climits>
#include<cmath>
#include<cstring>
#include<algorithm>
#include<cstdio>
#define ll long long
#define ull unsigned long long
using namespace std;
// Array capacity and the two polynomial bases used by the double rolling hash.
const ll N=1100000,base1=1021831,base2=96269;
// hashK[i]: hash of the 1-indexed prefix of length i under baseK.
// powK[i]:  baseK raised to the i-th power.
// All arithmetic is unsigned 64-bit, so values wrap modulo 2^64.
ull hash1[N],hash2[N],pow1[N],pow2[N];
// Hash of the substring [l,r] (1-indexed, inclusive) under base1.
ull ff1(ll l,ll r)
{
    const ll width=r-l+1;
    const ull dropped=hash1[l-1]*pow1[width];
    return hash1[r]-dropped;
}
// Hash of the substring [l,r] (1-indexed, inclusive) under base2.
ull ff2(ll l,ll r)
{
    const ll width=r-l+1;
    const ull dropped=hash2[l-1]*pow2[width];
    return hash2[r]-dropped;
}
// Both hashes of [l,r] bundled into one comparable key.
pair<ull,ull> ff(ll l,ll r)
{
    const ull first_hash=ff1(l,r);
    const ull second_hash=ff2(l,r);
    return pair<ull,ull>(first_hash,second_hash);
}
// Fenwick (binary indexed) tree over positions 1..n holding point counts.
// n is assigned by the caller; sum has 30 slots, so n must stay below 30.
struct BIT
{
    ll sum[30],n;
    // Zeroes every node; n is left untouched.
    void clear()
    {
        for(unsigned k=0;k<sizeof(sum)/sizeof(sum[0]);k++) sum[k]=0;
    }
    // Lowest set bit of x.
    inline ll lowbit(ll x) {return x&(-x);}
    // Adds v at position x (x>=1); positions above n are ignored.
    void add(ll x,ll v)
    {
        ll pos=x;
        while(pos<=n)
        {
            sum[pos]+=v;
            pos+=lowbit(pos);
        }
    }
    // Returns the total stored at positions 1..x (0 when x is 0).
    ll find(ll x)
    {
        ll acc=0;
        ll pos=x;
        while(pos!=0)
        {
            acc+=sum[pos];
            pos-=lowbit(pos);
        }
        return acc;
    }
}T;
ll n,ans,book[30],cnt,tot[N],num[N];
char ch[N];
int main()
{
    ll Q,i;
    scanf("%lld",&Q);
    T.n=26;
    while(Q--)
    {
        T.clear();
        memset(book,0,sizeof(book));
        memset(tot,0,sizeof(tot));
        memset(num,0,sizeof(num));
        cnt=ans=0;
        scanf("%s",ch+1);
        n=strlen(ch+1);
        hash1[0]=hash2[0]=0,pow1[0]=pow2[0]=1;
        for(i=1;i<=n;i++) 
        {
            hash1[i]=hash1[i-1]*base1+ch[i],hash2[i]=hash2[i-1]*base2+ch[i];
            pow1[i]=pow1[i-1]*base1,pow2[i]=pow2[i-1]*base2;
        }
        for(i=n;i>=3;i--) 
        {
            book[ch[i]-'a']^=1;
            if(book[ch[i]-'a']==0) cnt--;
            else cnt++;
            tot[i]=cnt;
        }
        num[n-1]=1;
        for(i=n-2;i>=2;i--)
        {
        	if(i*2>n-1 || ff(1,i)!=ff(i+1,i*2)) {num[i]=1;continue;}
        	num[i]=num[i<<1]<<1;
        	if(num[i]*i+i<=n-1 && ff(1,i)==ff(num[i]*i+1,num[i]*i+i)) num[i]++;
		}
		cnt=0;
		memset(book,0,sizeof(book));
        for(i=2;i<n;i++)
        {
            book[ch[i-1]-'a']^=1;
            if(book[ch[i-1]-'a']==0) cnt--;
            else cnt++;
            T.add(cnt+1,1);
            ans+=T.find(tot[i+1]+1)*((num[i]>>1)+(num[i]&1));
            ans+=T.find(tot[i*2+1]+1)*(num[i]>>1);
        }
        printf("%lld\n",ans);
    }
    return 0;
}

法二：

众所周知，用kmp算法预处理后可以$O(1)$求出字符串的最小循环周期：$len-kmp[len]$（$kmp[len]$为失配位置）。因此我们可以二分出$AB$的循环次数，二分的复杂度为

$\sum_{i=1}^{n}\log(\frac{n}{i})=n\log n-\sum_{i=1}^{n}\log i$

乍一眼感觉这个东西不太行，但是稍微拆一下

$\sum_{i=1}^{n}\log i\thickapprox n+(n-2^0)+(n-2^1)+\cdots+(n-2^{\log_{2}{n}})\thickapprox n*\log n-n$

所以二分实际上复杂度只有$O(n)$

#include<iostream>
#include<ctime>
#include<climits>
#include<cmath>
#include<cstring>
#include<algorithm>
#include<cstdio>
#define ll long long
#define ull unsigned long long
using namespace std;
// Capacity of the per-position arrays.
const ll N=1100000;
// Fenwick (binary indexed) tree over positions 1..n holding point counts.
// n is assigned by the caller; sum has 30 slots, so n must stay below 30.
struct BIT
{
    ll sum[30],n;
    // Zeroes every node; n is left untouched.
    void clear()
    {
        for(unsigned k=0;k<sizeof(sum)/sizeof(sum[0]);k++) sum[k]=0;
    }
    // Lowest set bit of x.
    inline ll lowbit(ll x) {return x&(-x);}
    // Adds v at position x (x>=1); positions above n are ignored.
    void add(ll x,ll v)
    {
        for(ll pos=x;pos<=n;pos+=lowbit(pos)) sum[pos]+=v;
    }
    // Returns the total stored at positions 1..x (0 when x is 0).
    ll find(ll x)
    {
        ll acc=0;
        for(ll pos=x;pos!=0;pos-=lowbit(pos)) acc+=sum[pos];
        return acc;
    }
}T;
ll n,ans,book[30],cnt,tot[N],num[N],kmp[N],k;
char ch[N];
int main()
{
    ll Q,i,l,r,mid,res,len;
    scanf("%lld",&Q);
    T.n=26;
    while(Q--)
    {
        T.clear();
        memset(book,0,sizeof(book));
        memset(tot,0,sizeof(tot));
        memset(num,0,sizeof(num));
        memset(kmp,0,sizeof(kmp));
        cnt=ans=0;
        scanf("%s",ch);
        n=strlen(ch);
        for(i=n-1;i>=2;i--) 
        {
            book[ch[i]-'a']^=1;
            if(book[ch[i]-'a']==0) cnt--;
            else cnt++;
            tot[i]=cnt;
        }
        kmp[0]=kmp[1]=0;k=0;
        for(i=1;i<n;i++)
        {
        	while(k && ch[i]!=ch[k]) k=kmp[k];
        	if(ch[i]==ch[k]) k++;
        	kmp[i+1]=k;
		}
		for(i=1;i<n;i++)
		{
			l=2,r=(n-1)/(i+1),res=1;
			while(l<=r)
			{
				mid=l+r>>1;
				len=(i+1)*mid;
				if((len-1-kmp[len-1])!=0 && (i+1)%(len-kmp[len])==0) res=mid,l=mid+1;
				else r=mid-1;
			}
			num[i]=res;
		}
		cnt=0;
		memset(book,0,sizeof(book));
        for(i=1;i<n-1;i++)
        {
            book[ch[i-1]-'a']^=1;
            if(book[ch[i-1]-'a']==0) cnt--;
            else cnt++;
            T.add(cnt+1,1);
            len=i+1;
            ans+=T.find(tot[len]+1)*((num[i]>>1)+(num[i]&1));
            ans+=T.find(tot[len*2]+1)*(num[i]>>1);
        }
        printf("%lld\n",ans);
    }
    return 0;
}

后记：

有个憨憨跑到我跟前说这题可以做到$O(n)$，我愣了一下，寻思着这$\log 26$貌似也不大啊，是不是我听错了？“因为你发现这次与上次的$C$的奇数次字符数量仅仅差1，所以···”，我无语。~~出题人卡这个他人就没了~~

$O(n):$

#include<iostream>
#include<ctime>
#include<climits>
#include<cmath>
#include<cstring>
#include<algorithm>
#include<cstdio>
#define ll long long
#define ull unsigned long long
using namespace std;
// Array capacity and the two polynomial bases used by the double rolling hash.
const ll N=1100000,base1=1021831,base2=96269;
// hashK[i]: hash of the 1-indexed prefix of length i under baseK.
// powK[i]:  baseK raised to the i-th power.
// All arithmetic is unsigned 64-bit, so values wrap modulo 2^64.
ull hash1[N],hash2[N],pow1[N],pow2[N];
// Hash of the substring [l,r] (1-indexed, inclusive) under base1.
ull ff1(ll l,ll r)
{
    const ull whole=hash1[r];
    const ull before=hash1[l-1];
    return whole-before*pow1[r-l+1];
}
// Hash of the substring [l,r] (1-indexed, inclusive) under base2.
ull ff2(ll l,ll r)
{
    const ull whole=hash2[r];
    const ull before=hash2[l-1];
    return whole-before*pow2[r-l+1];
}
// Both hashes of [l,r] bundled into one comparable key.
pair<ull,ull> ff(ll l,ll r)
{
    pair<ull,ull> key(ff1(l,r),ff2(l,r));
    return key;
}
// Per-test-case working state.
//   n        length of the current string
//   ans      answer accumulated for the current string
//   book[c]  parity (0/1) of letter c in the part scanned so far
//   cnt      number of letters whose parity is currently odd
//   tot[i]   number of odd-count letters in the suffix ch[i..n]; sized 2*N
//            because the main loop indexes it up to 2*i+1
//   num[i]   largest k with k*i<=n-1 such that ch[1..k*i] is k copies of ch[1..i]
//   sum[c]   number of registered prefixes whose odd-letter count equals c
ll n,ans,book[30],cnt,tot[N*2],num[N],sum[30];
char ch[N];
/*
 * Reads Q strings and prints one count per string.
 *
 * Same counting scheme as the version built on num[] and two suffix
 * thresholds, but without a tree: last1 and last2 hold the number of
 * registered prefixes whose odd-letter count does not exceed the current
 * threshold, and are patched in place each time a threshold moves.
 */
int main()
{
    ll Q,i,last1,last2;
    scanf("%lld",&Q);
    while(Q--)
    {
        memset(book,0,sizeof(book));
        memset(tot,0,sizeof(tot));
        memset(num,0,sizeof(num));
        memset(sum,0,sizeof(sum));
        cnt=ans=last1=last2=0;
        scanf("%s",ch+1);
        n=strlen(ch+1);
        // Prefix hashes and base powers for O(1) substring comparison.
        hash1[0]=hash2[0]=0,pow1[0]=pow2[0]=1;
        for(i=1;i<=n;i++) 
        {
            hash1[i]=hash1[i-1]*base1+ch[i],hash2[i]=hash2[i-1]*base2+ch[i];
            pow1[i]=pow1[i-1]*base1,pow2[i]=pow2[i-1]*base2;
        }
        // Odd-letter count of every suffix starting at index >=2.
        for(i=n;i>=2;i--) 
        {
            book[ch[i]-'a']^=1;
            if(book[ch[i]-'a']==0) cnt--;
            else cnt++;
            tot[i]=cnt;
        }
        // Repetition counts, longest block first so num[2*i] is ready for num[i].
        num[n-1]=1;
        for(i=n-2;i>=2;i--)
        {
        	if(i*2>n-1 || ff(1,i)!=ff(i+1,i*2)) {num[i]=1;continue;}
        	num[i]=num[i<<1]<<1;
        	// At most one more copy of the block can follow the doubled run.
        	if(num[i]*i+i<=n-1 && ff(1,i)==ff(num[i]*i+1,num[i]*i+i)) num[i]++;
		}
		cnt=0;
		memset(book,0,sizeof(book));
        for(i=2;i<n;i++)
        {
            // Register the prefix of length i-1.
            book[ch[i-1]-'a']^=1;
            if(book[ch[i-1]-'a']==0) cnt--;
            else cnt++;
            ++sum[cnt];
            // Count the new prefix against the thresholds left by the previous
            // iteration (tot[i] for last1, tot[2*(i-1)+1] for last2).
            if(cnt<=tot[i]) last1++;
            if(cnt<=tot[(i-1)*2+1]) last2++;
            // Move the first threshold from tot[i] to tot[i+1]; the two
            // suffixes differ by the single letter ch[i].
            if(tot[i]<tot[i+1]) last1+=sum[tot[i+1]];
            else last1-=sum[tot[i]];
            // Odd repetition counts all use the suffix starting at i+1.
            ans+=last1*((num[i]>>1)+(num[i]&1));
            // Move the second threshold from tot[2*i-1] to tot[2*i+1]; the
            // suffixes differ by two letters, and each branch patches two
            // adjacent buckets of sum.
            if(tot[(i-1)*2+1]<tot[i*2+1]) last2+=sum[tot[i*2+1]]+sum[tot[i*2+1]-1];
            else if(tot[(i-1)*2+1]>tot[i*2+1]) last2-=sum[tot[i*2+1]+1]+sum[tot[i*2+1]+2];
            // Even repetition counts all use the suffix starting at 2*i+1;
            // the factor is 0 whenever num[i] is 1.
            ans+=last2*(num[i]>>1);
        }
        printf("%lld\n",ans);
    }
    return 0;
}

跪求各位老爷一键三连~~点赞也行~~

posted @ 2020-12-21 23:04 欢语_暗影阅读(149) 评论(0) 编辑收藏举报

刷新页面返回顶部

辰语（暗影）

[NOIP2020]T2字符串匹配

法一：

法二：

后记：

\(O(n):\)

公告