NTT效率测试

这玩意儿其实不怎么靠谱,毕竟很多时候并不会每一次 NTT 都作长度相同的卷积,更多时候是用来估计常数的。。。

差不多是把 1e3,1e4,1e5,1e6 四个数据范围的 乘法,求逆,ln,exp 都测了一遍,阈值是 5s。

指令:

-std=c++14 -Wl,-stack=114514192 -Wall -lm -O2

结果:

times_1e3:[time=5.000000,times=6961] | times_1e3:[time=5.000000,times=6730]
inv_1e3:[time=5.001000,times=4359]   | inv_1e3:[time=5.001000,times=4401]
ln_1e3:[time=5.000000,times=4011]    | ln_1e3:[time=5.000000,times=3964]
exp_1e3:[time=5.000000,times=2982]   | exp_1e3:[time=5.001000,times=2795]
times_1e4:[time=5.001000,times=1056] | times_1e4:[time=5.005000,times=764]
inv_1e4:[time=5.002000,times=474]    | inv_1e4:[time=5.005000,times=409]
ln_1e4:[time=5.010000,times=400]     | ln_1e4:[time=5.015000,times=345]
exp_1e4:[time=5.012000,times=253]    | exp_1e4:[time=5.008000,times=239]
times_1e5:[time=5.025000,times=114]  | times_1e5:[time=5.047000,times=108]
inv_1e5:[time=5.016000,times=50]     | inv_1e5:[time=5.095000,times=45]
ln_1e5:[time=5.086000,times=42]      | ln_1e5:[time=5.094000,times=37]
exp_1e5:[time=5.007000,times=26]     | exp_1e5:[time=5.031000,times=23]
times_1e6:[time=5.305000,times=12]   | times_1e6:[time=5.309000,times=11]
inv_1e6:[time=5.061000,times=5]      | inv_1e6:[time=5.597000,times=5]
ln_1e6:[time=5.509000,times=4]       | ln_1e6:[time=5.183000,times=4]
exp_1e6:[time=5.825000,times=3]      | exp_1e6:[time=6.529000,times=3]

测试用的代码:

#include<cstdio>
#include<cctype>
#define IMP(n,act) for(int lim=(n),i=0;i^lim;++i)act
const int M=1<<21|5,mod=998244353;
int n,m,F[M],G[M];
int INV[M<<2],buf[M<<2],*w[M];
inline void swap(int&a,int&b){
	int c=a;a=b;b=c;
}
inline int Add(const int&a,const int&b){
	return a+b>=mod?a+b-mod:a+b;
}
inline int Del(const int&a,const int&b){
	return b>a?a-b+mod:a-b;
}
inline int Getlen(const int&n){
	int len(0);while((1<<len)<n)++len;return len;
}
inline int pow(int a,int b=mod-2){
	int ans(1);for(;b;b>>=1,a=1ll*a*a%mod)if(b&1)ans=1ll*ans*a%mod;return ans;
}
inline void init(const int&n){
	int m=Getlen(n),*now=buf;w[m]=now;now+=1<<m;
	w[m][0]=1;w[m][1]=pow(3,mod-1>>m+1);for(int i=2;i^1<<m;++i)w[m][i]=1ll*w[m][i-1]*w[m][1]%mod;
	for(int k=m-1;k>=0&&(w[k]=now,now+=1<<k);--k)IMP(1<<k,w[k][i]=w[k+1][i<<1]);
	INV[0]=INV[1]=1;for(int i=2;i<=n;++i)INV[i]=1ll*(mod-mod/i)*INV[mod%i]%mod;
}
inline void DFT(int*f,const int&M){
	const int&n=1<<M;
	for(int d=M-1,len=n>>1;d>=0;--d,len>>=1)for(int k=0;k^n;k+=len<<1){
		int*W=w[d],*L=f+(k),*R=f+(k|len),x,y;IMP(len,(x=*L,y=*R)),*L++=Add(x,y),*R++=1ll**W++*Del(x,y)%mod;
	}
}
inline void IDFT(int*f,const int&M){
	const int&n=1<<M;
	for(int d=0,len=1;d<M;++d,len<<=1)for(int k=0;k^n;k+=len<<1){
		int*W=w[d],*L=f+(k),*R=f+(k|len),x,y;IMP(len,(x=*L,y=1ll**W++**R%mod)),*L++=Add(x,y),*R++=Del(x,y);
	}
	const int&k=pow(n);IMP(n,f[i]=1ll*f[i]*k%mod);for(int i=1;(i<<1)<n;++i)swap(f[i],f[n-i]);
}
inline void Inv(int*f,const int&n){
	static int b1[M],b2[M],b3[M];const int&m=Getlen(n);b1[0]=pow(f[0]);
	for(int len=1;len<=m;++len){
		IMP(1<<len-1,b2[i]=2ll*b1[i]%mod);IMP(1<<len,b3[i]=f[i]);
		DFT(b1,len+1);DFT(b3,len+1);IMP(1<<len+1,b1[i]=1ll*b1[i]*b1[i]%mod*b3[i]%mod);IDFT(b1,len+1);
		IMP(1<<len,b1[i]=Del(b2[i],b1[i])),b3[i]=b3[1<<len|i]=b1[1<<len|i]=0;
	}
	IMP(n,f[i]=b1[i]);IMP(1<<m,b1[i]=b2[i]=0);
}
inline void Der(int*f,const int&n){
	IMP(n-1,f[i]=1ll*(i+1)*f[i+1]%mod);f[n-1]=0;
}
inline void Int(int*f,const int&n){
	for(int i=n-1;i>=0;--i)f[i+1]=1ll*f[i]*INV[i+1]%mod;f[0]=0;
}
inline void Ln(int*f,const int&n){
	static int g[M];const int&len=Getlen(n+n-2);IMP(n,g[i]=f[i]);Der(g,n);Inv(f,n);
	DFT(f,len);DFT(g,len);IMP(1<<len,g[i]=1ll*f[i]*g[i]%mod);IDFT(g,len);
	IMP(1<<len,f[i]=0);IMP(n-1,f[i]=g[i]);Int(f,n-1);IMP(1<<len,g[i]=0);
}
inline void Exp(int*f,const int&n){
	static int b1[M],b2[M],b3[M];const int&m=Getlen(n);b1[0]=1;
	for(int len=1;len<=m;++len){
		IMP(1<<len-1,b3[i]=b2[i]=b1[i]);Ln(b2,1<<len);IMP(1<<len,b2[i]=Del(f[i],b2[i]));++b2[0];
		DFT(b2,len);DFT(b3,len);IMP(1<<len,b2[i]=1ll*b2[i]*b3[i]%mod);IDFT(b2,len);
		IMP(1<<len-1,b1[1<<len-1|i]=b2[1<<len-1|i]);
	}
	IMP(n,f[i]=b1[i]);IMP(1<<m,b1[i]=b2[i]=b3[i]=0);
}
inline int read(){
	int n(0);char s;while(!isdigit(s=getchar()));while(n=n*10+(s&15),isdigit(s=getchar()));return n;
}
inline void write(int n){
	static char s[10];int top(0);while(s[++top]=n%10^48,n/=10);while(putchar(s[top]),--top);putchar(0);
}
#include<string>
#include<ctime>
std::string times="times",inv="inv",ln="ln",exp="exp",_e3="_1e3",_e4="_1e4",_e5="_1e5",_e6="_1e6";
std::string in=".in",out=".out";
inline void check(std::string name){
	double tim=clock();int cnt(0);
	while((clock()-tim)/1000<5){
		freopen((name+in).c_str(),"r",stdin);
		freopen((name+out).c_str(),"w",stdout);
		int typ=read();
		if(typ==0){
			n=read();IMP(n,F[i]=read());m=read();IMP(m,G[i]=read());const int&len=Getlen(n+m-1);
			DFT(F,len);DFT(G,len);IMP(1<<len,F[i]=1ll*F[i]*G[i]%mod);IDFT(F,len);
			IMP(n+m-1,write(F[i]));
		}
		if(typ==1){
			n=read();IMP(n,F[i]=read());Inv(F,n);
			IMP(n,write(F[i]));
		}
		if(typ==2){
			n=read();IMP(n,F[i]=read());Ln(F,n);
			IMP(n,write(F[i]));
		}
		if(typ==3){
			n=read();IMP(n,F[i]=read());Exp(F,n);
			IMP(n,write(F[i]));
		}
		++cnt;
	}
	fprintf(stderr,(name+":").c_str());fprintf(stderr,"[time=%.6lf,times=%d]\n",(clock()-tim)/1000,cnt);
}
signed main(){
	init(1<<20);
	check((times+_e3).c_str());check((inv+_e3).c_str());check((ln+_e3).c_str());check((exp+_e3).c_str());
	check((times+_e4).c_str());check((inv+_e4).c_str());check((ln+_e4).c_str());check((exp+_e4).c_str());
	check((times+_e5).c_str());check((inv+_e5).c_str());check((ln+_e5).c_str());check((exp+_e5).c_str());
	check((times+_e6).c_str());check((inv+_e6).c_str());check((ln+_e6).c_str());check((exp+_e6).c_str());
}

数据生成器:

#include<cstdlib>
#include<string>
#include<cstdio>
#include<ctime>
int n;std::string times="times",inv="inv",ln="ln",exp="exp",_e3="_1e3",_e4="_1e4",_e5="_1e5",_e6="_1e6";
std::string in=".in",out=".out";
inline int Rand(const int&L,const int&R){
	return 1ll*rand()*rand()%(R-L+1)+L;
}
inline void write(int n){
	static char s[15];int top(0);while(s[++top]=n%10^48,n/=10);while(putchar(s[top]),--top);
}
inline void writepoly(const int&n,const int&typ){
	write(typ);putchar(32);
	write(n);putchar(10);
	if(typ==0)write(Rand(0,998244352));if(typ==1)write(Rand(0,998244352));
	if(typ==2)write(1);if(typ==3)write(0);putchar(32);
	for(int i=1;i<n;++i)write(Rand(0,998244352)),putchar(i+1==n?10:32);
}
signed main(){
	srand(time(NULL));
	freopen((times+_e3+in).c_str(),"w",stdout);writepoly(1e3,0);writepoly(1e3,0);
	freopen((inv+_e3+in).c_str(),"w",stdout);writepoly(1e3,1);
	freopen((ln+_e3+in).c_str(),"w",stdout);writepoly(1e3,2);
	freopen((exp+_e3+in).c_str(),"w",stdout);writepoly(1e3,3);
	srand(time(NULL));
	freopen((times+_e4+in).c_str(),"w",stdout);writepoly(1e4,0);writepoly(1e4,0);
	freopen((inv+_e4+in).c_str(),"w",stdout);writepoly(1e4,1);
	freopen((ln+_e4+in).c_str(),"w",stdout);writepoly(1e4,2);
	freopen((exp+_e4+in).c_str(),"w",stdout);writepoly(1e4,3);
	srand(time(NULL));
	freopen((times+_e5+in).c_str(),"w",stdout);writepoly(1e5,0);writepoly(1e5,0);
	freopen((inv+_e5+in).c_str(),"w",stdout);writepoly(1e5,1);
	freopen((ln+_e5+in).c_str(),"w",stdout);writepoly(1e5,2);
	freopen((exp+_e5+in).c_str(),"w",stdout);writepoly(1e5,3);
	srand(time(NULL));
	freopen((times+_e6+in).c_str(),"w",stdout);writepoly(1e6,0);writepoly(1e6,0);
	freopen((inv+_e6+in).c_str(),"w",stdout);writepoly(1e6,1);
	freopen((ln+_e6+in).c_str(),"w",stdout);writepoly(1e6,2);
	freopen((exp+_e6+in).c_str(),"w",stdout);writepoly(1e6,3);
}
posted @ 2022-07-06 14:13  Prean  阅读(71)  评论(0编辑  收藏  举报
var canShowAdsense=function(){return !!0};