【洛谷P4245】【模板】 任意模数NTT(MTT)

传送门


首先是一种只用两次DFTDFT的卷积写法

若求A(x)B(x)A(x)*B(x)

P(x)=A(x)+iB(x)P(x)=A(x)+iB(x)
Q(x)=A(x)iB(x)Q(x)=A(x)-iB(x)

那么如果P(x)IDFTP(x)\mathrm{IDFT}求出来
P[k]=j=0L1Ajωljk+iBjωljkP'_{[k]}=\sum_{j=0}^{L-1}A_j\omega_{l}^{jk}+iB_j\omega_{l}^{jk}
=j=0L1(Aj+iBj)(cosX+isinX)X=2πjkl=\sum_{j=0}^{L-1}(A_j+iB_j)(\cos_{X}+i\sin_{X}),X=\frac{2\pi jk}{l}

Qk=j=0L1(AjiBj)(cosX+isinX)Q'_{k}=\sum_{j=0}^{L-1}(A_j-iB_j)(\cos_X+i\sin_X)
=j(AjcosX+BjsinX)i(BjcosXAjsinX)=\sum_{j}(A_j\cos_X+B_j\sin_X)-i(B_j\cos_X-A_j\sin_X)
=conj(j(AjcosX+BjsinX)+i(BjcosXAjsinX))=conj(\sum_j(A_j\cos_X+B_j\sin_X)+i(B_j\cos_X-A_j\sin_X))
=conj(j(AjcosXBjsinX)+i(BjcosX+AjsinX))=conj(\sum_j(A_j\cos_{-X}-B_j\sin_{-X})+i(B_j\cos_{-X}+A_j\sin_{-X}))
=conj((Aj+iBj)(isinX+cosX))=conj((A_j+iB_j)(i\sin_{-X}+\cos_{-X}))
=conj(P[lk])=conj(P'_{[l-k]})

于是只用一次DFT\mathrm{DFT}即可求出P,QP',Q'
然后A=P+Q2,B=iQP2A'=\frac{P'+Q'}{2},B'=i\frac{Q'-P'}{2}
然后一次IDFT\mathrm{IDFT}即可

另外一个关于FFTFFT优化
实际上对于IDFT\mathrm{IDFT},做完之后应该是只剩实数部分的
所以可以把两个多项式A,BA,B做成A+iBA+iB的形式IDFT\mathrm{IDFT}回来
这样可以在对于多个多项式IDFT\mathrm{IDFT}的时候简化

对于任意模数,用FFTFFT做的问题是会炸精度
于是考虑把值拆成A1215+A2A_1*2^{15}+A_2的形式

相当于是对44个多项式变换
首先DFT\mathrm{DFT}可以只做两次了
然后IDFT\mathrm{IDFT}可以利用合并的技巧也只用做两次
常数不是很大

另外预处理单位根
每次乘ωn\omega_n也很耗费精度
所以每隔一些就重新用sin,cos\sin,\cos计算

#include<bits/stdc++.h>
using namespace std;
#define cs const
#define pb push_back
#define pii pair<int,int>
#define fi first
#define se second
#define ll long long
#define re register
cs int RLEN=1<<20|1;
inline char gc(){
	static char ibuf[RLEN],*ib,*ob;
	(ib==ob)&&(ob=(ib=ibuf)+fread(ibuf,1,RLEN,stdin));
	return (ib==ob)?EOF:*ib++;
}
inline int read(){
	char ch=gc();
	int res=0;bool f=1;
	while(!isdigit(ch))f^=ch=='-',ch=gc();
	while(isdigit(ch))res=(res+(res<<2)<<1)+(ch^48),ch=gc();
	return f?res:-res;
}
int mod;
struct plx{
	double x,y;
	plx(double _x=0,double _y=0):x(_x),y(_y){}
	friend inline plx operator +(cs plx &a,cs plx &b){
		return plx(a.x+b.x,a.y+b.y);
	}
	friend inline plx operator -(cs plx &a,cs plx &b){
		return plx(a.x-b.x,a.y-b.y);
	}
	friend inline plx operator *(cs plx &a,cs plx &b){
		return plx(a.x*b.x-a.y*b.y,a.x*b.y+a.y*b.x);
	}
	inline plx conj()cs{return plx(x,-y);}
};
#define poly vector<plx>
cs int C=19,M=(1<<15)-1,N=400005;
cs double pi=acos(-1);
poly w[C+1];
int rev[(1<<C)|5];
inline void init_rev(int lim){
	for(int i=0;i<lim;i++)rev[i]=(rev[i>>1]>>1)|((i&1)*(lim>>1));
}
inline void init_w(){
	for(int i=1;i<=C;i++)w[i].resize(1<<(i-1));
	plx wn=plx(cos(pi/(1<<(C-1))),sin(pi/(1<<(C-1))));
	w[C][0]=plx(1,0);
	for(int i=1;i<(1<<(C-1));i++){
		if(i&31)w[C][i]=w[C][i-1]*wn;
		else w[C][i]=plx(cos(pi*i/(1<<(C-1))),sin(pi*i/(1<<(C-1))));
	}
	for(int i=C-1;i;i--)
	for(int j=0;j<(1<<(i-1));j++)w[i][j]=w[i+1][j<<1];
}
inline void fft(plx *f,int lim,int kd){
	for(int i=0;i<lim;i++)if(i>rev[i])swap(f[i],f[rev[i]]);
	plx a0,a1;
	for(int mid=1,l=1;mid<lim;mid<<=1,l++)
	for(int i=0;i<lim;i+=(mid<<1))
	for(int j=0;j<mid;j++)
	a0=f[i+j],a1=f[i+j+mid]*w[l][j],f[i+j]=a0+a1,f[i+j+mid]=a0-a1;
	if(kd==-1){
		reverse(f+1,f+lim);
		for(int i=0;i<lim;i++)f[i].x/=lim,f[i].y/=lim;
	}
}
inline void mul(int *A,int *B,int lim,int *ret){
	static plx a[(1<<C)|5],b[(1<<C)|5],c[(1<<C)|5],d[(1<<C)|5],da,db,dc,dd;
	for(int i=0;i<lim;i++)a[i]=plx(A[i]&M,A[i]>>15),b[i]=plx(B[i]&M,B[i]>>15);
	init_rev(lim);
	fft(a,lim,1),fft(b,lim,1);
	for(int i=0;i<lim;i++){
		int j=(lim-i)&(lim-1);
		da=(a[i]+a[j].conj())*plx(0.5,0);
		db=(a[j].conj()-a[i])*plx(0,0.5);
		dc=(b[i]+b[j].conj())*plx(0.5,0);
		dd=(b[j].conj()-b[i])*plx(0,0.5);
		c[i]=(da*dc)+((da*dd)*plx(0,1));
		d[i]=(db*dd)+((db*dc)*plx(0,1));
	}
	fft(c,lim,-1),fft(d,lim,-1);
	for(int i=0;i<lim;i++){
		ll da=(ll)(d[i].x+0.5)%mod,db=(ll)(d[i].y+0.5)%mod,dc=(ll)(c[i].y+0.5)%mod,dd=(ll)(c[i].x+0.5)%mod;
		ret[i]=((da<<30)+((db+dc)<<15)+dd)%mod;
	}
}
int n,m,a[N],b[N],lim,ans[N];
int main(){
	#ifdef Stargazer
	freopen("lx.in","r",stdin);
	freopen("my.out","w",stdout);
	#endif
	init_w();
	n=read()+1,m=read()+1,mod=read();
	for(int i=0;i<n;i++)a[i]=read()%mod;
	for(int i=0;i<m;i++)b[i]=read()%mod;
	lim=1;
	while(lim<(n+m))lim<<=1;
	mul(a,b,lim,ans);
	for(int i=0;i<n+m-1;i++)cout<<ans[i]<<" ";
}
posted @ 2019-11-09 16:38  Stargazer_cykoi  阅读(160)  评论(0编辑  收藏  举报