任意模数NTT

任意模数\(NTT\)

众所周知,为了满足单位根的性质,\(NTT\)需要质数模数,而且需要能写成\(a2^{k} + 1\)\(2^k \ge n\)
比较常用的有\(998244353,1004535809,469762049\),这三个原根都是\(3\)

如果要任意模数怎么办?
\(n\)次多项式在模\(m\)下乘积,最终系数一定不会大于\(nm^2\)
所以我们找三个模数分别做\(NTT\)再合并一下就好辣

但这样的合并结果会爆\(long long\)
需要用高精吗?
可以使用一些技巧
我们要合并的是

\[\left \{ \begin{aligned} x \equiv a_1 \pmod {m_1} \\ x \equiv a_2 \pmod {m_2} \\ x \equiv a_3 \pmod {m_3} \\ \end{aligned} \right. \]

我们先在\(long long\)范围内合并前两个

\[\left \{ \begin{aligned} x \equiv A \pmod M \\ x \equiv a_3 \pmod {m_3} \\ \end{aligned} \right. \]

由于最后结果模\(M\)\(A\),模\(m_3\)\(a_3\)
设最后的答案是

\[ans = kM + A \]

\(k\)需要满足

\[kM + A \equiv a_3 \pmod {m_3} \]

所以\(k\)一定是在模\(m_3\)意义下求出的,为

\[k \equiv (a_3 - A)M^{-1} \pmod {m_3} \]

求出\(k\)后就可以直接在原模数意义下求出

\[ans = kM + A \]

在第一次合并的时候需要快速乘

做三次\(NTT\)常数有够大的

模板题

#include<algorithm>
#include<iostream>
#include<cstdlib>
#include<cstring>
#include<cstdio>
#include<vector>
#include<queue>
#include<cmath>
#include<map>
#define LL long long int
#define REP(i,n) for (int i = 1; i <= (n); i++)
#define Redge(u) for (int k = h[u],to; k; k = ed[k].nxt)
#define cls(s,v) memset(s,v,sizeof(s))
#define mp(a,b) make_pair<int,int>(a,b)
#define cp pair<int,int>
using namespace std;
const int maxn = 400005,maxm = 100005,INF = 0x3f3f3f3f;
inline int read(){
	int out = 0,flag = 1; char c = getchar();
	while (c < 48 || c > 57){if (c == '-') flag = 0; c = getchar();}
	while (c >= 48 && c <= 57){out = (out << 1) + (out << 3) + c - 48; c = getchar();}
	return flag ? out : -out;
}
int pr[]={469762049,998244353,1004535809};
int R[maxn];
inline LL qpow(LL a,LL b,LL p){
	LL re = 1; a %= p;
	for (; b; b >>= 1,a = a * a % p)
		if (b & 1) re = re * a % p;
	return re;
}
struct FFT{
	int G,P,A[maxn];
	void NTT(int* a,int n,int f){
		for (int i = 0; i < n; i++) if (i < R[i]) swap(a[i],a[R[i]]);
		for (int i = 1; i < n; i <<= 1){
			int gn = qpow(G,(P - 1) / (i << 1),P);
			for (int j = 0; j < n; j += (i << 1)){
				int g = 1,x,y;
				for (int k = 0; k < i; k++,g = 1ll * g * gn % P){
					x = a[j + k],y = 1ll * g * a[j + k + i] % P;
					a[j + k] = (x + y) % P,a[j + k + i] = (x + P - y) % P;
				}
			}
		}
		if (f == 1) return;
		int nv = qpow(n,P - 2,P); reverse(a + 1,a + n);
		for (int i = 0; i < n; i++) a[i] = 1ll * a[i] * nv % P;
	}
}fft[3];
int F[maxn],G[maxn],B[maxn],deg1,deg2,deg,md;
LL ans[maxn];
LL inv(LL n,LL p){return qpow(n % p,p - 2,p);}
LL mul(LL a,LL b,LL p){
	LL re = 0;
	for (; b; b >>= 1,a = (a + a) % p)
		if (b & 1) re = (re + a) % p;
	return re;
}
void CRT(){
	deg = deg1 + deg2;
	LL a,b,c,t,k,M = 1ll * pr[0] * pr[1];
	LL inv1 = inv(pr[1],pr[0]),inv0 = inv(pr[0],pr[1]),inv3 = inv(M % pr[2],pr[2]);
	for (int i = 0; i <= deg; i++){
		a = fft[0].A[i],b = fft[1].A[i],c = fft[2].A[i];
		t = (mul(a * pr[1] % M,inv1,M) + mul(b * pr[0] % M,inv0,M)) % M;
		k = ((c - t % pr[2]) % pr[2] + pr[2]) % pr[2] * inv3 % pr[2];
		ans[i] = ((k % md) * (M % md) % md + t % md) % md;
	}
}
void conv(){
	int n = 1,L = 0;
	while (n <= (deg1 + deg2)) n <<= 1,L++;
	for (int i = 1; i < n; i++) R[i] = (R[i >> 1] >> 1) | ((i & 1) << (L - 1));
	for (int u = 0; u <= 2; u++){
		fft[u].G = 3; fft[u].P = pr[u];
		for (int i = 0; i <= deg1; i++) fft[u].A[i] = F[i];
		for (int i = 0; i <= deg2; i++) B[i] = G[i];
		for (int i = deg2 + 1; i < n; i++) B[i] = 0;
		fft[u].NTT(fft[u].A,n,1); fft[u].NTT(B,n,1);
		for (int i = 0; i < n; i++) fft[u].A[i] = 1ll * fft[u].A[i] * B[i] % pr[u];
		fft[u].NTT(fft[u].A,n,-1);
	}
}
int main(){
	deg1 = read(); deg2 = read(); md = read();
	for (int i = 0; i <= deg1; i++) F[i] = read();
	for (int i = 0; i <= deg2; i++) G[i] = read();
	conv(); CRT();
	for (int i = 0; i <= deg; i++) printf("%lld ",ans[i]);
	return 0;
}

posted @ 2018-07-12 08:45  Mychael  阅读(2458)  评论(6编辑  收藏  举报