任意模数NTT
三模数NTT
不会
本来想写三模数NTT,发现常数太TM大了,还不知道怎么优化,于是就写拆系数FFT吧
首先一个简单的方法就是直接FFT开long double 去跑,发现精度掉得十分严重(值域大于long long),于是考虑如何解决
通过学习bitset的经验我们知道要压位
考虑拆系数
假设多项式 A ∗ B , 模 数 是 P A * B, 模数是P A∗B,模数是P
确 定 一 个 常 数 C 一 般 为 2 15 = 32 , 768 或 P 确定一个常数 C 一般为2^{15}= 32,768 或 \sqrt{P} 确定一个常数C一般为215=32,768或P
考
虑
把
A
i
拆
成
a
i
∗
C
+
b
i
考虑把A_i 拆成 a_i *C+b_i
考虑把Ai拆成ai∗C+bi
把
B
i
拆
成
c
i
∗
C
+
d
i
把B_i 拆成 c_i *C+d_i
把Bi拆成ci∗C+di
即
A
i
=
a
i
∗
C
+
b
i
A_i = a_i *C+b_i
Ai=ai∗C+bi
B
i
=
c
i
∗
C
+
d
i
B_i = c_i *C+d_i
Bi=ci∗C+di
考虑两个数
X
,
Y
X,Y
X,Y相乘
设
X
=
a
∗
C
+
b
设X = a*C+b
设X=a∗C+b
Y
=
c
∗
C
+
d
Y= c *C+d
Y=c∗C+d
X
∗
Y
=
(
a
∗
C
+
b
)
∗
(
c
∗
C
+
d
)
X*Y=(a *C+b)*(c *C+d)
X∗Y=(a∗C+b)∗(c∗C+d)
=
a
c
∗
C
2
+
(
a
d
+
b
c
)
C
+
b
d
=ac*C^2 + (ad+bc)C+bd
=ac∗C2+(ad+bc)C+bd
发现C先不用管,要求的是
a
c
,
a
d
+
b
c
,
b
d
ac,ad+bc,bd
ac,ad+bc,bd
然后把
a
,
b
,
c
,
d
a,b,c,d
a,b,c,d分别求值(DFT),乘起来后再对
a
c
,
a
d
+
b
c
,
b
d
ac,ad+bc,bd
ac,ad+bc,bd做插值(IDFT)就行了
一共是4次DFT+3次IDFT=7次FFT
常数巨大
发现一开始的虚数部并没有用到,参考FFT3次变2次的优化,试试这个能不能行
设
X
=
a
+
b
i
设X = a+bi
设X=a+bi
Y
=
c
+
d
i
Y= c +di
Y=c+di
X ∗ Y = ( a + b i ) ∗ ( c + d i ) = a c − b d + ( b c + a d ) i X*Y=(a+bi)*(c+di)=ac-bd + (bc+ad)i X∗Y=(a+bi)∗(c+di)=ac−bd+(bc+ad)i
然鹅和我们要求的
a
c
,
b
d
,
a
d
+
b
c
ac,bd,ad+bc
ac,bd,ad+bc
现在已经就出来了
a
d
+
b
c
ad+bc
ad+bc
考虑如何求剩下两个
设
X
′
=
a
−
b
i
设X' = a - bi
设X′=a−bi
Y
=
c
+
d
i
Y= c + di
Y=c+di
X
′
∗
Y
=
(
a
−
b
i
)
∗
(
c
+
d
i
)
=
a
c
+
b
d
+
(
a
d
−
b
c
)
i
X'*Y=(a-bi)*(c+di)=ac+bd + (ad-bc)i
X′∗Y=(a−bi)∗(c+di)=ac+bd+(ad−bc)i
然
后
就
知
道
了
实
数
部
然后就知道了实数部
然后就知道了实数部ac+bd
的
值
的值
的值
结
合
上
面
a
c
−
b
d
的
值
就
可
以
求
出
来
a
c
和
b
d
了
结合上面ac-bd的值就可以求出来ac和bd了
结合上面ac−bd的值就可以求出来ac和bd了
这
样
貌
似
只
用
对
X
,
Y
,
X
′
这样貌似只用对X,Y,X'
这样貌似只用对X,Y,X′做3次求值(DFT)
再
对
X
∗
Y
,
X
′
∗
Y
再对X*Y,X'*Y
再对X∗Y,X′∗Y做2次插值就可以了
也就是说一共5次
据说myy有4次的%%%%% orz
code:
#include<bits/stdc++.h>
#define N 4000005
#define ll long long
#define double long double
using namespace std;
const double pi = acos(-1);
const double eps = 0.49;
const int C = 32768;
struct cp {
double a, b;
}X[N], Y[N], Xd[N];
cp operator +(cp x, cp y) {return cp{x.a + y.a, x.b + y.b};}
cp operator -(cp x, cp y) {return cp{x.a - y.a, x.b - y.b};}
cp operator *(cp x, cp y) {return cp{x.a * y.a - x.b * y.b, x.b * y.a + x.a * y.b};}
int rev[N], n, m, P;
void fft(cp *a, int len, int o) {
for(int i = 1; i <= len; i ++) rev[i] = (rev[i >> 1] >> 1) | ((i&1) * len >> 1);
for(int i = 1; i <= len; i ++) if(i < rev[i]) swap(a[i], a[rev[i]]);
for(int i = 2; i <= len; i <<= 1) {
cp wn = cp{cos(2 * pi / i), o * sin(2 * pi / i)};
for(int j = 0, p = i / 2; j + i - 1 <= len; j += i) {
cp w0 = cp{1, 0};
for(int k = j; k < j + p; k ++, w0 = w0 * wn) {
cp x = a[k], y = w0 * a[k + p];
a[k] = x + y;
a[k + p] = x - y;
}
}
}
if(o == -1) for(int i = 0; i <= len; i ++) a[i].a /= len, a[i].b /= len;
}
int main() {
scanf("%d%d%d", &n, &m, &P);
for(int i = 0; i <= n; i ++) {
int x;
scanf("%d", &x);
X[i].a = x / C;
X[i].b = x % C;
Xd[i].a = x / C;
Xd[i].b = -(x % C);
}
for(int i = 0; i <= m; i ++) {
int x;
scanf("%d", &x);
Y[i].a = x / C;
Y[i].b = x % C;
}
int len = 1;
for(; len <= n + m;) len <<= 1;
fft(X, len, 1), fft(Xd, len, 1), fft(Y, len, 1);
for(int i = 0; i <= len; i ++) X[i] = X[i] * Y[i], Xd[i] = Xd[i] * Y[i];
fft(X, len, -1), fft(Xd, len, -1);
for(int i = 0; i <= n + m; i ++) {
ll ac = (X[i].a + Xd[i].a) / 2 + eps;
ll bd = Xd[i].a - ac + eps;
ll bcad = X[i].b + eps;
printf("%lld ", (ac * C % P * C % P + bcad * C % P + bd) % P);
}
return 0;
}
板子一遍过,感动.jpg
任意模数多项式求逆
code:
#include<bits/stdc++.h>
#define N 4000005
#define ll long long
#define double long double
using namespace std;
const double pi = acos(-1);
const double eps = 0.49;
const int C = 32768;
struct cp {
double a, b;
}X[N], Y[N], Xd[N];
cp operator +(cp x, cp y) {return cp{x.a + y.a, x.b + y.b};}
cp operator -(cp x, cp y) {return cp{x.a - y.a, x.b - y.b};}
cp operator *(cp x, cp y) {return cp{x.a * y.a - x.b * y.b, x.b * y.a + x.a * y.b};}
int rev[N], P;
void fft(cp *a, int len, int o) {
for(int i = 1; i <= len; i ++) rev[i] = (rev[i >> 1] >> 1) | ((i&1) * len >> 1);
for(int i = 1; i <= len; i ++) if(i < rev[i]) swap(a[i], a[rev[i]]);
for(int i = 2; i <= len; i <<= 1) {
cp wn = cp{cos(2 * pi / i), o * sin(2 * pi / i)};
for(int j = 0, p = i / 2; j + i - 1 <= len; j += i) {
cp w0 = cp{1, 0};
for(int k = j; k < j + p; k ++, w0 = w0 * wn) {
cp x = a[k], y = w0 * a[k + p];
a[k] = x + y;
a[k + p] = x - y;
}
}
}
if(o == -1) for(int i = 0; i <= len; i ++) a[i].a /= len, a[i].b /= len;
}
void mul(ll *a, ll *b, int n, int m) {
for(int i = 0; i <= n; i ++) {
X[i].a = a[i] / C;
X[i].b = a[i] % C;
Xd[i].a = a[i] / C;
Xd[i].b = -(a[i] % C);
}
for(int i = 0; i <= m; i ++) {
Y[i].a = b[i] / C;
Y[i].b = b[i] % C;
}
int len = 1;
for(; len <= n + m;) len <<= 1;
fft(X, len, 1), fft(Xd, len, 1), fft(Y, len, 1);
for(int i = 0; i <= len; i ++) X[i] = X[i] * Y[i], Xd[i] = Xd[i] * Y[i];
fft(X, len, -1), fft(Xd, len, -1);
for(int i = 0; i <= n + m; i ++) {
ll ac = (X[i].a + Xd[i].a) / 2 + eps;
ll bd = Xd[i].a - ac + eps;
ll bcad = X[i].b + eps;
a[i] = (ac % P * C % P * C % P + bcad % P * C % P + bd % P) % P;
}
for(int i = 0; i <= len; i ++) X[i].a = X[i].b = Y[i].a = Y[i].b = Xd[i].a = Xd[i].b = 0;
}
int qpow(ll x, int y) {
ll ret = 1;
for(; y; y >>= 1, x = x * x % P) if(y & 1) ret = ret * x % P;
return ret;
}
ll c[N], d[N], e[N], n;
void inv(ll *a, ll *b, int sz){
if(sz == 0) {b[0] = qpow(a[0], P - 2); return;}
inv(a, b, sz / 2);
int len = 1;
for(; len <= sz + sz; len <<= 1);
for(int i = 0; i <= sz; i ++) c[i] = a[i];
for(int i = sz + 1; i <= len; i ++) c[i] = 0;
//for(int i = 0; i <= len; i ++) b[i] = (b[i] * 2 % mod - b[i] * b[i] % mod * c[i] % mod + mod) % mod;//Ö±½ÓËã
for(int i = 0; i <= sz; i ++) e[i] = b[i] * 2 % P;
for(int i = sz + 1; i <= len; i ++) e[i] = 0;
for(int i = 0; i <= sz; i ++) d[i] = b[i];
for(int i = sz + 1; i <= len; i ++) d[i] = 0;
mul(b, d, sz, sz); mul(b, c, sz, sz);
for(int i = 0; i <= sz; i ++) b[i] = (e[i] - b[i] + P) % P;
for(int i = sz + 1; i <= len; i ++) b[i] = 0;
}
ll a[N], b[N];
int main() {
scanf("%d", &n);
n --;
P = 1000000007;
for(int i = 0; i <= n; i ++) scanf("%lld", &a[i]);
inv(a, b, n);
for(int i = 0; i <= n; i ++) printf("%lld ", b[i]);
return 0;
}