Codechef Future of draughts
4次FFT的MTT
https://kewth.blog.luogu.org/solution-p4245
拆系数就不说了,把两个多项式\(A(x), B(x)\)分别拆成\(A_0(x), A_1(x)\)和\(B_0(x), B_1(x)\)后,考虑求出它们的点值表示,也就是做DFT。朴素地做DFT需要4次,但是由于这些多项式虚部都为\(0\) ,可以考虑将两次DFT合并成一次。
例如要给两个多项式\(A, B\)做DFT,考虑构造两个多项式:
那么由于\(A, B\)的虚部都为\(0\),\(P, Q\)的每一项系数都互为共轭,同样每一个点值也互为共轭。那么只需对\(P\)做一次DFT,就可以通过共轭\(O(n)\)求出\(Q\)的点值表示。
具体而言
\[Q(\omega_n^k)=\operatorname{conj}(P(\omega_n^{-k})) \]
然后通过\(P, Q\)的点值表示求\(A, B\)的点值表示就是解上面的二元一次方程组,也是可以\(O(n)\)做到的。
于是就可以用两次DFT求出\(A_0, A_1, B_0, B_1\)的点值表示。
接下来需要求\(A, B\)之间的两两乘积。直接乘出来后要对\(A_0 B_0, A_0 B_1, A_1 B_0, A_1 B_1\)四个多项式做IDFT。并且这时候它们的虚部并不为\(0\),不能用上述的方法。
但是上述方法的思想仍可借鉴,考虑构造两个多项式:
通过已知的点值求出此时\(P, Q\)的点值,然后分别对\(P, Q\)做IDFT。由于\(A_0 B_0, A_0 B_1, A_1 B_0, A_1 B_1\)这四个多项式卷起来后的系数表示中虚部一定为\(0\),那么此时\(P\)的实部和虚部就分别为\(A_0(x) B_0(x)\)和\(A_1(x) B_0(x)\),同样\(Q\)的实部和虚部就分别为\(A_0(x) B_1(x)\)和\(A_1(x) B_1(x)\)。
Future of draughts
给出\(T\)张无向图,有\(Q\)次询问,每次给出\(L, R, k\),在第\(L\sim R\)张图上玩耍,先给每张图选一个起点,然后每一步选一个图的非空子集,把这些图中的点移动一步。要在\(k\)步以内结束,结束时所有图都要处在起点。求方案数。
\(T, n\leq 50, k\leq 10^4, Q\leq 2\times 10^5\)。
题解
https://www.cnblogs.com/cjoierShiina-Mashiro/p/12254832.html
可以发现不走其实就是走自环,所以我们给每张图的每个点添加一个自环。
对于第\(i\)张图,设其邻接矩阵为\(A_i\),那么在第\(i\)张图上走\(k\)步回到原点的方案数就是\(\operatorname{tr}(A_i^k)\)。
其中\(\operatorname{tr}(A)\)表示\(A\)主对角线元素的和。
设\(g_{l,r,k}=\prod_{i=l}^r\operatorname{tr}(A_i^k)\),\(f_{l,r,k}\)表示在\(G_l,\dots,G_r\)进行\(k\)轮移动的方案数。
注意到\(g\)可能存在一轮全都走自环的情况,所以我们枚举没有全都走自环的轮数得到
二项式反演得到
注意到这是个卷积的形式,我们可以拆组合数将其化为
因此如果我们能够求出所有的\(g\),那么我们就能在\(O(T^2K\log K)\)的时间复杂度内求出所有\(f\),进而\(O(1)\)地回答每个询问。
要求所有的\(g_{l,r,k}\)就是要求所有的\(g_{i,i,k}=\operatorname{tr}(A_i^k)\),直接做是\(O(TN^3K)\)的,考虑优化。
根据Cayley-Hamilton定理,对于\(A_i\),我们求出其特征多项式\(f_i(\lambda)=|\lambda E-A_i|\),同时求出所有的\(h_{i,k}(\lambda)=\lambda^k\bmod f_i(\lambda)\),那么
特征多项式可以\(O(N^4)\)插值也可以用对角化+上Hessenberg矩阵做到\(O(N^3)\),这里用的是插值。
插值指的是直接把\(\lambda\)代入\((n+1)\)个点值,求完行列式之后插值即可。
因为我们是要求出所有\(k\)的\(h_{i,k}\),所以可以\(O(NK)\)递推。
然后求\(j\in[0,n_i-1)\)的\(\operatorname{tr}(A_i^j)\)直接\(O(N^4)\)暴力就好了。
总时间复杂度为\(O(T(N^4+NK)+T^2K\log K+Q)\)。
因为模数是\(10^9+7\)所以要用MTT,码量增加的同时常数也大大增加了,需要加一些小小的常数优化。
#include <cmath>
#include <cstdio>
#include <cctype>
#include <cstring>
#include <algorithm>
namespace IO {
char ibuf[(1 << 21) + 1], obuf[(1 << 21) + 1], st[15], *iS, *iT, *oS = obuf, *oT = obuf + (1 << 21);
char Get() {
return (iS == iT ? (iT = (iS = ibuf) + fread(ibuf, 1, (1 << 21) + 1, stdin), (iS == iT ? EOF : *iS++))
: *iS++);
}
void Flush() { fwrite(obuf, 1, oS - obuf, stdout), oS = obuf; }
void Put(char x) {
*oS++ = x;
if (oS == oT)
Flush();
}
int read() {
int x = 0, c = Get();
while (!isdigit(c)) c = Get();
while (isdigit(c)) x = x * 10 + c - 48, c = Get();
return x;
}
} // namespace IO
using IO::read;
using ld = long double;
const int P = 1000000007;
const ld pi = acos(-1);
int inc(int a, int b) { return a += b - P, a + (a >> 31 & P); }
int dec(int a, int b) { return a -= b, a + (a >> 31 & P); }
int mul(int a, int b) { return 1ll * a * b % P; }
int pow(int a, int k) {
int r = 1;
for (; k; k >>= 1, a = mul(a, a))
if (k & 1)
r = mul(a, r);
return r;
}
int inv(int a) { return pow(a, P - 2); }
struct matrix {
int a[51][51], n;
matrix() { memset(a, 0, sizeof a); }
int* operator[](int x) { return a[x]; }
};
matrix operator+(matrix a, matrix b) {
for (int i = 1; i <= a.n; ++i)
for (int j = 1; j <= a.n; ++j) a[i][j] = inc(a[i][j], b[i][j]);
return a;
}
matrix operator-(matrix a, matrix b) {
for (int i = 1; i <= a.n; ++i)
for (int j = 1; j <= a.n; ++j) a[i][j] = dec(a[i][j], b[i][j]);
return a;
}
matrix operator*(matrix a, matrix b) {
matrix c;
c.n = a.n;
for (int i = 1; i <= a.n; ++i)
for (int j = 1; j <= a.n; ++j)
for (int k = 1; k <= a.n; ++k) c[i][j] = inc(c[i][j], mul(a[i][k], b[k][j]));
return c;
}
matrix I(int n) {
matrix a;
a.n = n;
for (int i = 1; i <= n; ++i) a[i][i] = 1;
return a;
}
int tr(matrix a) {
int r = 0;
for (int i = 1; i <= a.n; ++i) r = inc(r, a[i][i]);
return r;
}
int det(matrix a) {
int r = 1;
for (int i = 1; i <= a.n; ++i) {
int f = 0;
for (int j = i; j <= a.n; ++j)
if (a[j][i]) {
if (!f) {
if (f = 1, i ^ j)
r = dec(0, r), std::swap(a.a[j], a.a[i]);
r = mul(r, a[i][i]);
for (int k = i, x = inv(a[i][i]); k <= a.n; ++k) a[i][k] = mul(a[i][k], x);
} else
for (int k = a.n; k >= i; --k) a[j][k] = dec(a[j][k], mul(a[i][k], a[j][i]));
}
if (!f)
return 0;
}
return r;
}
void work(matrix a, int* f) {
static int r[51], t[51];
int n = a.n;
memset(f, 0, (n + 1) << 2);
for (int i = 1; i <= n; ++i)
for (int j = 1; j <= n; ++j) a[i][j] = dec(0, a[i][j]);
for (int i = 0; i <= n; ++i) r[i] = det(a = a + I(n));
for (int i = 0, s; i <= n; ++i) {
memset(t, 0, (n + 1) << 2), t[0] = 1, s = r[i];
for (int j = 0, k; j <= n; ++j)
if (i ^ j)
for (s = mul(s, inv(dec(i, j))), k = n; ~k; --k)
t[k] = inc(mul(P - j - 1, t[k]), k ? t[k - 1] : 0);
for (int j = 0; j <= n; ++j) f[j] = inc(f[j], mul(s, t[j]));
}
}
struct complex {
ld x, y;
complex(ld a = 0, ld b = 0) { x = a, y = b; }
} w[32769];
complex operator+(complex a, complex b) { return { a.x + b.x, a.y + b.y }; }
complex operator-(complex a, complex b) { return { a.x - b.x, a.y - b.y }; }
complex operator*(complex a, ld x) { return { a.x * x, a.y * x }; }
complex operator/(complex a, ld x) { return { a.x / x, a.y / x }; }
complex operator*(complex a, complex b) { return { a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x }; }
complex conj(complex a) { return { a.x, -a.y }; }
int rev[32769], lim;
void init(int n) {
static ld x;
lim = 1 << (32 - __builtin_clz(n));
for (int i = 1; i < lim; ++i) rev[i] = (rev[i >> 1] >> 1) | (i & 1 ? lim >> 1 : 0);
for (int i = 0; i < lim; ++i) x = pi * i / lim, w[i] = { cos(x), sin(x) };
}
void FFT(complex* a, int f) {
static complex x;
if (!~f) // another implementation for IDFT
std::reverse(a + 1, a + lim);
for (int i = 1; i < lim; ++i)
if (i < rev[i])
std::swap(a[i], a[rev[i]]);
for (int i = 1; i < lim; i <<= 1)
for (int l = i << 1, j = 0; j < lim; j += l)
for (int k = 0; k < i; ++k)
x = a[i + j + k] * w[lim / i * k], a[i + j + k] = a[j + k] - x, a[j + k] = a[j + k] + x;
if (!~f)
for (int i = 0; i < lim; ++i) a[i] = a[i] / lim;
}
void DFT(complex* a, complex* b) {
static complex t[2][32769], x, y;
for (int i = 0; i < lim; ++i) t[0][i] = a[i] + b[i] * complex{ 0, 1 };
FFT(t[0], 1), memcpy(t[1], t[0], lim << 5), std::reverse(t[1] + 1, t[1] + lim);
for (int i = 0; i < lim; ++i)
x = t[0][i], y = conj(t[1][i]), a[i] = (x + y) / 2, b[i] = (x - y) * complex{ 0, -1 } / 2;
}
void IDFT(complex* a, complex* b) {
for (int i = 0; i < lim; ++i) a[i] = a[i] + b[i] * complex{ 0, 1 };
FFT(a, -1);
for (int i = 0; i < lim; ++i) b[i] = { a[i].y, 0 }, a[i].y = 0;
}
void MTT(int* a, int* b, int* c, int n, int m) {
static complex t[4][32769], A, B, C, D;
static int r[4] = { 1 << 30, 1 << 15, 1 << 15, 1 };
init(n + m - 1), memset(t, 0, sizeof t);
for (int i = 0; i < n; ++i) t[0][i].x = a[i] >> 15, t[1][i].x = a[i] & 32767;
for (int i = 0; i < m; ++i) t[2][i].x = b[i] >> 15, t[3][i].x = b[i] & 32767;
DFT(t[0], t[1]), DFT(t[2], t[3]);
for (int i = 0; i < lim; ++i)
A = t[0][i] * t[2][i], B = t[0][i] * t[3][i], C = t[1][i] * t[2][i], D = t[1][i] * t[3][i],
t[0][i] = A, t[1][i] = B, t[2][i] = C, t[3][i] = D;
IDFT(t[0], t[1]), IDFT(t[2], t[3]), memset(c, 0, (n + m) << 2);
for (int i = 0; i < 4; ++i)
for (int j = 0; j < n + m; ++j) c[j] = inc(c[j], mul(r[i], (long long)(t[i][j].x + 0.5) % P));
}
int f[51][51][10001], g[51][51][10001], fac[10001], ifac[10001], mx[51][51], T, Q;
struct query {
int l, r, k;
} q[200001];
int main() {
T = read(), fac[0] = 1;
for (int i = 1; i <= 10000; ++i) fac[i] = mul(fac[i - 1], i);
ifac[10000] = inv(fac[10000]);
for (int i = 10000; i; --i) ifac[i - 1] = mul(ifac[i], i);
for (int t = 1; t <= T; ++t) {
int n = read(), m = read();
static matrix A, E;
static int r[51], s[51], f[51];
A = I(n), E = I(n);
for (int i = 1, u, v; i <= m; ++i) u = read(), v = read(), A[u][v] = A[v][u] = 1;
work(A, f), memset(s, 0, n << 2), s[0] = 1;
for (int i = 0; i < n; ++i) r[i] = tr(E), E = E * A;
for (int i = 0, x; i <= 1e4; ++i) {
for (int j = 0; j < n; ++j) g[t][t][i] = inc(g[t][t][i], mul(s[j], r[j]));
x = s[n - 1], memcpy(s + 1, s, (n - 1) << 2), s[0] = 0;
if (x)
for (int j = 0; j < n; ++j) s[j] = dec(s[j], mul(x, f[j]));
}
}
Q = read();
for (int i = 1; i <= Q; ++i)
q[i] = { read(), read(), read() }, mx[q[i].l][q[i].r] = std::max(mx[q[i].l][q[i].r], q[i].k);
for (int i = 1; i <= T; ++i)
for (int j = i + 1; j <= T; ++j)
for (int k = 0; k <= 10000; ++k) g[i][j][k] = mul(g[i][j - 1][k], g[j][j][k]);
for (int i = 1; i <= T; ++i)
for (int j = i; j <= T; ++j) {
for (int k = 0; k <= mx[i][j]; ++k)
g[i][j][k] = mul(g[i][j][k], ifac[k]), f[i][j][k] = k & 1 ? dec(0, ifac[k]) : ifac[k];
MTT(g[i][j], f[i][j], f[i][j], mx[i][j] + 1, mx[i][j] + 1), f[i][j][0] = 0;
for (int k = 1; k <= mx[i][j]; ++k) f[i][j][k] = inc(f[i][j][k - 1], mul(f[i][j][k], fac[k]));
}
for (int i = 1; i <= Q; ++i) printf("%d\n", f[q[i].l][q[i].r][q[i].k]);
}