Loading

学习笔记(8)多项式

强调强调再强调:多项式的空间要开到 \(4\) 倍!!

引入

对于多项式乘法,暴力计算只能做到 \(O(n^2)\),而通过将多项式系数 \(c_i\) 利用复平面内的单位根 \(\omega_{n}^{s}\) 转换为 \(F(\omega_{n}^{s})\) 的点值表达形式,可以利用复数的运算降低复杂度(简单来说就是借助单位根实现:系数->点值表达,运算->系数 的过程)

快速傅里叶变换(\(FFT\)

对于形如以下形式的多项式

\[g(x) = c_{0} + c_{1}a_{1} + c_{2}a_{2}^{2} + c_{3}a_{3}^{3} + \ldots + c_{n}a_{n}^{n} \]

\(FT\)(傅里叶变换) 主要依据以下公式实现系数与点值表达之间的变换:

\[\large F( \omega_{n}^{s}) = \sum_{i = 0}^{n - 1} f(i) \omega_{n}^{si} \]

\[\large f(i) = \frac{1}{n} \sum_{i = 0}^{n - 1} F( \omega_{n}^{s}) \omega_{n}^{-si} \]

其中 \(f(i)\) 为原多项式的系数

然而,直接对多项式用以上公式变形仍然是总复杂度 \(O(n^{2})\) 的,递归实现下的空间复杂度也未免太过可观,于是——

\(FFT\)(快速傅里叶变换) : 我们对原多项式的系数按照顺序的奇偶性进行划分,分治地进行求解后合并答案(分治 \(FFT\) 是“变种”,类似于 \(cdq\) 分治而非传统分治)

将正变换的柿子稍加处理:

\[\large h( \omega_{n}^{s}) = \sum_{i = 0}^{n - 1} f(i) \omega_{n}^{si} \]

\[= \sum_{i = 0}^{\lfloor \frac{n}{2} \rfloor} f_{2i} \omega_{\frac{n}{2}}^{2is} + \sum_{i = 0}^{\lfloor \frac{n}{2} \rfloor } f_{2i + 1} \omega_{\frac{n}{2}}^{(2i + 1)s} \]

\[= \sum_{i \in even}^{n} f_{i} \omega_{\frac{n}{2}}^{is} + \omega_{n}^{s} \times \sum_{i \in odd}^{n} f_{i} \omega_{\frac{n}{2}}^{is} \]

\(h_e()\) 为偶数项(\(even\)),\(h_o()\) 为奇数项(\(odd\)),则整理得如下柿子(\(s\) 在求解过程中视作 \(\frac{n}{2} - 1\)):

\[\large F( \omega_{n}^{s}) = f_{e}( \omega_{\frac{n}{2}}^{s}) + \omega_{n}^{s} \times f_{o}( \omega_{\frac{n}{2}}^{s}) \]

\[\large F( \omega_{n}^{s + \frac{n}{2}}) = f_{e}( \omega_{\frac{n}{2}}^{s}) - \omega_{n}^{s} \times f_{o}( \omega_{\frac{n}{2}}^{s}) \]

于是可以实现 \(2^{i}\) 规模之间的不同系数项的合并,实现(本来在不知道系数划分的情况下只能递归求解后合并,然而通过神秘操作使得递归实现可以被改为递推实现,以下为递推实现):

//【神秘操作】
for(int len = 1; len < lim; len <<= 1){//枚举要合并的答案区间长度
	complex Wn(cos(Pi / len), opt * sin(Pi / len));//逆变换只需要改变系数
	for(int i = 0; i < lim; i += (len << 1)){//枚举在一段区间中的位置,只需要计算左区间
		complex w(1, 0);
		for(int k = 0; k < len; k++, w = w * Wn){
			complex x = A[i + k], y = w * A[i + k + len];
			A[i + k] = x + y;
			A[i + k + len] = x - y;
		}
	}
}

然后通过“观察”发现,系数 \(c_i\) 在经过划分形成的新序列中,其位置下标与原序列中下标呈二进制的翻转关系:

\[c_0\to c_0 \]

\[c_1 \to c_4 \]

\[c_3 \to c_6 \]

\[...... \]

于是可以通过对原序数序列进蝴蝶反转 直接求出要合并的“系数项”:

void rever(){
	for(int i = 0; i < lim; i++){
		rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (cnt - 1));
	}
}

然后直接倍增合并即可

于是就得到了一个变换复杂度 \(O(nlogn)\),计算复杂度 \(O(n)\),再进行一次逆变换将点值转回原系数(代码实现中只需要对 \(\sin{n}\) 乘上一个 \(-1\) 的系数),总复杂度为 \(O(nlogn)\) 的快速解决多项式乘法的算法——\(FFT\)

完整代码:(例题:【模板】多项式乘法(FFT)

点击查看代码
#include <iostream>
#include <cstdio>
#include <cmath>
#define N 2500006
using namespace std;
int n, m, lim, cnt;
int rev[N];
const double Pi = acos(-1.0);
struct complex{//怕毒瘤出题人卡std敲的复数结构体,万用头会冲突只能忍痛割爱qaq
	double x, y;
	complex(double xx = 0, double yy = 0){x = xx, y = yy;}
	complex operator + (complex &b)const{return complex(x + b.x, y + b.y);}
	complex operator - (complex &b)const{return complex(x - b.x, y - b.y);}
	complex operator * (complex &b)const{return complex(x * b.x - y * b.y, x * b.y + y * b.x);}
}F[N], G[N];

inline void init(int len){
	cnt = 32 - __builtin_clz(len);//直接计算大于 n 的最小的 2^{k}
	lim = 1 << cnt;//确定需要的“n”
	for(int i = 0; i < lim; i++) rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (cnt - 1));//蝴蝶变换 
}

inline void FFT(complex *A, int opt){
	for(int i = 0; i < lim; i++) if(i < rev[i]) swap(A[i], A[rev[i]]);
	for(int len = 1; len < lim; len <<= 1){
		complex Wn(cos(Pi / len), opt * sin(Pi / len));//当前单位根 
		for(int i = 0; i < lim; i += (len << 1)){
			complex w(1, 0);
			for(int k = 0; k < len; k++, w = w * Wn){
				complex x = A[i + k], y = w * A[i + len + k];
				A[i + k] = x + y;
				A[i + len + k] = x - y;
			}
		}
	}
}

void Mul(complex *A, int len1, complex *B, int len2){
	init(len1 + len2);
	FFT(A, 1);//将两个多项式的系数转为点值表示 
	FFT(B, 1);
	for(int i = 0; i < lim; i++) A[i] = A[i] * B[i];
	FFT(A, -1);//逆变换转回实数 
}

inline int read(){
	char ch = getchar(); int x = 0, f = 1;
	while(!isdigit(ch)){if(ch == '-') f = -1; ch = getchar();}
	while(isdigit(ch)){x = (x << 3) + (x << 1) + (ch ^ 48); ch = getchar();}
	return x * f;
}

int main(){
//	freopen(".in", "r", stdin);
//	freopen(".out", "w", stdout);
	n = read(), m = read();
	for(int i = 0; i <= n; i++) scanf("%lf", &F[i].x);
	for(int i = 0; i <= m; i++) scanf("%lf", &G[i].x);
	Mul(F, n, G, m);
	for(int i = 0; i <= n + m; i++) printf("%d ", int(F[i].x / lim + 0.5));
	return 0;
}

不过某人太菜了导致现在多项式求逆及模意义下的运算还不会、、

快速数论变换(\(NTT\)

对,可以 \(Update\)

由于 \(FFT\) 是通过将系数转为复数的点值表达的形式进行变换,因此在面对模意义下的运算及取逆,除法运算时力不从心

于是 \(NTT\)(快速数论变换) 将系数转为具有相似性质的原根进行变换,可以达到相同的效果(以及可以取模的性质)

code:

点击查看代码
#include <bits/stdc++.h>
#define N 4000005
#define p 998244353
using namespace std;
int n, m, lim, cnt, inv;
const int g = 3, gi = 332748118;
int rev[N], F[N], G[N];

int add(int x, int y){return (x + y >= p)? x - p + y : x + y;}
int sub(int x, int y){return (x < y)? x - y + p : x - y;}
int mul(int x, int y){return 1ll * x * y % p;}
int qpow(int x, int k){
	int res = 1;
	for(; k; k >>= 1, x = mul(x, x)) if(k & 1) res = mul(res, x);
	return res;
}

void init(int len){
	cnt = 32 - (__builtin_clz(len));
	lim = 1 << cnt;
	inv = qpow(lim, p - 2);
	for(int i = 0; i < lim; i++) rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (cnt - 1));
}

void NTT(int *A, int opt){
	for(int i = 0; i < lim; i++) if(i < rev[i]) swap(A[i], A[rev[i]]);
	for(int len = 1; len < lim; len <<= 1){
		int Wn = qpow((opt == 1)? g : gi, (p - 1) / (len << 1));
		for(int i = 0; i < lim; i += (len << 1)){
			int w = 1;
			for(int k = 0; k < len; k++, w = mul(w, Wn)){
				int x = A[i + k], y = mul(w, A[i + k + len]);
				A[i + k] = add(x, y);
				A[i + k + len] = sub(x, y); 
			}
		}
	}
	if(opt == -1) for(int i = 0; i < lim; i++) A[i] = mul(A[i], inv);
}

void Mul(int *A, int len1, int *B, int len2){
	init(len1 + len2);
	NTT(A, 1);
	NTT(B, 1);
	for(int i = 0; i < lim; i++) A[i] = mul(A[i], B[i]);
	NTT(A, -1);
}

inline int read(){
	char ch = getchar(); int x = 0, f = 1;
	while(!isdigit(ch)){if(ch == '-') f = -1; ch = getchar();}
	while(isdigit(ch)){x = (x << 3) + (x << 1) + (ch ^ 48); ch = getchar();}
	return x * f;
}

int main(){
//	freopen(".in", "r", stdin);
//	freopen(".out", "w", stdout);
	n = read(), m = read();
    for(int i = 0; i <= n; i++) F[i] = read();
    for(int i = 0; i <= m; i++) G[i] = read();
	Mul(F, n, G, m);
	for(int i = 0; i <= n + m; i++) printf("%d ", F[i]);
	return 0;
}

另外预处理 \(w\) 可以极大的减小常数(比如把原本最多跑到 \(1.14s\) 的程序硬生生压到了 \(828ms\)

极致卡常版本
#include <iostream>
#define N 2500005
#define p 998244353
namespace fastIO{
	char buf1[0xfffff], buf2[0xfffff], *p1 = buf1, *p2 = buf1, *tp;
	#define getchar() (p1 == p2 && (p2 = (p1 = buf1) + fread(buf1, 1, 0xfffff, stdin), p1 == p2)? EOF : *p1++)
	template <typename T>
	inline void read(T &x){
		x = 0;
		char ch = getchar(); int f = 1;
		while(!isdigit(ch)){if(ch == '-') f = -1; ch = getchar();}
		while(isdigit(ch)){x = (x << 3) + (x << 1) + (ch ^ 48); ch = getchar();}
		x *= f;
	}
	template<typename T>
	void write(T x){
		if(x > 9) write(x / 10);
		*tp++ = (x % 10) ^ 48;
	}
	template<typename T>
	inline void writeln(T x, char sep = '\n'){
		tp = buf2;
		if(x < 0) putchar('-'), x = -x;
		write(x), fwrite(buf2, tp - buf2, 1, stdout);
		putchar(sep);
	}
	#undef getchar
}

int n, m, lim = 1, cnt, inv;
constexpr int g = 3, gi = 332748118;
int rev[N], F[N], G[N], w[N];

inline int add(int x, int y){return (x + y >= p)? x - p + y : x + y;}
inline int sub(int x, int y){return (x < y)? x - y + p : x - y;}
inline int mul(int x, int y){return 1ll * x * y % p;}
inline int qpow(int x, int k){
	int res = 1;
	for(; k; k >>= 1, x = mul(x, x)) if(k & 1) res = mul(res, x);
	return res;
}

inline void NTT(int *A, int opt){
	for(int i = 0; i < lim; i++){
		if(i < rev[i]){
			int t = A[i];
			A[i] = A[rev[i]];
			A[rev[i]] = t;
		}
	}
	for(int len = 1; len < lim; len <<= 1){
		int Wn = qpow((opt == 1)? g : gi, (p - 1) / (len << 1));
		w[0] = 1;
		for(int i = 1; i <= len; i++) w[i] = mul(w[i - 1], Wn);
		for(int i = 0; i < lim; i += (len << 1)){
			for(int k = 0; k < len; k++){
				int x = A[i + k], y = mul(w[k], A[i + k + len]);
				A[i + k] = add(x, y);
				A[i + k + len] = sub(x, y); 
			}
		}
	}
	if(opt == -1) for(int i = 0; i < lim; i++) A[i] = mul(A[i], inv);
}

int main(){
//	freopen(".in", "r", stdin);
//	freopen(".out", "w", stdout);
	fastIO::read(n), fastIO::read(m);
    for(int i = 0; i <= n; i++) fastIO::read(F[i]);
    for(int i = 0; i <= m; i++) fastIO::read(G[i]);
	while(lim <= n + m) lim <<= 1, ++cnt;
	inv = qpow(lim, p - 2);
	for(int i = 0; i < lim; i++) rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (cnt - 1));
	NTT(F, 1), NTT(G, 1);
	for(int i = 0; i < lim; i++) F[i] = mul(F[i], G[i]);
	NTT(F, -1);
	for(int i = 0; i <= n + m; i++) fastIO::writeln(F[i], ' ');
	return 0;
}

多项式求逆

求多项式 \(B(x)\) 满足 \(B(x)A(x) \equiv 1 \bmod x^{n}\)

原理:牛顿迭代

对于多项式 \(A(x)\) ,不妨记其逆多项式为 \(B_{t}(x) \equiv A^{-1}(x) \bmod x^{2^{t}}\),即:

\[\large A(x)B_{t}(x) \equiv 1 \bmod x^{2^{t}} \]

\(B_{t}(x)\) 换为 \(u\),令 \(G(u) = 1 - uA(x)\),显然这样构造有 \(G(u) \equiv 0 \bmod x^{2^{t}}\)(牛顿迭代中一般令构造的泛函数 \(\equiv 0\) 以便改造柿子,注意这只是一种表达,不是函数,因此求导时不遵从复合函数求导原则

随后用泰勒展开可以得到 \(G(u)\)\(B_{t}(x)\) 处的展开(只展开两项即可,因为 \(B_{t + 1}(x) \equiv B_{t}(x) \bmod x^{n}\),所以 \(B_{t + 1}(x) - B_{t}(x) \bmod x^{2n}\) 剩余的最低次项为 \(x^{n}\) 项,当 \(i\) 取到 \(2\) 时,在 \(\bmod x^{2n}\) 意义下后面的高次项被全部模掉,因此 \(i\) 只需要取到 \(1\)):

\[\large G(u) = G(B_{t}(x)) + \frac{G^{'}(B_{t}(x))}{1!} \times (u - B_{t}(x)) \]

移项得:

\[\large u = B_{t}(x) - \frac{G(B_{t}(x))}{G^{'}(B_{t}(x))} \]

\(B_{t + 1}(x)\) 代入 \(u\) 得:

\[\large B_{t + 1}(x) = B_{t}(x) - \frac{G(B_{t}(x))}{G^{'}(B_{t}(x))} \]

回代泛函数,整理柿子:

\[\large B_{t + 1}(x) = B_{t}(x) - \frac{1 - A(x)B_{t}(x)}{(1 - A(x)B_{t}(x))^{'}} \]

\[\large B_{t + 1}(x) = B_{t}(x) - \frac{1 - A(x)B_{t}(x)}{-A(x)} \]

\[\large B_{t + 1}(x) = B_{t}(x) + B_{t}(x) \times (1 - A(x)B_{t}(x)) \]

最后得到:

\[\large B_{t + 1}(x) = 2B_{t}(x) - A(x)B_{t}^{2}(x) \]

于是针对上式即可递归(倍增)地求解 \(B_{t}(x)\)(即所求的 \(A^{-1}(x)\)),用 \(FFT/NTT\) 即可(不过一般情况下会需要取模所以多用 \(NTT\)),边界 \(B(x_{0}) \equiv A^{-1}(x_{0}) \bmod p\)

code:

int C[N];//这里把辅助数组C提到外面防止频繁定义炸空间
void Inv(int *A, int *B, int len){
	if(!len){B[0] = qpow(A[0], p - 2); return;}
	Inv(A, B, len >> 1);
	init(len << 1);
	for(int i = 0; i < lim; i++) C[i] = 0;
	for(int i = 0; i <= len; i++) C[i] = A[i];
	NTT(C, 1);
	NTT(B, 1);
	for(int i = 0; i < lim; i++) B[i] = mul(B[i], sub(2, mul(B[i], C[i])));
	NTT(B, -1);
	for(int i = len + 1; i < lim; i++) B[i] = 0;//将答案记录在B数组
}

多项式开根

求多项式 \(B(x)\) 满足 \(B(x)^{2} \equiv A(x) \bmod x^{n}\)

构造泛函数 \(G(B_{t}(x)) = B_{t}^{2}(x) - A(x)\),有 \(G(u) \equiv 0\),仍然套用牛顿迭代的柿子:

\[B_{t + 1}(x) = B_{t}(x) - \frac{G(B_{t}(x))}{G^{'}(B_{t}(x))} \]

回代得:

\[\large B_{t + 1}(x) = b_{t}(x) - \frac{b_{t}^{2}(x) - A(x)}{2B_{t}(x)} = \frac{B_{t}(x)}{2} - \frac{A(x)}{2B_{t}(x)} \]

然后调用多项式求逆倍增地去算就行了,code:

int D[N], Bi[N];
void Sqrt(int *A, int *B, int len){
	if(!len){B[0] = 1; return;}
	Sqrt(A, B, len >> 1);
	for(int i = 0; i < lim; i++) Bi[i] = 0;
	Inv(B, Bi, len);
	init(len << 1);
	for(int i = 0; i < lim; i++) D[i] = 0;
	for(int i = 0; i <= len; i++) D[i] = A[i];
	NTT(D, 1);
	NTT(B, 1);
	NTT(Bi, 1);
	for(int i = 0; i < lim; i++) B[i] = mul(add(B[i], mul(D[i], Bi[i])), inv2);
	NTT(B, -1);
	for(int i = len + 1; i < lim; i++) B[i] = 0;
}

不过这里会出现一个问题是边界 \(B(x_{0})\) 是直接通过 \(\sqrt{A(x_{0}) = 1} = 1\) 计算的,当 \(A(x_{0}) \neq 1\) 时需要求相应的二次剩余(虽然但是在【模板】多项式开根(加强版)中貌似直接枚举 \(0\) ~ \(p - 1\) 直接找就水过去了、、)

似乎一般通过 \(Cipolla\) 算法结合欧拉判别式求解形如 \(x^{k} \equiv a \bmod p\) 的问题,其中 \(p\) 为奇素数。

首先需要引入一个符号(勒根德 \(Legendre\) 符号):

\[\large \left( \frac{a}{p} \right) = \begin{cases} 0, & p|a, \\ 1, & (p \not{|} a) \land ((\exists x \in Z), x^{2} \equiv a \bmod p),\\ -1, & otherwise \end{cases}\]

根据欧拉判别法

  1. \(a\) \(p\) 的二次剩余当且仅当 \(a^{\frac{p - 1}{2}} \equiv 1 \bmod p\)

  2. \(a\) 不是 \(p\) 的二次剩余当且仅当 \(a^{\frac{p - 1}{2}} \equiv -1 \bmod p\)

\(\left( \frac{a}{p} \right) \equiv a^{\frac{p - 1}{2}} \bmod p\)

随机寻找 \(r\) 满足 \(r^{2} - a\) 为二次非剩余,则 \((r - x)^{\frac{p - 1}{2}} \bmod (x^{2} - (r^{2} - a))\) 为其中一个解。由于有 \(\frac{p - 1}{2}\) 种选择可以使得 \(r^{2} - a\) 为二次非剩余,使用随机方法平均约两次可得 \(r\)

点击查看代码
#include <bits/stdc++.h>
#define N 400005
#define p 998244353
#define ll long long
using namespace std;
int n, lim, cnt, inv, inv2 = 499122177, w;
int F[N], G[N], rev[N];

constexpr int g = 3, gi = 332748118;
inline int add(int x, int y){return (x + y >= p)? x - p + y : x + y;}
inline int sub(int x, int y){return (x < y)? x - y + p : x - y;}
inline int mul(int x, int y){return 1ll * x * y % p;}
inline int qpow(int x, int k){
	int res = 1;
	for(; k; k >>= 1, x = mul(x, x)) if(k & 1) res = mul(res, x);
	return res;
}

struct pll{
	int x, y;
	pll(int xx = 0, int yy = 0){x = xx, y = yy;}
	pll operator * (pll &b)const{return pll(add(mul(x, b.x), mul(w, mul(y, b.y))), add(mul(x, b.y), mul(y, b.x)));}
};
inline int fpow(pll x, int k){
	pll res = pll(1, 0);
	for(; k; k >>= 1, x = x * x) if(k & 1) res = res * x;
	return res.x;
}

int Cipolla(int x){
	srand(time(0));
	if(qpow(x, (p - 1) >> 1) == p - 1) return -1;
	while(1){
		int a = (1ll * rand() << 15 | rand()) % p;
		w = sub(mul(a, a), x);
		if(qpow(w, (p - 1) >> 1) == p - 1){
			int res = fpow(pll(a, 1), (p + 1) >> 1);
			return min(res, p - res);
		}
	}
}

inline void init(int len){
	cnt = 32 - __builtin_clz(len);
	lim = 1 << cnt;
	inv = qpow(lim, p - 2);
	for(int i = 0; i < lim; i++) rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (cnt - 1));
}

inline void NTT(int *A, int opt){
	for(int i = 0; i < lim; i++) if(i < rev[i]) swap(A[i], A[rev[i]]);
	for(int len = 1; len < lim; len <<= 1){
		int Wn = qpow((opt == 1)? g : gi, (p - 1) / (len << 1));
		for(int i = 0; i < lim; i += (len << 1)){
			int w = 1;
			for(int k = 0; k < len; k++, w = mul(w, Wn)){
				int x = A[i + k], y = mul(w, A[i + k + len]);
				A[i + k] = add(x, y);
				A[i + k + len] = sub(x, y);
			}
		}
	}
	if(opt ^ 1) for(int i = 0; i < lim; i++) A[i] = mul(A[i], inv);
}

int C[N];
void Inv(int *A, int *B, int len){
	if(!len){B[0] = qpow(A[0], p - 2); return;}
	Inv(A, B, len >> 1);
	init(len << 1);
	for(int i = 0; i < lim; i++) C[i] = 0;
	for(int i = 0; i <= len; i++) C[i] = A[i];
	NTT(C, 1);
	NTT(B, 1);
	for(int i = 0; i < lim; i++) B[i] = mul(B[i], sub(2, mul(B[i], C[i])));
	NTT(B, -1);
	for(int i = len + 1; i < lim; i++) B[i] = 0;
}

int D[N], Bi[N];
void Sqrt(int *A, int *B, int len){
	if(!len){B[0] = Cipolla(A[0]); return;}
	Sqrt(A, B, len >> 1);
	for(int i = 0; i < lim; i++) Bi[i] = 0;
	Inv(B, Bi, len);
	init(len << 1);
	for(int i = 0; i < lim; i++) D[i] = 0;
	for(int i = 0; i <= len; i++) D[i] = A[i];
	NTT(D, 1);
	NTT(B, 1);
	NTT(Bi, 1);
	for(int i = 0; i < lim; i++) B[i] = mul(add(B[i], mul(D[i], Bi[i])), inv2);
	NTT(B, -1);
	for(int i = len + 1; i < lim; i++) B[i] = 0;
}

inline int read(){
	char ch = getchar(); int x = 0, f = 1;
	while(!isdigit(ch)){if(ch == '-') f = -1; ch = getchar();}
	while(isdigit(ch)){x = (x << 3) + (x << 1) + (ch ^ 48); ch = getchar();}
	return x * f;
}

int main(){
//	freopen(".in", "r", stdin);
//	freopen(".out", "w", stdout);
	n = read() - 1;
	for(int i = 0; i <= n; i++) F[i] = read();
	Sqrt(F, G, n);
	for(int i = 0; i <= n; i++) printf("%d ", G[i]);
	return 0;
}

然而貌似还有用 \(BSGS\) 的常数更小的做法,截止现在并不会

多项式除法

求多项式 \(B(x)\) 满足 \(A(x) \equiv P(x) \times B(x) + R(x) \bmod x^{n}\)

什么你说直接乘上另外一个柿子的逆?请考虑考虑除不尽的余项……

\((Updating...)\)

求"\(ln\)"型

求多项式 \(B(x)\) 满足 \(B(x)\equiv ln(A(x)) \bmod x^{n}\)

\(f(x)=ln(x)\),则原式可以化为 \(B(x) \equiv f(A(x)) \bmod x^{n}\)

考虑到 \(ln'(x)= \frac{1}{x}\),对两边求导得:$$B'(x) \equiv \frac{A'(x)}{A(x)}$$
于是用多项式求逆解决。最后得到 \(B(x)\) 再做一次不定积分即可(求导与积分使用生成函数的原理)//然而注意到这个算法要求 \(A_{0} = 1\),巨坑……

int df[N], finv[N];
void Ln(int *A, int *B, int len){
	for(int i = 0; i <= (len << 2); i++) df[i] = finv[i] = 0;
	for(int i = 0; i < len; i++) df[i] = mul(i + 1, A[i + 1]);
	Inv(A, finv, len);
	Mul(df, len, finv, len);
	for(int i = 0; i < len - 1; i++) B[i + 1] = mul(df[i], inv[i + 1]);
	B[0] = 0;
	for(int i = len; i < lim; i++) B[i] = 0;
}

求"\(exp\)"型

求多项式 \(B(x)\) 满足 \(B(x)\equiv e^{A(x)} \bmod x^{n}\)

两边取 \(\ln\)\(\ln(B(x)) \equiv A(x) \bmod x^{n}\),设泛函数 \(G(u) = A(x) - \ln{u}\),代入牛顿迭代得出的柿子有:

\[B_{t + 1}(x) = B_{t}(x) \times (1 - B_{t}(x) + A(x)) \]

仍然递归求解

int D[N], E[N];
void Exp(int len, int *A, int *B){
	for(int i = 0; i <= (len << 2); i++) B[i] = 0; 
	if(len == 1){B[0] = 1; return;}
	Exp((len + 1) >> 1, A, B);
	for(int i = 0; i <= (len << 1); i++) D[i] = 0;
	Ln(B, D, len);
	init((len << 1) - 1);
	for(int i = 0; i < len; i++) E[i] = A[i];
	for(int i = len; i < lim; i++) E[i] = 0;
	NTT(E, 1);
	NTT(B, 1);
	NTT(D, 1);
	for(int i = 0; i < lim; i++) B[i] = mul(B[i], sub(add(1, E[i]), D[i]));
	NTT(B, -1);
	for(int i = len; i < lim; i++) B[i] = 0;
}

多项式快速幂

求多项式 \(B(x)\) 满足 \(B(x)\equiv A(x)^k \bmod x^{n}\)

对两边取对数(\(ln\)),得 \(B(x)\equiv e^{k ln(A(x))}\),直接计算……

吗?

\(A_{0} = 1\) 的情况下确实是的,然后你发现在【模板】多项式幂函数(加强版)里并没有约定 \(A_{0} =1\),甚至还有一堆为 \(0\) 的系数,而这对于多项式求逆是一个过于友好的条件,

然后就寄了

所以首先需要找到第一个不为 \(0\) 的系数项,将多项式整体平移,处理完后再平移回去,这是不影响答案的。考虑将每一项系数除以第一项来强制使得新的 \(A_{0}=1\),然后在做快速幂之前特判一下 \(0\) 的问题。另外由于指数 \(k\) 不能直接对 \(998244353\) 取模,需要用一下欧拉公式

代码:

点击查看代码
#include <bits/stdc++.h>
#define N 400005
#define p 998244353
using namespace std;
int n, cnt, lim, liv;
int rev[N], inv[N];

constexpr int g = 3, gi = 332748118;
inline int add(int x, int y){return (x + y >= p)? x - p + y : x + y;}
inline int sub(int x, int y){return (x < y)? x + p - y : x - y;}
inline int mul(int x, int y){return 1ll * x * y % p;}
inline int qpow(int x, int k){
	int res = 1;
	for(; k; k >>= 1, x = mul(x, x)) if(k & 1) res = mul(res, x);
	return res;
}

inline void init(int len){
	cnt = 32 - (__builtin_clz(len));
	lim = 1 << cnt;
	liv = qpow(lim, p - 2);
	for(int i = 0; i < lim; i++) rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (cnt - 1));
}

inline void NTT(int *A, int opt){
	for(int i = 0; i < lim; i++){
		if(i < rev[i]) swap(A[i], A[rev[i]]);
	}
	for(int len = 1; len < lim; len <<= 1){
		int Wn = qpow((opt == 1)? g : gi, (p - 1) / (len << 1));
		for(int i = 0; i < lim; i += (len << 1)){
			int w = 1;
			for(int k = 0; k < len; k++, w = mul(w, Wn)){
				int x = A[i + k], y = mul(w, A[i + len + k]);
				A[i + k] = add(x, y);
				A[i + len + k] = sub(x, y);
			}
		}
	}
	if(opt == -1) for(int i = 0; i < lim; i++) A[i] = mul(A[i], liv);
}

void Mul(int *A, int len1, int *B, int len2){
	init(len1 + len2);
	NTT(A, 1);
	NTT(B, 1);
	for(int i = 0; i < lim; i++) A[i] = mul(A[i], B[i]);
	NTT(A, -1);
}

int C[N];
void Inv(int *A, int *B, int len){
	for(int i = 0; i <= (len << 2); i++) B[i] = 0;
	if(len == 1){B[0] = qpow(A[0], p - 2); return;}
	Inv(A, B, (len + 1) >> 1);
	init((len << 1) - 1);
	for(int i = 0; i < lim; i++) C[i] = 0;
	for(int i = 0; i < len; i++) C[i] = A[i];
	NTT(C, 1);
	NTT(B, 1);
	for(int i = 0; i < lim; i++) B[i] = mul(B[i], sub(2, mul(B[i], C[i])));
	NTT(B, -1);
	for(int i = len; i < lim; i++) B[i] = 0;
}

int df[N], finv[N];
void Ln(int *A, int *B, int len){
	for(int i = 0; i <= (len << 2); i++) df[i] = finv[i] = 0;
	for(int i = 0; i < len; i++) df[i] = mul(i + 1, A[i + 1]);
	Inv(A, finv, len);
	Mul(df, len, finv, len);
	for(int i = 0; i < len - 1; i++) B[i + 1] = mul(df[i], inv[i + 1]);
	B[0] = 0;
	for(int i = len; i < lim; i++) B[i] = 0;
}

int D[N], E[N];
void Exp(int len, int *A, int *B){
	for(int i = 0; i <= (len << 2); i++) B[i] = 0; 
	if(len == 1){B[0] = 1; return;}
	Exp((len + 1) >> 1, A, B);
	for(int i = 0; i <= (len << 1); i++) D[i] = 0;
	Ln(B, D, len);
	init((len << 1) - 1);
	for(int i = 0; i < len; i++) E[i] = A[i];
	for(int i = len; i < lim; i++) E[i] = 0;
	NTT(E, 1);
	NTT(B, 1);
	NTT(D, 1);
	for(int i = 0; i < lim; i++) B[i] = mul(B[i], sub(add(1, E[i]), D[i]));
	NTT(B, -1);
	for(int i = len; i < lim; i++) B[i] = 0;
}

int low, num;
int F[N];
void Qpow(int *A, int *B, int len, int k, int kn){
	while(!A[low]) ++low;
	if(1ll * low * k >= len){memset(B, 0, len << 2); return;}
	len -= 1ll * low * k;
	num = qpow(A[low], kn);
	liv = qpow(A[low], p - 2);
	for(int i = 0; i < len; i++) A[i] = mul(A[i + low], liv);
	Ln(A, F, len);
	for(int i = 0; i < len; i++) F[i] = mul(k, F[i]);
	Exp(len, F, B);
	len += 1ll * low * k;
	low = 1ll * low * k;
	for(int i = len - 1; i >= low; i--) B[i] = mul(B[i - low], num);
	for(int i = 0; i < low; i++) B[i] = 0;
}

inline int read(){
	char ch = getchar(); int x = 0, f = 1;
	while(!isdigit(ch)){if(ch == '-') f = -1; ch = getchar();}
	while(isdigit(ch)){x = (x << 3) + (x << 1) + (ch ^ 48); ch = getchar();}
	return x * f;
}

int k1, k2, k3, siz;
int G[N], H[N];
char s[N];
int main(){
//	freopen(".in", "r", stdin);
//	freopen(".out", "w", stdout);
	n = read();
	scanf(" %s", s);
	for(int i = 0; s[i]; i++){
		++siz;
		k1 = add(mul(k1, 10), s[i] - '0');
		k2 = (10ll * k2 + s[i] - '0') % (p - 1);
	}
	for(int i = 0; i < min(6, siz); i++) k3 = 10 * k3 + s[i] - '0';
	inv[0] = inv[1] = 1;
	for(int i = 2; i <= n; i++) inv[i] = mul(p - (p / i), inv[p % i]);
	for(int i = 0; i < n; i++) G[i] = read();
	if(!G[0] && k3 >= n){for(int i = 0; i < n; i++) printf("0 "); return 0;}
	Qpow(G, H, n, k1, k2);
	for(int i = 0; i < n; i++) printf("%d ", H[i]);
	return 0;
}

细节真的巨多……

应用题都还没怎么练,据说一般都只是加速计算过程(包括在字符串的 \(FFT\) 匹配加速字符串特征值的计算),同时省选也不常考

快速沃尔什变换(\(FWT\)

对于耳熟能详的卷积柿子:

\[(F * G) = \sum_{j\otimes k = i}^{n}F(j)G(k) \]

若运算 \(\otimes\) 满足交换律与结合律,则可以尝试利用 \(FWT\) 对该卷积进行优化。记 \(FWT[A]\) 表示原序列进行快速沃尔什变换后得到的结果,其核心思想是通过 \(O(n\log{n})\) 的互相转换 \(A\)\(FWT[A]\) 并在 \(O(n)\) 的时间内根据 \(FWT[A] \times FWT[B] = FWT[C]\) 计算出 \(FWT[C]\) 来加速卷积过程

而在 \(OI\) 中,\(FWT\) 用于解决对下标进行位运算卷积的问题,如按位与,按位或,按位异或

核心公式:

\[c_{i} = \sum_{i = j\otimes k}a_{j}b_{k} \]

或运算

\((Updating \dots)\)

与运算

\((Updating \dots)\)

异或运算

\((Updating \dots)\)

posted @ 2024-06-02 21:12  HRcohc  阅读(14)  评论(0编辑  收藏  举报