Luogu4191
前言
性能优化……经典题。
这题可以看到它给你暴力算了循环卷积,于是想到 FFT 单位根处点值乘法对应循环卷积的本质。
可是 FFT 长度是 \(2^n\) 的,会挂掉。
于是我们可以考虑 Bluestein 算法。
Bluestein 算法
前置知识:任意模数 Chirp Z-Transform,确保你能过板以免被卡常。
以下设 \(\omega_n\) 为 \(n\) 次本原单位根。
设一个长度为 \(n\) 的数列 \(\{f_n\}\) 的循环卷积点值数列 \(\{g_n\}\) 为
\[g_t=\sum_{k=0}^{n-1}f_k\omega_n^{kt}
\]
由单位根反演,有
\[f_t=\frac1n\sum_{k=0}^{n-1}g_k\omega_n^{-kt}
\]
以上两项均可用 CZT 加速。
回到本题
由于单位根点值点乘本质即数列循环卷积,这题变得可做。
把 \(a,b\) 点值搞一下,点值按要求乘起来,回演系数,输出,做完了?!
于是你写,交上去,发现被卡常了……
卡卡常就过了。
Code
\(c\) 能对 \(n\) 取模是因为在点值计算时能使用费马小定理。
// Problem: P4191 [CTSC2010]性能优化
// Contest: Luogu
// URL: https://www.luogu.com.cn/problem/P4191
// Memory Limit: 250 MB
// Time Limit: 6000 ms
#include <algorithm>
#include <math.h>
#include <stdio.h>
#include <vector>
typedef long long llt;
typedef unsigned uint;typedef unsigned long long ullt;
typedef bool bol;typedef char chr;typedef void voi;
typedef double dbl;
template<typename T>bol _max(T&a,T b){return(a<b)?a=b,true:false;}
template<typename T>bol _min(T&a,T b){return(b<a)?a=b,true:false;}
template<typename T>T power(T base,T index,T mod){return((index<=1)?(index?base:1):(power(base*base%mod,index>>1,mod)*power(base,index&1,mod)))%mod;}
template<typename T>T lowbit(T n){return n&-n;}
template<typename T>T gcd(T a,T b){return b?gcd(b,a%b):a;}
template<typename T>T lcm(T a,T b){return(a!=0||b!=0)?a/gcd(a,b)*b:(T)0;}
template<typename T>T exgcd(T a,T b,T&x,T&y){if(!b)return y=0,x=1,a;T ans=exgcd(b,a%b,y,x);y-=a/b*x;return ans;}
const dbl Pi=acos(-1);
class cpx
{
public:
dbl a,b;
cpx():a(0),b(0){}
cpx(dbl a):a(a),b(0){}
cpx(dbl a,dbl b):a(a),b(b){}
voi unit(dbl alpha){a=cos(alpha),b=sin(alpha);}
cpx friend operator+(cpx a,cpx b){return cpx(a.a+b.a,a.b+b.b);}
cpx friend operator-(cpx a,cpx b){return cpx(a.a-b.a,a.b-b.b);}
cpx operator-(){return cpx(-a,-b);}
cpx friend operator*(cpx a,cpx b){return cpx(a.a*b.a-a.b*b.b,a.b*b.a+b.b*a.a);}
cpx friend operator/(cpx a,ullt v){return cpx(a.a/v,a.b/v);}
cpx conj(){return cpx(a,-b);}
cpx mul_i(){return cpx(-b,a);}
cpx div_i(){return cpx(b,-a);}
public:
cpx&operator=(ullt v){return a=v,b=0,*this;}
cpx&operator+=(cpx v){return*this=*this+v;}
cpx&operator-=(cpx v){return*this=*this-v;}
cpx&operator*=(cpx v){return*this=*this*v;}
cpx&operator/=(ullt v){return a/=v,b/=v,*this;}
dbl&real(){return a;}
dbl&imag(){return b;}
};
ullt Mod;
ullt chg(ullt v){return(v<Mod)?v:v-Mod;}
class poly
{
private:
std::vector<ullt>V;
public:
class FFT
{
private:
std::vector<uint>V;std::vector<cpx>G;uint len;
public:
uint length(){return len;}
voi bzr(uint length)
{
uint p=0;len=1,V.clear(),G.clear();
while(length){p++,len<<=1,length>>=1;}
V.resize(len),G.resize(len);
for(uint i=0;i<len;++i)V[i]=((i&1)?(V[i>>1]|len)>>1:(V[i>>1]>>1)),G[i].unit(Pi*2/len*i);
}
voi fft(std::vector<cpx>&y,bol op)
{
if(y.size()<len)y.resize(len);
for(uint i=0;i<len;i++)if(V[i]<i)std::swap(y[i],y[V[i]]);
for(uint h=2;h<=len;h<<=1)for(uint j=0;j<len;j+=h)for(uint k=j;k<j+(h>>1);k++){cpx u=y[k],t=G[len/h*(k-j)]*y[k+h/2];y[k]=u+t,y[k+h/2]=u-t;}
if(op){uint l=1,r=len-1;while(l<r)std::swap(y[l++],y[r--]);for(uint i=0;i<len;i++)y[i]/=len;}
}
voi fft_fft(std::vector<cpx>&a,std::vector<cpx>&b,bol op)
{
if(a.size()<len)a.resize(len);
if(b.size()<len)b.resize(len);
for(uint i=0;i<len;i++)a[i]+=b[i].mul_i();
fft(a,op),b[0]=a[0].conj();for(uint i=1;i<len;i++)b[i]=a[len-i].conj();
for(uint i=0;i<len;i++){cpx p=a[i],q=b[i];a[i]=(p+q)/2llu,b[i]=(p-q).div_i()/2llu;}
}
};
public:
poly(){V.clear();}
poly(std::vector<ullt>V){for(uint i=0;i<V.size();i++)push(V[i]%Mod);cut_zero();}
bol empty(){return cut_zero(),!size();}
voi bzr(){V.clear();}
voi push(ullt v){V.push_back(v%Mod);}
voi pop(){V.pop_back();}
ullt val(uint n){return(n<V.size())?V[n]:0;}
uint deg(){return V.size()-1;}
uint size(){return V.size();}
voi add(uint p,ullt v)
{
if(deg()<p)chg_deg(p);
V[p]=(V[p]+v)%Mod;
}
poly friend operator+(poly a,ullt v){a.add(0,v);return a;}
poly friend operator+(poly a,poly b)
{
uint len=std::max(a.size(),b.size());
a.chg_siz(len),b.chg_siz(len);
for(uint i=0;i<len;i++)a[i]=chg(a[i]+b[i]);
a.cut_zero();
return a;
}
poly friend operator-(poly a,poly b)
{
uint len=std::max(a.size(),b.size());
a.chg_siz(len),b.chg_siz(len);
for(uint i=0;i<len;i++)a[i]=chg(a[i]+Mod-b[i]);
a.cut_zero();
return a;
}
poly operator-()
{
cut_zero();uint len=size();
poly ans;ans.chg_siz(len);
for(uint i=0;i<len;i++)ans[i]=chg(Mod-V[i]);
return ans;
}
poly friend operator*(poly a,poly b)
{
FFT s;poly p;
uint n=a.deg(),m=b.deg(),len;
s.bzr(n+m+1),len=s.length();
std::vector<cpx>v1(len),v2(len),v3(len),v4(len);
for(uint i=0;i<len;i++)v3[i]=cpx(a.val(i)&32767),v1[i]=cpx(a.val(i)>>15),v4[i]=cpx(b.val(i)&32767),v2[i]=cpx(b.val(i)>>15);
s.fft_fft(v1,v2,0),s.fft_fft(v3,v4,0);
for(uint i=0;i<len;i++)v4[i]=(v3[i]+v1[i].mul_i())*v4[i],v2[i]=(v3[i]+v1[i].mul_i())*v2[i];
s.fft(v2,1),s.fft(v4,1),p.chg_deg(n+m);for(uint i=0;i<=n+m;i++)p[i]=(((ullt)(v2[i].b+.5)%Mod<<30)+((ullt)(v2[i].a+v4[i].b+.5)%Mod<<15)+(ullt)(v4[i].a+.5))%Mod;
p.cut_zero();
return p;
}
poly inv(){return inv(size());}
poly inv(uint prec)
{
poly ans,f,tmp,w;
llt x,y;
exgcd<llt>(val(0),Mod,x,y);
ans.push(x%(llt)Mod+(llt)Mod),f.push(val(0));
for(uint k=1;k<prec;k<<=1)
{
for(uint i=k;i<(k<<1);++i)f.push(val(i));
tmp=f*ans,tmp.chg_siz(k<<1),w.bzr();for(uint i=0;i<k;++i)w.push(tmp[i+k]);
w*=ans;for(uint i=0;i<k;++i)ans.push(Mod-w[i]);
}
return ans;
}
poly diff(){uint n=size();poly ans;for(uint i=1;i<n;++i)ans.push(V[i]*i);return ans;}
poly inte()
{
uint n=size();
poly ans;
ans.chg_deg(n);
ullt k=1;llt x,y;
std::vector<ullt>W;W.push_back(1),W.push_back(1);
for(uint i=2;i<n;++i)W.push_back(k=(k*i)%Mod);
exgcd<llt>(k*n%Mod,Mod,x,y);
k=chg(x%(llt)Mod+(llt)Mod);
for(uint i=n;i;--i)ans[i]=V[i-1]*k%Mod*W[i-1]%Mod,k=k*i%Mod;
return ans;
}
poly ln(){return(this->diff()*this->inv()).inte().chg_deg_ed(deg());}
poly exp(){return exp(size());}
poly exp(uint prec)
{
poly m;m.push(1);
if(empty())return m;
uint tp=1;
while(tp<prec)m*=*this-(m.diff()*m.inv(tp<<=1)).inte()+1,m.chg_siz(tp);
m.chg_siz(prec);
return m;
}
poly reverse(){poly ans;for(uint i=deg();~i;--i)ans.push(V[i]);return ans;}
poly operator/(poly b)
{
cut_zero(),b.cut_zero();uint m=size(),n=b.deg();if(m<=n)return poly();
poly f=this->reverse()*b.reverse().inv(m-n);f.chg_siz((m>n)?m-n:0);return f.reverse();
}
poly operator%(poly b){poly f=*this-*this/b*b;f.cut_zero();return f;}
voi cut_zero(){while(!V.empty()&&!V.back())V.pop_back();}
voi chg_siz(const uint siz){while(V.size()<siz)V.push_back(0);while(V.size()>siz)V.pop_back();}
voi chg_deg(const uint d){chg_siz(d+1);}
poly chg_deg_ed(const uint d){poly ans=*this;return ans.chg_deg(d),ans;}
public:
ullt&operator[](uint num){return V[num];}
poly&operator=(std::vector<ullt>V){bzr();for(uint i=0;i<V.size();i++)push(V[i]%Mod);cut_zero();return*this;}
poly&operator=(std::vector<cpx>V){bzr();for(uint i=0;i<V.size();i++)push((llt)(V[i].a+.5)%(llt)Mod+(llt)(Mod));cut_zero();return*this;}
poly&operator+=(poly b){return*this=*this+b;}
poly&operator-=(poly b){return*this=*this-b;}
poly&operator*=(poly b){return*this=*this*b;}
poly&operator/=(poly b){return*this=*this/b;}
poly&operator%=(poly b){return*this=*this%b;}
};
ullt gotg()
{
static ullt Fac[15];uint cnt=0;
ullt v=Mod-1;
for(ullt i=2;i*i<=v;i++)
if(!(v%i))
{
Fac[cnt++]=i;
do v/=i;while(!(v%i));
}
if(v>1)Fac[cnt++]=v;
for(ullt ans=2;;ans++)if(power<ullt>(ans,Mod-1,Mod)==1)
{
bol b=true;
for(uint i=0;b&&i<cnt;i++)if(power<ullt>(ans,(Mod-1)/Fac[i],Mod)==1)b=false;
if(b)return ans;
}
return 0;
}
ullt A[500005],B[500005];
uint g_pow[1000005];
uint g_binom_pow[1000005];
uint inv_pow[1000005];
uint inv_binom_pow[1000005];
poly P1,P2;
int main()
{
uint n;ullt c;scanf("%u%llu",&n,&c),Mod=n+1,c%=n;
ullt g=gotg();ullt inv=power(g,Mod-2,Mod);
g_pow[0]=inv_pow[0]=g_binom_pow[0]=inv_binom_pow[0]=1;
for(uint i=1;i<=n*2;i++)
{
g_pow[i]=(ullt)g_pow[i-1]*g%Mod,inv_pow[i]=(ullt)inv_pow[i-1]*inv%Mod,
g_binom_pow[i]=(ullt)g_binom_pow[i-1]*g_pow[i-1]%Mod,
inv_binom_pow[i]=(ullt)inv_binom_pow[i-1]*inv_pow[i-1]%Mod;
}
for(uint i=0;i<n;i++)scanf("%llu",A+i),A[i]%=Mod;
for(uint i=0;i<n;i++)scanf("%llu",B+i),B[i]%=Mod;
P1.chg_siz(n<<1),P2.chg_siz(n);
for(uint i=0;i<(n<<1);i++)P1[i]=g_binom_pow[i];
for(uint i=0;i<n;i++)
P2[i]=A[i]*inv_binom_pow[i]%Mod;
P2=P2.reverse()*P1;
for(uint i=0;i<n;i++)
A[i]=P2.val(n+i-1)*inv_binom_pow[i]%Mod;
P2.chg_siz(n);
for(uint i=0;i<n;i++)
P2[i]=B[i]*inv_binom_pow[i]%Mod;
P2=P2.reverse()*P1;
for(uint i=0;i<n;i++)
A[i]=A[i]*power(P2.val(n+i-1)*inv_binom_pow[i]%Mod,c,Mod)%Mod;
for(uint i=0;i<(n<<1);i++)P1[i]=inv_binom_pow[i];
P2.chg_siz(n);
for(uint i=0;i<n;i++)P2[i]=A[i]*g_binom_pow[i]%Mod;
P2=P2.reverse()*P1;
for(uint i=0;i<n;i++)printf("%llu\n",P2.val(n+i-1)*(Mod-g_binom_pow[i])%Mod);
return 0;
}
本文来自博客园,作者:myee,转载请注明原文链接:https://www.cnblogs.com/myee/p/Luogu-solution-p4191.html