CF1093
题解:
D:
比较显然这个图得是二分图才行
然后每个连通的二分图上的方案数是 $2^a+2^b$($a,b$ 是两种颜色的点的个数),各连通块的方案数相乘就是答案
E:
我tm就不该先写bitset的
正解和bitset都很好想
因为是个排列,所以所有元素都不同,会有很多性质
bitset就是我们对序列维护一个前缀和表示前i位有哪些数
发现你开不下空间
于是分块一下
复杂度 $n\sqrt{n}+\frac{nm}{32}$
写了没多久,卡常数卡了3个小时,然后还没过。。(题解说是不想让它过的。。)
大概是
把bitset换成了手写 循环展开
询问用4个矩形减一减改成两个减一减再做&运算(这个在我本机快了很多 可提交上去影响并不大 不知道为什么)
另外真没发现指针比数组快。。我感觉一般都比数组慢
不过那个4次访问数组把它先用个变量存下来能快一点
cf的机子开不开O2啊。。我这开O2本机跑全是询问的也才7s啊。。
// CF1093E: sqrt decomposition over positions combined with hand-written bitsets.
//
// a1[k] / b1[k] hold the set of values occurring in the first k blocks of
// a / b.  A type-1 query builds the value sets of a[l1..r1] and b[l2..r2]
// from those block prefixes and counts the size of their intersection.
#pragma GCC optimize("Ofast")
#include <bits/stdc++.h>
using namespace std;

#define rint int  // `register` was removed in C++17, so this is a plain int now
#define IL inline
#define rep(i,h,t) for(int i=h;i<=t;i++)
#define dep(i,t,h) for(int i=t;i>=h;i--)
#define ll long long
#define me(x) memset(x,0,sizeof(x))
#define mep(x,y) memcpy(x,y,sizeof(y))
#define mid ((h+t)>>1)
#define ull unsigned long long
#define lowbit(x) (x&(-x))

// Buffered stdin reader / stdout writer.
namespace IO {
  char ss[1<<24],*A=ss,*B=ss;

  // Returns the next input byte, refilling the buffer when it is exhausted;
  // EOF once fread yields nothing.
  IL char gc() {
    return A==B&&(B=(A=ss)+fread(ss,1,1<<24,stdin),A==B)?EOF:*A++;
  }

  // Parses an optionally negative decimal integer into x.
  template<class T> void read(T &x) {
    rint f=1,c;
    while (c=gc(),c<48||c>57) if (c=='-') f=-1;
    x=(c^48);
    while (c=gc(),c>47&&c<58) x=(x<<3)+(x<<1)+(c^48);
    x*=f;
  }

  char sr[1<<24],z[20];
  ll Z,C1=-1;  // Z: digit-stack height, C1: index of the last byte written to sr

  // Appends the decimal representation of x to the output buffer.
  template<class T> void wer(T x) {
    if (x<0) sr[++C1]='-',x=-x;
    while (z[++Z]=x%10+48,x/=10);
    while (sr[++C1]=z[Z],--Z);
  }
  IL void wer1() { sr[++C1]=' '; }
  IL void wer2() { sr[++C1]='\n'; }

  template<class T> IL void maxa(T &x,T y) { if (x<y) x=y; }
  template<class T> IL void mina(T &x,T y) { if (x>y) x=y; }
  template<class T> IL T MAX(T x,T y) { return x>y?x:y; }
  template<class T> IL T MIN(T x,T y) { return x<y?x:y; }
}
using namespace IO;

const int N=210010;    // same value as the original 2.1e5+10
const int M=500;       // capacity of the per-block prefix arrays
const int l=N/64+2;    // number of 64-bit words in a Bitset (words 1..l are used)
int ans[70000];        // ans[v] = popcount of the 16-bit value v, filled in main

// Fixed-size bitset over the values 1..N; value x lives in word (x-1)/64+1.
// Copying and assignment use the implicitly generated members.
struct Bitset {
  ull a[l+1];

  // Word-wise AND.  Written as a plain loop; the compiler is free to
  // unroll or vectorise it.
  IL Bitset operator & (const Bitset &o) const {
    Bitset c;
    for (int i=0;i<=l;i++) c.a[i]=a[i]&o.a[i];
    return c;
  }

  // Word-wise XOR.  The original applied `&` to the last few words.
  IL Bitset operator ^ (const Bitset &o) const {
    Bitset c;
    for (int i=0;i<=l;i++) c.a[i]=a[i]^o.a[i];
    return c;
  }

  // Number of set bits, via four 16-bit table lookups per word.
  IL int count() const {
    int total=0;
    for (int i=1;i<=l;i++) {
      const ull w=a[i];
      total+=ans[w&65535]+ans[(w>>16)&65535]
            +ans[(w>>32)&65535]+ans[(w>>48)&65535];
    }
    return total;
  }

  // Sets (y==1) or clears (any other y) the bit of value x, x>=1.
  // The original shifted by x-pos*64-1, which is negative (undefined
  // behaviour); the bit index inside the word is (x-1)%64.
  IL void set(int x,int y) {
    const int pos=(x-1)/64+1;
    const ull bit=1ull<<((x-1)%64);
    if (y==1) a[pos]|=bit;
    else a[pos]&=~bit;
  }
};

int a[N],b[N],pos[N],n,m,block,num;
Bitset a1[M],b1[M];  // a1[k]/b1[k]: values in the first k blocks of a/b
Bitset kong;         // the empty set (zero-initialised global)

// Builds the prefix sets of block x from those of block x-1.
void reset(int x) {
  a1[x]=a1[x-1];
  b1[x]=b1[x-1];
  const int h1=(x-1)*block+1,t1=MIN(n,x*block);
  rep(i,h1,t1) a1[x].set(a[i],1),b1[x].set(b[i],1);
}

// Values of a[1..x]; the empty set for x==0.
IL Bitset get_query1(int x) {
  if (!x) return kong;
  const int x1=pos[x];
  Bitset now=a1[x1-1];
  rep(i,(x1-1)*block+1,x) now.set(a[i],1);
  return now;
}

// Values of b[1..x]; the empty set for x==0.
IL Bitset get_query2(int x) {
  if (!x) return kong;
  const int x1=pos[x];
  Bitset now=b1[x1-1];
  rep(i,(x1-1)*block+1,x) now.set(b[i],1);
  return now;
}

// Number of values common to a[1..x] and b[1..y] (0 if either prefix is
// empty).  Only needed by the four-rectangle variant mentioned in main.
IL int query(int x,int y) {
  if (!x||!y) return 0;
  Bitset nowa=get_query1(x),nowb=get_query2(y);
  return (nowa&nowb).count();
}

// Number of values common to a[l1..r1] and b[l2..r2].  Each range set is
// the XOR of two prefix sets, which equals their difference because one
// prefix contains the other.
IL int query(int l1,int r1,int l2,int r2) {
  Bitset k1=get_query1(l1-1),k2=get_query1(r1);
  k2=k2^k1;
  Bitset k3=get_query2(l2-1),k4=get_query2(r2);
  k4=k4^k3;
  return (k2&k4).count();
}

// Swaps b[x] and b[y]; the caller passes x<=y.  Only the prefix sets of
// blocks pos[x]..pos[y]-1 contain position x but not y, so they lose the
// old b[x] and gain the old b[y].  Always returns 0 (the original fell
// off the end of a function declared int).
IL int change(int x,int y) {
  const int x1=pos[x],y1=pos[y],v1=b[x],v2=b[y];
  swap(b[x],b[y]);
  rep(i,x1,y1-1) b1[i].set(v2,1),b1[i].set(v1,0);
  return 0;
}

int main() {
  freopen("1.in","r",stdin);
  freopen("1.out","w",stdout);
  read(n); read(m);
  // Popcount table for every 16-bit value.
  rep(i,1,65535) ans[i]=ans[i-lowbit(i)]+1;
  rep(i,1,n) read(a[i]);
  rep(i,1,n) read(b[i]);
  block=sqrt(n);
  num=(n-1)/block+1;
  rep(i,1,num) reset(i);
  rep(i,1,n) pos[i]=(i-1)/block+1;
  rep(i,1,m) {
    int kk,l1,r1,l2,r2,x,y;
    read(kk);
    if (kk==1) {
      read(l1); read(r1); read(l2); read(r2);
      // Four-rectangle alternative:
      // wer(query(r1,r2)-query(l1-1,r2)-query(r1,l2-1)+query(l1-1,l2-1));
      wer(query(l1,r1,l2,r2));
      wer2();
    } else {
      read(x); read(y);
      if (x>y) swap(x,y);
      change(x,y);
    }
  }
  fwrite(sr,1,C1+1,stdout);
  return 0;
}
正解也很简单
把每个点对应到第二个序列
变成二维查询点数,单点修改
线段树套线段树/平衡树就可以了
然而这个东西空间很傻比
线段树套线段树不用说肯定gg了
空间比较小的线段树套平衡树
本身空间 $n\log n$
然后每个里面要开$ls,rs,num,v$
然后一共插入个数是 $n+4m$ 的
所以计算一下就是 $6\times 10^6\cdot\log n$ 的,可能再把 ls、rs 压一压是可以的。。
比较简单又最快的方法是用cdq分治再套个数据结构
// CF1093E: offline solution, CDQ divide and conquer over time plus a Fenwick tree.
//
// Every value v is a point (position of v in a, position of v in b).
// A type-1 query counts points in a rectangle and is split into four
// prefix-count events; a swap in b removes two points and inserts two.
#include <bits/stdc++.h>
using namespace std;

#define rint int  // `register` was removed in C++17, so this is a plain int now
#define IL inline
#define rep(i,h,t) for (int i=h;i<=t;i++)
#define dep(i,t,h) for (int i=t;i>=h;i--)
#define me(x) memset(x,0,sizeof(x))
#define ll long long
#define mep(x,y) memcpy(x,y,sizeof(y))  // the original took only x yet used y
#define mid ((h+t)>>1)
#define lowbit(x) (x&(-x))

// Buffered stdin reader / stdout writer.
namespace IO {
  char ss[1<<24],*A=ss,*B=ss;

  // Returns the next input byte, refilling the buffer when it is exhausted;
  // EOF once fread yields nothing.
  IL char gc() {
    return A==B&&(B=(A=ss)+fread(ss,1,1<<24,stdin),A==B)?EOF:*A++;
  }

  // Parses an optionally negative decimal integer into x.
  template<class T> void read(T &x) {
    rint f=1,c;
    while (c=gc(),c<48||c>57) if (c=='-') f=-1;
    x=(c^48);
    while (c=gc(),c>47&&c<58) x=(x<<3)+(x<<1)+(c^48);
    x*=f;
  }

  char sr[1<<24],z[20];
  int Z,C=-1;  // Z: digit-stack height, C: index of the last byte written to sr

  // Appends the decimal representation of x to the output buffer.
  template<class T> void wer(T x) {
    if (x<0) sr[++C]='-',x=-x;
    while (z[++Z]=x%10+48,x/=10);
    while (sr[++C]=z[Z],--Z);
  }
  IL void wer1() { sr[++C]=' '; }
  IL void wer2() { sr[++C]='\n'; }

  template<class T> IL void maxa(T &x,T y) { if (x<y) x=y; }
  template<class T> IL void mina(T &x,T y) { if (x>y) x=y; }
  template<class T> IL T MAX(T x,T y) { return x>y?x:y; }
  template<class T> IL T MIN(T x,T y) { return x<y?x:y; }
}

const int N=300000;
const int M=N*8;  // capacity of the event arrays

int a[N],b[N];
int pos_in_a[N];   // pos_in_a[v] = i such that a[i]==v
int pos_in_b[N];   // pos_in_b[v] = i such that b[i]==v in the initial b
int ans[N];        // ans[i] = answer of the i-th operation when it is a query
bool is_query[N];  // is_query[i]: the i-th operation is of type 1
int n,m;

// One event, in chronological order inside p[].
//   a, b : coordinates (position in a, position in b)
//   c    : +1 / -1 for a point insertion / removal; for a query corner it
//          is 4 or 2 and the corner contributes with sign c-3 (+1 / -1)
//   d    : index of the operation that produced the event
struct re {
  int a,b,c,d;
} p[M],p1[M],p2[M];

// Fenwick tree over the b coordinate, positions 1..n.
struct BIT {
  int sum[N];

  // Sum of the weights at positions 1..x (0 for x<=0).
  int query(int x) {
    int total=0;
    for (int y=x;y>0;y-=lowbit(y)) total+=sum[y];
    return total;
  }

  // Adds k at position x (x>=1).
  void change(int x,int k) {
    for (;x<=n;x+=lowbit(x)) sum[x]+=k;
  }
} fen;

// Orders events by their a coordinate.
bool cmp(const re &x,const re &y) {
  return x.a<y.a;
}

// Adds, for every query corner in p[mid+1..t], the weight of the points in
// p[h..mid] that it dominates, then recurses into both halves.
void cdq_fz(int h,int t) {
  if (h==t) return;
  int cnt1=0,cnt2=0;
  rep(i,h,mid) if (p[i].c<=1) p1[++cnt1]=p[i];      // points of the left half
  rep(i,mid+1,t) if (p[i].c>=2) p2[++cnt2]=p[i];    // corners of the right half
  sort(p1+1,p1+cnt1+1,cmp);
  sort(p2+1,p2+cnt2+1,cmp);
  int t1=1;
  rep(i,1,cnt2) {
    while (t1<=cnt1&&p1[t1].a<=p2[i].a) {
      fen.change(p1[t1].b,p1[t1].c);
      t1++;
    }
    ans[p2[i].d]+=(p2[i].c-3)*fen.query(p2[i].b);
  }
  // Undo the insertions so the tree is empty for the next call.
  dep(i,t1-1,1) fen.change(p1[i].b,-p1[i].c);
  cdq_fz(h,mid);
  cdq_fz(mid+1,t);
}

int main() {
  freopen("1.in","r",stdin);
  freopen("1.out","w",stdout);
  IO::read(n); IO::read(m);
  rep(i,1,n) IO::read(a[i]),pos_in_a[a[i]]=i;
  rep(i,1,n) IO::read(b[i]),pos_in_b[b[i]]=i;
  int num=0;
  // Initial points: value a[i] sits at (i, position of a[i] in b).
  rep(i,1,n) p[++num]=re{i,pos_in_b[a[i]],1,i};
  rep(i,1,m) {
    int kk;
    IO::read(kk);
    if (kk==1) {
      // Input order is l_a, r_a, l_b, r_b.
      int x1,x2,y1,y2;
      is_query[i]=1;
      IO::read(x1); IO::read(x2); IO::read(y1); IO::read(y2);
      p[++num]=re{x2,y2,4,i};
      p[++num]=re{x2,y1-1,2,i};
      p[++num]=re{x1-1,y2,2,i};
      p[++num]=re{x1-1,y1-1,4,i};
    } else {
      int x,y;
      IO::read(x); IO::read(y);
      // Value b[x] moves from b-position x to y, value b[y] from y to x.
      p[++num]=re{pos_in_a[b[x]],x,-1,i};
      p[++num]=re{pos_in_a[b[x]],y,1,i};
      p[++num]=re{pos_in_a[b[y]],y,-1,i};
      p[++num]=re{pos_in_a[b[y]],x,1,i};
      swap(b[x],b[y]);
    }
  }
  cdq_fz(1,num);
  rep(i,1,m) if (is_query[i]) IO::wer(ans[i]),IO::wer2();
  fwrite(IO::sr,1,IO::C+1,stdout);
  return 0;
}
F:
G:
第一眼感觉很像kd-tree呀(n那么大跑啥kd-tree啊)
正解很好想
因为k只有5,所以我们对符号讨论一下维护最大最小值就好了
复杂度 $32\,n\log n$