个人感觉 O(n*sqrt(log(n))) 求逆序对个数是 non-practical 的

只实现了值域为 \([0,2^{\sqrt{w}})\) 的子问题,这个算法跑得比树状数组还慢。

#include<bits/stdc++.h>
#define rp(i,a,b) for(int i=a;i<=b;++i)
using namespace std;
typedef unsigned int ui;
typedef unsigned long long ul;
// State of the pseudo-random generator; every call to nxt() replaces it.
ul seed=10245;
// Advances the generator by one step and returns the new 64-bit state.
// A step is five xor-shift rounds: <<17, >>5, <<13, >>8, <<25.
inline ul nxt(){
	ul s=seed;
	s^=s<<17;
	s^=s>>5;
	s^=s<<13;
	s^=s>>8;
	s^=s<<25;
	seed=s;
	return s;
}
// Number of benchmark elements; a[1..N] holds them.
const int N=100000000;
// b is a Fenwick (binary indexed) tree over positions 1..255.
int a[N+9],b[257];
// Records one occurrence at position x (x >= 1).
// Positions >= 256 are not stored; sum() is only ever asked about 0..255.
void add(int x){
	int pos=x;
	while(pos<256){
		b[pos]+=1;
		pos+=pos&-pos;	// step to the next node covering this position
	}
}
// Returns how many recorded positions lie in [1, x], for 0 <= x <= 255.
int sum(int x){
	int total=0;
	int pos=x;
	while(pos!=0){
		total+=b[pos];
		pos-=pos&-pos;	// drop the lowest set bit
	}
	return total;
}
// Benchmark driver for the Fenwick-tree version.
// Fills a[1..N] with random bytes, then for every i adds the number of
// earlier elements that are strictly smaller than a[i] (values are stored
// at position value+1, so sum(a[i]) counts values < a[i]).
// Prints the total and the CPU time of the counting phase in milliseconds.
int main(){
	rp(i,1,N){
		a[i]=nxt()&255;
	}
	// clock() returns clock_t ticks, not milliseconds; keep the native type.
	const clock_t st=clock();
	ul ans=0;
	rp(i,1,N){
		ans+=sum(a[i]);
		add(a[i]+1);
	}
	printf("%llu\n",ans);
	// Convert ticks to milliseconds so the label is right on every platform
	// (CLOCKS_PER_SEC is 1000000 on POSIX, 1000 on Windows CRT).
	const long long ms=static_cast<long long>(clock()-st)*1000/CLOCKS_PER_SEC;
	printf("time used: %lldms\n",ms);
	return 0;
}
/*
time used: 968ms
*/
#include<bits/stdc++.h>
#define rp(i,a,b) for(int i=a;i<=b;++i)
using namespace std;
typedef unsigned int ui;
typedef unsigned long long ul;
// State of the pseudo-random generator; every call to nxt() replaces it.
ul seed=10245;
// Advances the generator by one step and returns the new 64-bit state.
ul nxt(){
	// Xor-shift rounds in order: positive = shift left, negative = shift right.
	static const int rounds[5]={17,-5,13,-8,25};
	ul s=seed;
	for(int k=0;k<5;++k){
		const int d=rounds[k];
		s^=(d>0)?(s<<d):(s>>(-d));
	}
	seed=s;
	return s;
}
// L: bits per value (and bit columns per byte of a packed word).
const int L=8;
// A: largest value / mask of the low byte.
const int A=(1<<L)-1;
// F[k][0]: number of set bits of k.
// F[k][1]: number of pairs (lower bit is 0, higher bit is 1) inside k.
// Filled by init().
int F[A+2][2];
// Bits 0, 2, 4 and 6 (the lower bit of each 2-bit pair of the low byte).
const ul E=0x55;
// Bit 0 of each of the eight bytes; multiplying a byte by K copies it into every byte.
const ul K=0x0101010101010101ull;
// S[k][0..4]: move masks used by sh_1 (stay, >>1, >>2, <<1, <<2). Filled by init().
ul S[A+2][5];
// T[k][0]: packed shift amounts, T[k][1..3]: move masks used by sh_2. Filled by init().
ul T[A+2][4];
// Sp[v]: the bits of v, most significant first, placed at bit 0 of bytes 0..7. Filled by init().
ul Sp[A+2];
// Pre[i]: the low i bits of every byte. Filled by init().
ul Pre[L+1];
// First partition step: works on 2-bit pairs.
// The low byte of x is the key. For every pair (bits 2k, 2k+1) of the key
// whose lower bit is 1 and upper bit is 0, bits 2k and 2k+1 are swapped in
// all eight bytes of x. Afterwards no key pair has a 1 below a 0.
ul sh_0(ul x){
	// Lower-bit positions of the key pairs that must be swapped, copied to every byte.
	const ul swapAt=(x&(~x>>1)&E)*K;
	const ul lowerBit=swapAt&x;
	const ul upperBit=((swapAt<<1)&x)>>1;
	// Set where the two bits of a selected pair differ; only those need flipping.
	const ul differs=lowerBit^upperBit;
	// differs has bits at even positions only, so *3 sets both bits of the pair.
	return x^(differs*3);
}
// Second partition step: runs sh_0, then moves bits inside each 4-bit half
// of every byte as dictated by S[key], where key is the low byte of the
// sh_0 result. S[key][0] bits stay, [1] move right by 1, [2] right by 2,
// [3] left by 1, [4] left by 2 (the masks are built in init()).
ul sh_1(ul x){
	const ul v=sh_0(x);
	const ul*mask=S[v&255];
	ul out=v&mask[0];
	out|=(v&mask[1])>>1;
	out|=(v&mask[2])>>2;
	out|=(v&mask[3])<<1;
	out|=(v&mask[4])<<2;
	return out;
}
// Final partition step: runs sh_1, then merges the two 4-bit halves of
// every byte using T[key], where key is the low byte of the sh_1 result.
// Bits under T[key][1] (set key bits of the low half) move left, bits under
// T[key][2] (clear key bits of the high half) move right, the rest stay.
ul sh_2(ul x){
	const ul v=sh_1(x);
	const ul*row=T[v&255];
	// row[0] packs popcount(low half) in its low 3 bits and popcount(high half) above them.
	const int lowOnes=row[0]&7;
	const int highZeros=4-(row[0]>>3);
	const ul stay=v&row[3];
	const ul movedUp=(v&row[1])<<highZeros;
	const ul movedDown=(v&row[2])>>lowOnes;
	return stay|movedUp|movedDown;
}
void init(){
	int TR[3]={0,2,3};
	int TL[3]={0,1,3};
	rp(i,0,A){
		ui j=i;
		int x=(j&3)+1>>1;
		int y=(~j>>2&3)+1>>1;
		int z=(j>>4&3)+1>>1;
		int w=(~j>>6&3)+1>>1;
		if(y){
			S[i][y+2]|=TR[x]; 
		}
		if(w){
			S[i][w+2]|=16*TR[z];
		}
		if(x){
			S[i][x]|=4*TL[y];
		}
		if(z){
			S[i][z]|=64*TL[w];
		}
		S[i][0]=255^S[i][1]^S[i][2]^S[i][3]^S[i][4];
		rp(j,0,4){
			S[i][j]*=K;
		}
	}
	rp(i,0,A){
		T[i][0]=
			__builtin_popcount(i&15)+
			__builtin_popcount(i>>4)*8;
		T[i][1]=K*(i&15);
		T[i][2]=K*(~i&240);
		T[i][3]=~0ull^T[i][1]^T[i][2];
	}
	rp(i,0,A){
		rp(j,0,L-1)if(i>>j&1){
			++F[i][0];
			rp(k,0,j-1)if(~i>>k&1){
				++F[i][1];
			}
		}
		rp(j,0,7){
			Sp[i]|=ul(i>>(L-1-j)&1)<<(j*L);
		}
	}
	rp(i,0,L){
		Pre[i]=K*((1<<i)-1);
	}
}
// Number of benchmark elements; a[1..N] holds them (one byte each).
const int N=1e8;
// Base buffer size in 64-bit words (about 1.25 * N/8).
const int M=N/64*10+9;
unsigned char a[N+9];
// b holds the packed input in b[1..N/8], and solve(ul*,ul*,int,int) appends
// each "ones" partition directly behind its input (c=a+n). For random input
// that chain needs about N/8 + N/16 + N/32 + ... < N/4 words, which exceeds
// M words already on the first level, so b gets 2*M words to keep those
// writes inside the array. c receives the "zeros" partitions and stays below
// about N/8 words for random input.
// NOTE(review): sizes assume roughly balanced bits; heavily skewed input
// needs more room — confirm before feeding non-random data.
ul b[2*M],c[M],ans;
// Appends c bit columns to the packed stream a.
//   a   - output words; a[n] is the word currently being filled
//   n   - index of the current word, advanced when the word overflows
//   p   - number of columns already used in a[n] (0..L), updated
//   c   - number of columns to append (0..L)
//   msk - the columns, stored in the low c bits of every byte
// Columns that do not fit into a[n] start a fresh word a[n+1], which is
// assigned (not or-ed), so the output buffer need not be cleared beforehand.
void Add(ul*a,int&n,int&p,int c,ul msk){
	const int room=L-p;
	if(c<=room){
		a[n]|=msk<<p;
		p+=c;
		return;
	}
	const int spill=c-room;
	const ul headMask=K*((1<<room)-1);
	const ul tailMask=K*((1<<spill)-1);
	a[n]|=(msk&headMask)<<p;
	++n;
	a[n]=(msk>>room)&tailMask;
	p=spill;
}
// Processes one bit plane of the packed words a[1..n] and recurses on the
// two resulting partitions.
//   a - input words; the low byte of each word is the current key plane,
//       one element per bit position
//   b - output area for the elements whose key bit is 0
//   n - number of input words
//   l - number of bit planes left, including this one
// Adds to the global `ans` the number of pairs (earlier element has key bit
// 0, later element has key bit 1) within this group.
// The elements whose key bit is 1 are written to a+n, i.e. directly behind
// the input, so the buffer containing a must extend past a[n].
void solve(ul*a,ul*b,int n,int l){
	ul zerosSeen=0;
	ul*const c=a+n;
	int bn=0,bp=L;
	int cn=0,cp=L;
	for(int i=1;i<=n;++i){
		const ul word=a[i];
		const int key=word&A;
		const int ones=F[key][0];
		const int zeros=L-ones;
		// earlier zeros paired with this word's ones, plus pairs inside the word
		ans+=zerosSeen*ones+F[key][1];
		zerosSeen+=zeros;
		// Partition every plane by the key, then drop the key plane.
		const ul x=sh_2(word)>>L;
		Add(b,bn,bp,zeros,x&Pre[zeros]);
		Add(c,cn,cp,ones,(x>>zeros)&Pre[ones]);
	}
	if(l<=1)return;
	solve(b,c+cn,bn,l-1);
	solve(c,b+bn,cn,l-1);
}
void solve(){
	int m=0,p=L;
	rp(i,1,N){
		if(p==L){
			++m,p=0;
		}
		b[m]|=Sp[a[i]]<<p++;
	} 
	solve(b,c,m,L);
	cout<<ans<<endl;
}
// Benchmark driver for the word-parallel version.
// Fills a[1..N] with random bytes, then times init() plus solve() and prints
// the CPU time in milliseconds (solve() prints the pair count itself).
int main(){
	rp(i,1,N){
		a[i]=nxt()&A;
	}
	// clock() returns clock_t ticks, not milliseconds; keep the native type.
	const clock_t st=clock();
	init();
	solve();
	// Convert ticks to milliseconds so the label is right on every platform
	// (CLOCKS_PER_SEC is 1000000 on POSIX, 1000 on Windows CRT).
	const long long ms=static_cast<long long>(clock()-st)*1000/CLOCKS_PER_SEC;
	printf("time used: %lldms\n",ms);
	return 0;
}
/*
time used: 1814ms
*/
posted @ 2023-04-06 13:53  alfalfa_w  阅读(134)  评论(0编辑  收藏  举报