zsyzlzy

导航

 

题意:
求 $sum=\sum_{i=1}^{n}\sum_{j=1}^{i} lowbit(j)$ 对 $p$ 取模的值.

我们先考虑后半部分,$\sum_{j=1}^{i} lowbit(j)$.
设此时的 $i=n$,$lowbit=2^k$(即只统计 $1\le j\le n$ 中满足 $lowbit(j)=2^k$ 的那些 $j$,它们满足 $j\equiv 2^k \pmod{2^{k+1}}$).
if $(n\gg k)\,\&\,1$:$sum \mathrel{+}= ((n\gg(k+1))+1)\cdot 2^k=(n\gg(k+1))\cdot 2^k+2^k$
else:$sum \mathrel{+}= (n\gg(k+1))\cdot 2^k$
综上(对所有 $k$ 求和,$n$ 的二进制中为 1 的各位上的 $2^k$ 之和恰为 $n$):$\sum_{j=1}^{n} lowbit(j)=\sum_{k\ge 0}(n\gg(k+1))\cdot 2^k+n$
现在,对每个 $k$,需要加速求的是 $\sum_{i=1}^{n}(i\gg(k+1))$.
这很明显长这样:
1……1,2……2,3……3,……
每一段的长度都是 $2^{k+1}$(最后一段可能不完整).
令 $j=\lfloor n/2^{k+1}\rfloor$.
用等差数列求和公式可得其和为:
$j\cdot(j-1)\cdot 2^k+j\cdot(n-j\cdot 2^{k+1}+1)$.
代码:

#pragma GCC optimize("Ofast")
#include<cstdio>
#include<cctype>
#include<cstring>
#include<algorithm>
// ---- Buffered stdin/stdout plumbing shared by qr(), write() and main() ----
// g      : evaluates to the next input byte. When the unread range [p1,p2) is
//          empty it refills `buf` from stdin with fread, and evaluates to EOF
//          if that refill reads nothing.
// put(c) : appends byte c to `puf`. If the buffer is full it is first written
//          to stdout in one fwrite and the cursor rewound. Bytes still buffered
//          at program end are NOT flushed by the macro; main() does that.
#define g (p1==p2&&(p2=(p1=buf)+fread(buf,1,BUF_SIZE,stdin),p1==p2)?EOF:*p1++)
#define put(c) ( ( (p3==p4)?(fwrite(puf,1,BUF_SIZE,stdout),p3=puf):0) ,*p3++=c)
using namespace std;
typedef long long ll;
// Capacity in bytes of each I/O buffer (1<<25 = 32 MiB).
// Deliberately not named `size`: with `using namespace std;` in effect, an
// unqualified `size` is ambiguous with std::size (C++17) as soon as any
// included standard header declares it, and the file stops compiling.
const int BUF_SIZE=1<<25;
char buf[BUF_SIZE],*p1=buf,*p2=buf;          // input:  [p1,p2) is the unread part of buf
char puf[BUF_SIZE],*p3=puf,*p4=puf+BUF_SIZE; // output: p3 is the write cursor, p4 the end of puf
// Reads the next unsigned decimal integer from the buffered input into x.
//
// Bytes that are not ASCII digits are skipped, then consecutive digits are
// accumulated in base 10; the first byte after the number is consumed.
// There is no sign handling and no overflow check.
//
// If the input ends before any digit is found, x is left at 0 and the
// function returns instead of spinning forever on EOF. If it ends in the
// middle of a number, the digits read so far are kept.
void qr(ll &x) {
	x=0;
	// int, not char: `g` evaluates to EOF at end of input, and a char cannot
	// hold that reliably. Digits are recognised by range comparison, so a
	// negative byte value is never handed to isdigit (which would be undefined).
	// NOTE(review): `g` yields bytes as plain char, so where char is signed a
	// raw 0xFF byte compares equal to EOF and ends the scan early.
	int c=g;
	while(c!=EOF&&(c<'0'||c>'9'))c=g;
	while(c>='0'&&c<='9') {
		x=x*10+(c-'0');
		c=g;
	}
}
// Appends the decimal digits of x to the output buffer via put(), most
// significant digit first. No sign is emitted; callers in this file only
// pass non-negative values.
void write(ll x) {
	char digits[20];   // a 64-bit value has at most 19 decimal digits
	int len=0;
	// Peel digits off the low end, then replay them in reverse order.
	do {
		digits[len++]=x%10+'0';
		x/=10;
	} while(x);
	while(len)put(digits[--len]);
}
// Per-query state: n and the modulus p are read by main(); sum is the answer
// being accumulated. mult() reads p.
ll n,p,sum;
// Returns (a*b) mod p for the global modulus p, as a value in [0,p).
//
// The product is never formed directly (it would not fit in 64 bits);
// instead the smaller operand is consumed bit by bit with double-and-add,
// so the loop runs once per bit of min(a mod p, b mod p).
//
// Requires p > 0. Operands may be any ll values: they are first reduced into
// [0,p), so negative inputs are handled. All intermediate sums are kept in
// unsigned 64-bit arithmetic with a conditional subtraction, which stays
// exact for every positive ll modulus (a signed `(c+a)%p` overflows once
// p exceeds 2^62).
ll mult(ll a,ll b) {
	typedef unsigned long long ull;
	const ull m=static_cast<ull>(p);
	a%=p;if(a<0)a+=p;
	b%=p;if(b<0)b+=p;
	ull x=static_cast<ull>(a),y=static_cast<ull>(b),acc=0;
	if(y>x)std::swap(x,y);          // iterate over the smaller operand
	while(y) {
		if(y&1) {
			acc+=x;                 // acc,x < m < 2^63, so no wrap-around
			if(acc>=m)acc-=m;
		}
		x<<=1;
		if(x>=m)x-=m;
		y>>=1;
	}
	return static_cast<ll>(acc);
}
int main() {
	int T,tot;qr(n);T=n;
	while(T--) {
		qr(n);qr(p);
		if(n&1)sum=mult(n,(n+1)>>1);
		else   sum=mult(n+1,n>>1);
		for(int i=0;(1LL<<(i+1))<=n;i++) {
			ll j=n>>(i+1),tmp;
			tmp=mult(mult(j,j-1),1LL<<i)+mult(j,n-(j<<(i+1))+1);
			sum=(sum+mult(tmp,1LL<<i))%p;
		}
		write(sum);
		put('\n');
	}
	fwrite(puf,1,p3-puf,stdout);
	return 0;
}

posted on 2019-09-21 10:42  zsyzlzy  阅读(111)  评论(0)  编辑  收藏  举报