洛谷 P6778 - [Ynoi2009] rpdq(分块+虚树)

又一道 shaber 卡常题。卡了一下午常,目前只能勉强卡过校内 OJ 上时限 6s 的版本,原题则 T 飞了/ll

考虑分块,块长为 \(B\)。记第 \(i\) 块的左右端点为 \([L_i,R_i]\),那么我们先预处理出 \(sum_{i,j}=\sum\limits_{k=1}^{R_i}\text{dis}(k,j)\),即点 \(j\) 到前 \(i\) 块中所有点的距离之和,这样可以进一步求出 \(s_{i,j}=\sum\limits_{x=L_i}^{R_j}\sum\limits_{y=L_i}^{x-1}\text{dis}(x,y)\),即第 \(i\) 块到第 \(j\) 块内所有点对的距离之和。这样运用 \(O(1)\) LCA 可以做到 \(O(B)\) 查询整块间与散块间的贡献。而散块内部则是经典的虚树问题,建出虚树可以做到 \(O(B\log B)\) 查询,发现瓶颈在于排序,直接预处理每个块内对 dfn 排序后的结果然后左右两块归并一下可以做到 \(O(B)\)。

接下来是卡常的问题:

  1. 使用 DFS 序求 LCA instead of 欧拉序/树剖/倍增。详情可见某个人的博客。
  2. 预处理整块的时候不要 DFS,类似于 SAM 对 len 桶排以后 \(O(n)\) 处理的技巧,可以倒着枚举 DFS 序求 siz,这样常数会小不少。
  3. 在虚树计算散点的部分,发现我们只用知道每个点的 siz 就行了,而我们本身在求虚树的时候就是自底向上建的,因此甚至不用显式地把图建出来,直接在插入新点的时候合并信息就可以了。具体实现可以见代码。这是一个很有用的卡常技巧,去年联考搬过一个虚树卡常题当时就这么卡过去的,但当时忘了总结了/ll
  4. 原题卡空间,上面的做法如果直接把 \(sum_{i,j}\) 全部存下来,空间是 \(O(n^2/B)\) 的(每个块都要存一个长为 \(n\) 的数组),开不下。但发现可以离线:把询问挂在对应的块上,按块的顺序依次计算 \(sum\),这样 \(sum\) 就不用 \(i\) 那一维了。
  5. 极限调块长。我测的大概块长 \(B=250\) 时最优。

但是还是不足以卡进 4s 时限,有没有懂哥教教/kel

#pragma GCC optimize(3)
#pragma GCC target("avx")
#pragma GCC optimize("Ofast")
#pragma GCC optimize("inline")
#pragma GCC optimize("-fgcse")
#pragma GCC optimize("-fgcse-lm")
#pragma GCC optimize("-fipa-sra")
#pragma GCC optimize("-ftree-pre")
#pragma GCC optimize("-ftree-vrp")
#pragma GCC optimize("-fpeephole2")
#pragma GCC optimize("-ffast-math")
#pragma GCC optimize("-fsched-spec")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("-falign-jumps")
#pragma GCC optimize("-falign-loops")
#pragma GCC optimize("-falign-labels")
#pragma GCC optimize("-fdevirtualize")
#pragma GCC optimize("-fcaller-saves")
#pragma GCC optimize("-fcrossjumping")
#pragma GCC optimize("-fthread-jumps")
#pragma GCC optimize("-funroll-loops")
#pragma GCC optimize("-fwhole-program")
#pragma GCC optimize("-freorder-blocks")
#pragma GCC optimize("-fschedule-insns")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("-ftree-tail-merge")
#pragma GCC optimize("-fschedule-insns2")
#pragma GCC optimize("-fstrict-aliasing")
#pragma GCC optimize("-fstrict-overflow")
#pragma GCC optimize("-falign-functions")
#pragma GCC optimize("-fcse-skip-blocks")
#pragma GCC optimize("-fcse-follow-jumps")
#pragma GCC optimize("-fsched-interblock")
#pragma GCC optimize("-fpartial-inlining")
#pragma GCC optimize("no-stack-protector")
#pragma GCC optimize("-freorder-functions")
#pragma GCC optimize("-findirect-inlining")
#pragma GCC optimize("-fhoist-adjacent-loads")
#pragma GCC optimize("-frerun-cse-after-loop")
#pragma GCC optimize("inline-small-functions")
#pragma GCC optimize("-finline-small-functions")
#pragma GCC optimize("-ftree-switch-conversion")
#pragma GCC optimize("-foptimize-sibling-calls")
#pragma GCC optimize("-fexpensive-optimizations")
#pragma GCC optimize("-funsafe-loop-optimizations")
#pragma GCC optimize("inline-functions-called-once")
#pragma GCC optimize("-fdelete-null-pointer-checks")
#define FASTIO
#include<bits/stdc++.h>
//#include<ext/pb_ds/assoc_container.hpp>
//#include<ext/pb_ds/hash_policy.hpp>
//#include<ext/pb_ds/priority_queue.hpp>
using namespace std;
//using namespace __gnu_pbds;
#define fi first
#define se second
#define fill0(a) memset(a,0,sizeof(a))
#define fill1(a) memset(a,-1,sizeof(a))
#define fillbig(a) memset(a,63,sizeof(a))
#define pb push_back
#define ppb pop_back
#define mp make_pair
#define mt make_tuple
#define SZ(v) ((int)v.size())
#ifdef LOCAL
#define eprintf(...) fprintf(stderr,__VA_ARGS__)
#else
#define eprintf(...) 1064
#endif
// Lowers x to y when x compares greater than y; otherwise leaves x untouched.
template<typename T1,typename T2>
void chkmin(T1 &x,T2 y){
	if(!(x>y))return;
	x=y;
}
// Raises x to y when x compares less than y; otherwise leaves x untouched.
template<typename T1,typename T2>
void chkmax(T1 &x,T2 y){
	if(!(x<y))return;
	x=y;
}
typedef pair<int,int> pii;
typedef long long ll;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef long double ld;
#ifdef FASTIO
// Size in bytes of each I/O buffer. Parenthesised so the macro stays a single
// operand wherever it is expanded (e.g. FILE_SIZE/2).
#define FILE_SIZE (1<<23)
// rbuf holds the current chunk of stdin and [p1,p2) is its unread part;
// wbuf collects pending output and p3 points one past the last buffered byte.
char rbuf[FILE_SIZE],*p1=rbuf,*p2=rbuf,wbuf[FILE_SIZE],*p3=wbuf;
#ifdef LOCAL
// LOCAL builds go straight through stdio.
inline char getc(){return getchar();}
inline void putc(char c){putchar(c);}
#else
// Returns the next input byte, refilling rbuf from stdin once it is exhausted;
// returns -1 when fread delivers nothing more.
inline char getc(){return p1==p2&&(p2=(p1=rbuf)+fread(rbuf,1,FILE_SIZE,stdin),p1==p2)?-1:*p1++;}
// Appends one byte to wbuf. A full buffer is written out first so that long
// outputs cannot run past the end of the array.
inline void putc(char x){
	if(p3==wbuf+sizeof(wbuf))fwrite(wbuf,1,sizeof(wbuf),stdout),p3=wbuf;
	*p3++=x;
}
#endif
// Reads one decimal integer into x: skips everything up to the first digit
// (a '-' seen on the way negates the result), then consumes the digits and the
// single character that follows them.
// If the input ends before any digit is found, x is left at 0 instead of
// spinning forever. getc() reports end of input as (char)-1, so a literal 0xFF
// byte is treated the same way.
template<typename T>void read(T &x){
	x=0;char c=getc();T neg=0;
	const char eof=static_cast<char>(-1);
	// isdigit requires a value representable as unsigned char (or EOF).
	while(c!=eof&&!isdigit(static_cast<unsigned char>(c)))neg|=(c=='-'),c=getc();
	while(isdigit(static_cast<unsigned char>(c)))x=x*10+(c-'0'),c=getc();
	if(neg)x=-x;
}
// Emits the decimal digits of x, most significant first; emits nothing for 0.
template<typename T>void recursive_print(T x){
	if(x==0)return;
	recursive_print(x/10);
	putc((x%10)^48);
}
// Prints x in decimal: "0" for zero, a leading '-' for negative values.
template<typename T>void print(T x){
	if(x==0){
		putc('0');
		return;
	}
	if(x<0){
		putc('-');
		x=-x;
	}
	recursive_print(x);
}
// Prints x followed by the single character c.
template<typename T>void print(T x,char c){
	print(x);
	putc(c);
}
// Reads one token into s: skips bytes outside the printable range 33..126,
// copies the following run of bytes inside that range, and NUL-terminates s.
// The caller must supply a buffer large enough for the token plus terminator.
// Stops at end of input (getc() returning (char)-1) instead of looping forever;
// s is then the empty string if no token was found.
void readstr(char *s){
	const char eof=static_cast<char>(-1);
	char c=getc();
	while(c!=eof&&(c<=32||c>=127))c=getc();
	while(c>32&&c<127)s[0]=c,s++,c=getc();
	(*s)=0;
}
void printstr(string s){for(int i=0;i<s.size();i++)putc(s[i]);}
void printstr(char *s){int len=strlen(s);for(int i=0;i<len;i++)putc(s[i]);}
// Writes the bytes currently buffered in [wbuf,p3) to stdout.
void print_final(){
	const size_t pending=p3-wbuf;
	fwrite(wbuf,1,pending,stdout);
}
#endif
// Capacity bound used for the per-vertex and per-query arrays.
const int MAXN=2e5;
// Highest level index of the sparse table st.
const int LOG_N=18;
// Capacity bound for the per-block tables s1 and s2.
const int BLK=1000;
// n, qu: vertex count and query count read in main.
// hd/to/nxt/val/ec: linked adjacency lists filled by adde (hd[u] = newest edge
// id of u, nxt = next edge id in the same list, to/val = endpoint and weight,
// ec = number of edge ids handed out so far).
int n,qu,hd[MAXN+5],to[MAXN*2+5],nxt[MAXN*2+5],val[MAXN*2+5],ec;
void adde(int u,int v,int w){to[++ec]=v;val[ec]=w;nxt[ec]=hd[u];hd[u]=ec;};
// Filled by dfs: dep[x] = depth in edges, fa[x] = parent, fae[x] = weight of
// the edge between x and its parent.
int dep[MAXN+5],fa[MAXN+5],fae[MAXN+5];
// dfn[x]: preorder index of x; edt[x]: value of tim when dfs(x) finishes, i.e.
// the largest preorder index inside x's subtree; tim: preorder counter;
// rid[t]: vertex whose preorder index is t.
// st: sparse table used by getlca; st[0][t] is the parent of the vertex with
// preorder index t, and higher levels keep the shallower of two entries.
// ord: vertex labels, sorted by dfn inside each block (set up in main).
int dfn[MAXN+5],edt[MAXN+5],tim,rid[MAXN+5],st[LOG_N+2][MAXN+5],ord[MAXN+5];
// dis[x]: sum of edge weights from the DFS root to x, in wrapping u32 arithmetic.
u32 dis[MAXN+5];
// Returns whichever of x and y has the smaller depth; y wins ties.
int get(int x,int y){
	if(dep[x]<dep[y])return x;
	return y;
}
void dfs(int x,int f){
	st[0][dfn[x]=++tim]=fa[x]=f;rid[tim]=x;
	for(int e=hd[x];e;e=nxt[e]){
		int y=to[e],z=val[e];if(y==f)continue;
		fae[y]=z;dis[y]=dis[x]+z;dep[y]=dep[x]+1;dfs(y,x);
	}edt[x]=tim;
}
// Lowest common ancestor of u and v in O(1), using the sparse table st.
// st[0][t] holds the parent of the vertex with preorder index t, so for two
// distinct vertices the LCA is the shallowest entry of st[0] over the preorder
// positions (min(dfn[u],dfn[v]), max(dfn[u],dfn[v])].
int getlca(int u,int v){
	// Equal vertices would give an empty range and __lg(0), which is undefined.
	if(u==v)return u;
	if((u=dfn[u])>(v=dfn[v]))swap(u,v);
	// v-u is the range length; the post-increment moves u to the first position.
	int d=__lg(v-u++);
	// Two overlapping windows of length 2^d cover [u,v].
	return get(st[d][u],st[d][v-(1<<d)+1]);
}
// Block decomposition of vertex labels, set up in main: blk_sz = block length,
// blk_cnt = number of blocks, L[b]/R[b] = first/last label of block b,
// bel[j] = block containing label j (entries never assigned stay 0).
int blk_sz,blk_cnt,L[MAXN+5],R[MAXN+5],bel[MAXN+5];
namespace virt_tree{
	int pt[MAXN+5],len,stk[MAXN+5],tp=0,cnt[MAXN+5];u32 res;
	void _adde(int x,int y){
		res+=1u*(dis[y]-dis[x])*cnt[y]*(len-cnt[y]);
		cnt[x]+=cnt[y];cnt[y]=0;
	}
	void insert(int x){
		if(!tp)return stk[++tp]=x,void();
		int lc=getlca(x,stk[tp]);
		while(tp>=2&&dep[lc]<dep[stk[tp-1]])_adde(stk[tp-1],stk[tp]),tp--;
		if(tp&&dep[lc]<dep[stk[tp]])_adde(lc,stk[tp--]);
		if(!tp||stk[tp]!=lc)stk[++tp]=lc;stk[++tp]=x;
	}
	void fin(){while(tp>=2)_adde(stk[tp-1],stk[tp]),tp--;tp=0;}
	void calc(){
		if(!len)return;
		for(int i=1;i<=len;i++)cnt[pt[i]]=1;
		for(int i=1;i<=len;i++)insert(pt[i]);fin();
		cnt[stk[1]]=0;
	}
}using namespace virt_tree;
// ans[i]: answer of query i (wrapping u32). sum_in[b]: pair-distance sum of the
// vertices of block b as computed by calc() in main. s1/s2: per-block-range
// tables that main combines as s1[a][b]+s2[a][b].
u32 ans[MAXN+5],sum_in[MAXN+5],s1[BLK+5][BLK+5],s2[BLK+5][BLK+5];
// Query bounds as stored by main after --l and ++r, i.e. both exclusive.
struct qry{int l,r;}q[MAXN+5];
// qv[b]: offline requests (l, r, coefficient, query id) that main evaluates
// while its sweep is at block b.
vector<tuple<int,int,int,int> >qv[MAXN+5];
// Reads the tree and the queries, computes for every query [l,r] the sum of
// pairwise tree distances among the vertices labelled l..r (wrapping u32
// arithmetic), and prints one answer per line.
int main(){
	// Input and output are redirected to these files.
	freopen("flower.in","r",stdin);freopen("flower.out","w",stdout);
	read(n);read(qu);
	for(int i=1,u,v,w;i<n;i++)read(u),read(v),read(w),adde(u,v,w),adde(v,u,w);
	dfs(1,0);
	// Higher levels of the sparse table used by getlca.
	for(int i=1;i<=LOG_N;i++)for(int j=1;j+(1<<i)-1<=n;j++)
		st[i][j]=get(st[i-1][j],st[i-1][j+(1<<i-1)]);
	// Labels 1..n+1 are cut into blocks of blk_sz. Label n+1 is included because
	// queries are stored with exclusive bounds and r+1 can be n+1.
	blk_sz=250;blk_cnt=n/blk_sz+1;
	for(int i=1;i<=blk_cnt;i++){
		L[i]=(i-1)*blk_sz+1;R[i]=min(i*blk_sz,n+1);
		for(int j=L[i];j<=R[i];j++)bel[j]=i;
	}
	// ord[L[i]..R[i]]: labels of block i sorted by preorder index. ord[n+1] is
	// never assigned, so the last block carries a 0 in place of label n+1; the
	// query filters below require q[i].l<ord[j] and therefore skip it
	// (assumes every input l is at least 1).
	for(int i=1;i<=n;i++)ord[i]=i;
	for(int i=1;i<=blk_cnt;i++)sort(ord+L[i],ord+R[i]+1,[&](int x,int y){return dfn[x]<dfn[y];});
	// Pair-distance sum inside every block.
	for(int i=1;i<=blk_cnt;i++){
		res=len=tp=0;
		for(int j=L[i];j<=R[i];j++)pt[++len]=ord[j];
		calc();sum_in[i]=res;
	}
	for(int i=1;i<=qu;i++){
		// Store the query with exclusive bounds (l-1, r+1).
		read(q[i].l);read(q[i].r);--q[i].l;++q[i].r;
		if(bel[q[i].l]==bel[q[i].r]){
			// Whole query inside one block: take the block's dfn-sorted labels
			// that fall inside the query and run the virtual-tree sum.
			// calc() marks the selected vertices in cnt itself.
			res=len=tp=0;
			for(int j=L[bel[q[i].l]];j<=R[bel[q[i].l]];j++)
				if(q[i].l<ord[j]&&ord[j]<q[i].r)pt[++len]=ord[j];
			calc();ans[i]=res;
		}else{
			// Partial vertices of the block holding l (A) and of the block
			// holding r (B), each already in dfn order. bel[0] is 0 and
			// L[0]=R[0]=0, so A is empty when the stored l is 0.
			static int A[MAXN+5],B[MAXN+5];int la=0,lb=0;
			res=len=tp=0;
			for(int j=L[bel[q[i].l]];j<=R[bel[q[i].l]];j++)
				if(q[i].l<ord[j]&&ord[j]<q[i].r)A[++la]=ord[j];
			for(int j=L[bel[q[i].r]];j<=R[bel[q[i].r]];j++)
				if(q[i].l<ord[j]&&ord[j]<q[i].r)B[++lb]=ord[j];
			// Merge A and B by dfn so that calc() receives a sorted list.
			for(int x=1,y=1;x<=la||y<=lb;){
				if(y>lb||(x<=la&&dfn[A[x]]<dfn[B[y]]))pt[++len]=A[x++];
				else pt[++len]=B[y++];
			}
			// Pairs among the partial vertices.
			calc();ans[i]=res;
			// Partial-vs-whole-block pairs are the difference of two prefix
			// quantities, evaluated offline at blocks bel[r]-1 (+) and bel[l] (-).
			qv[bel[q[i].r]-1].pb(mt(q[i].l,q[i].r,1,i));
			qv[bel[q[i].l]].pb(mt(q[i].l,q[i].r,-1,i));
		} 
	}
	s1[1][1]=sum_in[1];
	// Sweep over the blocks. In iteration i, sum[x] is the total distance between
	// the labels 1..x and the labels 1..R[i].
	// NOTE(review): in the final iteration R[i] is n+1, but nothing consumes the
	// values then (qv[blk_cnt] receives no requests and the table update is
	// skipped) — confirm before reusing sum for the last block.
	for(int i=1;i<=blk_cnt;i++){
		static int siz[MAXN+5];static u32 dif[MAXN+5],sum[MAXN+5];
		// siz[v]: number of labels <=R[i] inside v's subtree. Children are folded
		// into their parents in reverse preorder, so no recursion is needed.
		for(int j=1;j<=n;j++)siz[j]=(j<=R[i]);
		for(int j=n;j>=2;j--)siz[fa[rid[j]]]+=siz[rid[j]];
		memset(dif,0,sizeof(dif));
		// Range add on preorder positions through a difference array.
		auto add=[&](int l,int r,u32 x){dif[l]+=x;dif[r+1]-=x;};
		// Contribution of the edge between x and fa[x]: a vertex outside x's
		// subtree crosses it once per counted vertex inside (siz[x]); a vertex
		// inside crosses it once per counted vertex outside (R[i]-siz[x]).
		for(int x=2;x<=n;x++){
			add(1,dfn[x]-1,1u*siz[x]*fae[x]);
			add(dfn[x],edt[x],1u*(R[i]-siz[x])*fae[x]);
			add(edt[x]+1,n,1u*siz[x]*fae[x]);
		}
		// After the prefix pass, dif[t] is the value for the vertex at preorder
		// position t; sum then accumulates it over labels.
		for(int x=1;x<=n;x++)dif[x]+=dif[x-1];
		for(int x=1;x<=n;x++)sum[x]=sum[x-1]+dif[dfn[x]];
		for(auto t:qv[i]){
			int l=get<0>(t),r=get<1>(t),coef=get<2>(t),id=get<3>(t);
			// Partial vertices of the left block are labels l+1..R[bel[l]],
			// those of the right block are labels L[bel[r]]..r-1.
			ans[id]+=(sum[R[bel[l]]]-sum[l])*coef;
			ans[id]+=(sum[r-1]-sum[L[bel[r]]-1])*coef;
		}
		if(i<blk_cnt){
			// s1[j][i+1] extends s1[j][i] by block i+1's internal sum plus the
			// distances between block i+1 and all labels <=R[i].
			for(int j=1;j<=i+1;j++)s1[j][i+1]=s1[j][i]+sum_in[i+1]+sum[R[i+1]]-sum[R[i]];
			// s2[i+1][j] is minus the distances between labels in (R[i],R[j]]
			// and labels <=R[i], i.e. the part of s1[i+1][j] that involves
			// blocks before i+1.
			for(int j=i;j<=blk_cnt;j++)s2[i+1][j]=-sum[R[j]]+sum[R[i]];
		}
	}
	// Add the pairs among the whole blocks strictly between the two end blocks.
	// For queries with no whole block in between, the entries looked up here are
	// never written and stay 0.
	for(int i=1;i<=qu;i++)ans[i]+=s1[bel[q[i].l]+1][bel[q[i].r]-1]+s2[bel[q[i].l]+1][bel[q[i].r]-1];
	for(int i=1;i<=qu;i++)print(ans[i],'\n');print_final();
	return 0;
}
/*
6 6
2 1 1
5 1 1
3 1 3
4 5 1
6 3 3
1 1
1 2
1 3
1 4
1 5
1 6
*/
posted @ 2023-05-12 21:37  tzc_wk  阅读(96)  评论(1编辑  收藏  举报