Atcoder ABC388F Dangerous Sugoroku 题解 [ 蓝 ] [ 矩阵加速 ] [ 状压矩乘 ] [ 模拟 ]

Dangerous Sugoroku：赛时写了矩乘 T 飞了，受到 sunkuangzheng 大佬的启发才知道要状压矩乘。

暴力矩乘思路

直接像过河那样写模拟细节非常多，于是考虑像美食家一样的思路，利用矩阵分段加速。

定义 $d p_{i}$ 表示 $i$ 能否到达，则有如下转移：

d p_{i} = ⋁_{j = i - B}^{i - A} d p_{j}

因为 $A, B \leq 20$ ，所以可用状态非常少，就可以用矩阵优化了。

那么很显然就把段拆一下，然后直接跑矩乘就好了。这个做法甚至连 $m = 0, l_{i} - 1 = r_{i - 1}$ 的细节都不用判。

代码如下，如果你真这么写了，会收获 TLE 和 WA 的好成绩！

下面这份暴力中 $d p_{i}$ 表示 $d p_{i}$ 的方案数，所以转移有所不同，本质还是和上面一样的。只要 $d p_{i} > 0$ 就能到达。

#include <bits/stdc++.h>
#define fi first
#define se second
#define lc (p<<1)
#define rc ((p<<1)|1)
#define eb(x) emplace_back(x)
#define pb(x) push_back(x)
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
typedef long double ldb;
using pi=pair<int,int>;
struct mat{
	ull a[25][25];
	mat(){memset(a,0,sizeof(a));}
	mat operator*(const mat &x)const{
		mat res;
		for(int i=0;i<=20;i++)
		{
			for(int k=0;k<=20;k++)
			{
				ull l=a[i][k];
				for(int j=0;j<=20;j++)
				{
					res.a[i][j]=(res.a[i][j]+l*x.a[k][j]);
				}
			}
		}
		return res;
	}
}s,dp1,dp2;
mat qpow(mat x,ll k)
{
	mat res;
	for(int i=0;i<=20;i++)res.a[i][i]=1;
	while(k>0)
	{
		if(k&1)res=res*x;
		x=x*x;
		k>>=1;
	}
	return res;
}
ll n,m,a,b,l[200005],r[200005];
int main()
{
    //freopen("sample.in","r",stdin);
    //freopen("sample.out","w",stdout);
    ios::sync_with_stdio(0);
    cin.tie(0);
    cout.tie(0);
    cin>>n>>m>>a>>b;
    s.a[1][20]=1;
    for(int i=1;i<=19;i++)dp1.a[i+1][i]=dp2.a[i+1][i]=1;
    for(int i=20-a+1;i>=20-b+1;i--)dp1.a[i][20]=1;
    r[0]=1;
    for(int i=1;i<=m;i++)
    {
        cin>>l[i]>>r[i];
        if(r[i]-l[i]+1>=21||l[i]==1)
        {
            cout<<"No";
            return 0;
        }
        if(l[i]-r[i-1]-1>0)s=s*qpow(dp1,l[i]-r[i-1]-1);
        if(r[i]-l[i]+1>0)s=s*qpow(dp2,r[i]-l[i]+1);
    }
    if(n+1-r[m]-1>0)s=s*qpow(dp1,n+1-r[m]-1);
    if(s.a[1][20])cout<<"Yes";
    else cout<<"No";
    return 0;
}

原因是复杂度为 $O (m \times 20^{3} \log n)$ ，常数又大，无法通过。

优化矩乘

因为状态中只有 $0$ 和 $1$ ，每行每列的数又只有 $20$ 个，考虑用状压与位运算去掉一个 $20$ 。

我们先看暴力的矩乘代码，再来考虑如何优化它。

struct mat{
	ull a[25][25];
	mat(){memset(a,0,sizeof(a));}
	mat operator*(const mat &x)const{
		mat res;
		for(int i=0;i<=20;i++)
		{
			for(int k=0;k<=20;k++)
			{
				for(int j=0;j<=20;j++)
				{
					res.a[i][j]=(res.a[i][j]|(a[i][k]&x.a[k][j]));
				}
			}
		}
		return res;
	}
}

可以发现，当 $a_{i, k} = 1$ 时， $r e s_{i, j}$ 取决于 $x_{i, j}$ 是否为 $1$ 。

因此，就可以省去循环 $j$ ，当 $a_{i, k} = 1$ 时，直接让 $r e s_{i} \leftarrow r e s_{i} \lor x_{k}$ 即可。

时间复杂度 $O (m \times 20^{2} \log n)$ ，可以通过。

代码如下：

#include <bits/stdc++.h>
#define fi first
#define se second
#define lc (p<<1)
#define rc ((p<<1)|1)
#define eb(x) emplace_back(x)
#define pb(x) push_back(x)
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
typedef long double ldb;
using pi=pair<int,int>;
struct mat{
	ull a[25];
	mat(){memset(a,0,sizeof(a));}
	mat operator*(const mat &x)const{
		mat res;
		for(int i=0;i<=20;i++)
		{
			for(int k=0;k<=20;k++)
			{
				if((a[i]>>k)&1)res.a[i]=(res.a[i]|x.a[k]);
			}
		}
		return res;
	}
}s,dp1,dp2;
mat qpow(mat x,ll k)
{
	mat res;
	for(int i=0;i<=20;i++)res.a[i]=(1<<i);
	while(k>0)
	{
		if(k&1)res=res*x;
		x=x*x;
		k>>=1;
	}
	return res;
}
ll n,m,a,b,l[200005],r[200005];
int main()
{
    ios::sync_with_stdio(0);
    cin.tie(0);
    cout.tie(0);
    cin>>n>>m>>a>>b;
    s.a[1]=(1<<20);
    for(int i=1;i<=19;i++)dp1.a[i+1]=dp2.a[i+1]=(1<<i);
    for(int i=20-a+1;i>=20-b+1;i--)dp1.a[i]|=(1<<20);
    r[0]=1;
    for(int i=1;i<=m;i++)
    {
        cin>>l[i]>>r[i];
        if(r[i]-l[i]+1>=21||l[i]==1)
        {
            cout<<"No";
            return 0;
        }
        if(l[i]-r[i-1]-1>0)s=s*qpow(dp1,l[i]-r[i-1]-1);
        if(r[i]-l[i]+1>0)s=s*qpow(dp2,r[i]-l[i]+1);
    }
    if(n+1-r[m]-1>0)s=s*qpow(dp1,n+1-r[m]-1);
    if((s.a[1]>>20)&1)cout<<"Yes";
    else cout<<"No";
    return 0;
}