期望DP

期望DP

期望dp都是从后往前考虑的
状态一般这样设:\(dp_{st}\)表示从st状态到最终状态的期望
\(dp_{st}=\sum (dp_{st'}+w[st-->st'])*p[st-->st']\)
\(st'表示st之后能转移的状态,p[st-->st']表示st到st'状态的概率\)
\(w[st-->st']是从st状态转移到st'时对期望的贡献\)

例题

1.恶意竞争

把左右两边f[i][j]提出来放到一边即可,在这里\(w[st-->st']=1\)

点击查看代码
#include<functional>
#include<algorithm>
#include<iostream>
#include<cstdlib>
#include<cstring>
#include<complex>
#include<string>
#include<cstdio>
#include<vector>
#include<cmath>
#include<queue>
#include<deque>
#include<stack>
#include<map>
#include<set>
// Shorthand macros of a shared contest template.
#define ll long long 
#define pa pair<int,int>
#define mp make_pair
#define pb push_back
#define fi first
#define se second
// YES / NO print the answer and return; they expand to a bare `return;`,
// so they can only be used inside a function returning void.
#define YES {puts("YES");return;}
#define NO {puts("NO");return ;}
using namespace std;
// Template constants; none of them is referenced by the code visible in this program.
const int maxn=2e5+101;
const int MOD=20020219;
// NOTE(review): 2147383647 is not INT_MAX (2147483647) - one digit differs; confirm whether this is a typo.
const ll inf=2147383647;
const double eps=1e-12;

// Reads the next decimal integer from stdin using getchar().
// Every character that is not a digit is skipped; if a '-' is seen while
// skipping, the result is negated.
// Returns 0 when the end of input is reached before any digit is found
// (previously the skip loop never terminated at end of input).
long long read(){
    long long x=0,f=1;
    // getchar() returns int: keeping the value as int lets EOF be told apart
    // from real characters and keeps the argument passed to isdigit() valid.
    int ch=getchar();
    for(;ch!=EOF && !isdigit(ch);ch=getchar())if(ch=='-')f=-1;
    for(;ch!=EOF && isdigit(ch);ch=getchar())x=x*10+(ch-'0');
    return x*f;
}

// dp[i][j]: expected number of steps needed to go from state (i,j) to the
// final state (s,n). The table has 1002 rows/columns so that the i+1 / j+1
// look-ahead reads below stay inside it even when s or n is 1000 (the largest
// index the previous 1001-sized table could hold).
double dp[1002][1002];
double n,s;
// Reads n and s, fills dp backwards from the final state and prints dp[0][0]
// with five decimals.
int main(){
    std::cin>>n>>s;
    // The table is addressed by whole numbers, so convert once and loop on ints
    // instead of using floating-point loop counters.
    const int last_i=static_cast<int>(s);
    const int last_j=static_cast<int>(n);
    for(int i=last_i;i>=0;i--)for(int j=last_j;j>=0;j--){
        if(i==last_i && j==last_j)continue; // final state: dp stays 0
        const double di=i,dj=j;
        // The self-transition (i,j)->(i,j) has weight i*j out of s*n; moving
        // dp[i][j] to the left-hand side leaves s*n-i*j as the common divisor.
        const double t=s*n-di*dj;
        double expect=di*dj/t;                          // the step spent on a self-transition
        expect+=(dp[i+1][j]+1)*(s-di)*dj/t;             // only i advances
        expect+=(dp[i][j+1]+1)*di*(n-dj)/t;             // only j advances
        expect+=(dp[i+1][j+1]+1)*(s-di)*(n-dj)/t;       // both advance
        dp[i][j]=expect;
    }
    printf("%.5lf",dp[0][0]);
    return 0;
}

2.大富翁
\(dp_i\)表示从i到n的期望得分,初始化\(dp_n=a[n]\)
\(dp_i=\sum_{j=i+1}^{min(n,i+6)} \frac{1}{min(6,n-i)} (dp[j]+a[i])\)

点击查看代码
#include<functional>
#include<algorithm>
#include<iostream>
#include<cstdlib>
#include<cstring>
#include<complex>
#include<string>
#include<cstdio>
#include<vector>
#include<cmath>
#include<queue>
#include<deque>
#include<stack>
#include<map>
#include<set>
// Shorthand macros of a shared contest template.
#define ll long long 
#define pa pair<int,int>
#define mp make_pair
#define pb push_back
#define fi first
#define se second
// YES / NO print the answer and return; they expand to a bare `return;`,
// so they can only be used inside a function returning void.
#define YES {puts("YES");return;}
#define NO {puts("NO");return ;}
using namespace std;
// Template constants; none of them is referenced by the code visible in this program.
const int maxn=2e5+101;
const int MOD=20020219;
// NOTE(review): 2147383647 is not INT_MAX (2147483647) - one digit differs; confirm whether this is a typo.
const ll inf=2147383647;
const double eps=1e-12;

// Reads the next decimal integer from stdin using getchar().
// Every character that is not a digit is skipped; if a '-' is seen while
// skipping, the result is negated.
// Returns 0 when the end of input is reached before any digit is found
// (previously the skip loop never terminated at end of input).
long long read(){
    long long x=0,f=1;
    // getchar() returns int: keeping the value as int lets EOF be told apart
    // from real characters and keeps the argument passed to isdigit() valid.
    int ch=getchar();
    for(;ch!=EOF && !isdigit(ch);ch=getchar())if(ch=='-')f=-1;
    for(;ch!=EOF && isdigit(ch);ch=getchar())x=x*10+(ch-'0');
    return x*f;
}

// dp[i]: expected score collected from cell i to cell n; a[i]: score of cell i.
double dp[101],a[101];
// n is a cell count and is used as an array index, so it must be an integer
// (a double cannot be used as an array subscript).
int n;
// Reads n and a[1..n], evaluates dp from the last cell backwards and prints
// dp[1] with seven decimals.
int main(){
    std::cin>>n;
    for(int i=1;i<=n;i++)std::cin>>a[i];
    // Base case: standing on the last cell only its own score is collected.
    dp[n]=a[n];
    for(int i=n-1;i>=1;i--){
        // A move advances 1..6 cells but never past n, so only
        // min(6,n-i) targets exist and each is equally likely.
        const int reach=std::min(6,n-i);
        double sum=0;
        for(int j=1;j<=reach;j++)sum+=dp[i+j]+a[i];
        dp[i]=sum/reach;
    }
    printf("%.7lf",dp[1]);
    return 0;
}

3.骰子游戏
\(dp_i\)表示得分从i到大于n的期望次数,\(p_i\)表示一次得分加i分的概率,\(p_0\)表示得分回到0分的概率
\(dp_i=(\sum_t p_t*(dp_{i+t}+1) )+p_0*(dp_0+1)=1+\sum_t p_t*dp_{i+t} +p_0*dp_0\)
与之前的题不一样,dp转移方程有\(dp_0\)这一项,我们是从后往前推,但是\(dp_0\)我们无法得知
每个状态都和\(dp_0\)有关系,而且\(dp_0\)就是我们所求,为一个常数
对于这种方程,我们设
\(dp_i=A[i]*dp_0+B[i]\)
代入原始方程的右边
\(dp_i=(\sum_t p_t*(A[i+t]*dp_0+B[i+t]))+dp_0*p_0 +1\)
化简
\(dp_i=((\sum_t p_t*A[i+t])+p_0)*dp_0+(\sum_t p_t*B[i+t])+1\)
所以\(A[i]=((\sum_t p_t*A[i+t])+p_0),B[i]=(\sum_t p_t*B[i+t])+1\)
对于\(i>n,A[i]=B[i]=0,因为dp_i=0\)
从后往前递推求解A和B
\(dp_0=\frac{B[0]}{1-A[0]}\)

点击查看代码
#include<functional>
#include<algorithm>
#include<iostream>
#include<cstdlib>
#include<cstring>
#include<complex>
#include<string>
#include<cstdio>
#include<vector>
#include<cmath>
#include<queue>
#include<deque>
#include<stack>
#include<map>
#include<set>
// Shorthand macros of a shared contest template.
#define ll long long 
#define pa pair<int,int>
#define mp make_pair
#define pb push_back
#define fi first
#define se second
// YES / NO print the answer and return; they expand to a bare `return;`,
// so they can only be used inside a function returning void.
#define YES {puts("YES");return;}
#define NO {puts("NO");return ;}
using namespace std;
// Template constants; none of them is referenced by the code visible in this program.
const int maxn=2e5+101;
const int MOD=20020219;
// NOTE(review): 2147383647 is not INT_MAX (2147483647) - one digit differs; confirm whether this is a typo.
const ll inf=2147383647;
const double eps=1e-12;

// Reads the next decimal integer from stdin using getchar().
// Every character that is not a digit is skipped; if a '-' is seen while
// skipping, the result is negated.
// Returns 0 when the end of input is reached before any digit is found
// (previously the skip loop never terminated at end of input).
long long read(){
    long long x=0,f=1;
    // getchar() returns int: keeping the value as int lets EOF be told apart
    // from real characters and keeps the argument passed to isdigit() valid.
    int ch=getchar();
    for(;ch!=EOF && !isdigit(ch);ch=getchar())if(ch=='-')f=-1;
    for(;ch!=EOF && isdigit(ch);ch=getchar())x=x*10+(ch-'0');
    return x*f;
}
// n: score threshold; k[1..3]: number of faces of the three dice;
// (a,b,c): the face combination that sends the score back to 0.
int n,k[4],a,b,c;
// dp_i is written as A[i]*dp_0+B[i]; entries above n stay 0.
double A[1010],B[1010];
// Reads the parameters, builds the distribution of a single throw, computes
// the coefficients A and B from n down to 0 and prints dp_0=B[0]/(1-A[0]).
int main(){
    std::map<int,double> prob;
    n=read();
    k[1]=read();
    k[2]=read();
    k[3]=read();
    a=read();
    b=read();
    c=read();
    const double outcomes=k[1]*k[2]*k[3];
    const int maxGain=k[1]+k[2]+k[3];

    // Count the outcomes: key 0 is the reset combination, every other
    // outcome is counted under the sum of its three faces.
    for(int d1=1;d1<=k[1];++d1){
        for(int d2=1;d2<=k[2];++d2){
            for(int d3=1;d3<=k[3];++d3){
                const bool reset=(d1==a && d2==b && d3==c);
                if(reset){
                    prob[0]=1.0;
                }else{
                    prob[d1+d2+d3]+=1.0;
                }
            }
        }
    }
    // Turn the counts into probabilities.
    for(int g=0;g<=maxGain;++g){
        prob[g]=prob[g]/outcomes;
    }

    // Backward pass over the scores.
    for(int score=n;score>=0;--score){
        for(int g=1;g<=maxGain;++g){
            const double pg=prob[g];
            A[score]+=pg*A[score+g];
            B[score]+=pg*B[score+g];
        }
        B[score]+=1;        // the throw itself
        A[score]+=prob[0];  // reset to score 0 contributes dp_0
    }

    const double ans=B[0]/(1-A[0]);
    printf("%.7lf",ans);
    return 0;
}

4.食堂
因为有当前队伍总人数,吉吉的位置的二维状态
我们不妨设\(dp_{i,j}\)表示从\((i,j)状态(当前队伍总共i人,吉吉在j位置)\)到最终状态(食堂关门时吉吉在前k位)的概率
转移方程
\(当j=1,dp_{i,1}=p1*dp_{i,1}+p2*dp_{i,i}+p4\)

\(当2\leq j \leq k, dp_{i,j}=p1*dp_{i,j}+p2*dp_{i,j-1}+p3*dp_{i-1,j-1}+p4\)

\(当k<j,dp_{i,j}=p1*dp_{i,j}+p2*dp_{i,j-1}+p3*dp_{i-1,j-1}\)

把左右两边相同的移项,并设\(p21=\frac{p2}{1-p1}, p31=\frac{p3}{1-p1}, p41=\frac{p4}{1-p1}\)
所以转移方程为:
\(当j=1,dp_{i,1}=p21*dp_{i,i}+p41\)

\(当2\leq j \leq k, dp_{i,j}=p21*dp_{i,j-1}+p31*dp_{i-1,j-1}+p41\)

\(当k<j,dp_{i,j}=p21*dp_{i,j-1}+p31*dp_{i-1,j-1}\)

但是j=1时,我们无法得知\(dp_{i,i}\)的值,且后面\(dp_{i,j}\)都与它有关,那么我们可以像上面一道题一样,设
\(dp_{i,j}=A[j]*dp_{i,i}+B[j]\)

\(A[1]=p21,B[1]=p41\)

\(当2\leq j \leq k, A[j]=A[j-1]*p21, B[j]=B[j-1]*p21+p31*dp_{i-1,j-1}+p41\)

\(当k<j , \: \: \:\:\:\:\:\: A[j]=A[j-1]*p21, B[j]=B[j-1]*p21+p31*dp_{i-1,j-1}\)

\(dp_{i,i}=\frac{B[i]}{1-A[i]}\)

然后再代入上面的转移方程进行求解,最后答案为\(dp_{n,m}\)

点击查看代码
#include<functional>
#include<algorithm>
#include<iostream>
#include<cstdlib>
#include<cstring>
#include<complex>
#include<string>
#include<cstdio>
#include<vector>
#include<cmath>
#include<queue>
#include<deque>
#include<stack>
#include<map>
#include<set>
// Shorthand macros of a shared contest template.
#define ll long long 
#define pa pair<int,int>
#define mp make_pair
#define pb push_back
#define fi first
#define se second
// YES / NO print the answer and return; they expand to a bare `return;`,
// so they can only be used inside a function returning void.
#define YES {puts("YES");return;}
#define NO {puts("NO");return ;}
using namespace std;
// Template constants; none of them is referenced by the code visible in this program.
const int maxn=2e5+101;
const int MOD=20020219;
// NOTE(review): 2147383647 is not INT_MAX (2147483647) - one digit differs; confirm whether this is a typo.
const ll inf=2147383647;
const double eps=1e-12;

// Reads the next decimal integer from stdin using getchar().
// Every character that is not a digit is skipped; if a '-' is seen while
// skipping, the result is negated.
// Returns 0 when the end of input is reached before any digit is found
// (previously the skip loop never terminated at end of input).
long long read(){
    long long x=0,f=1;
    // getchar() returns int: keeping the value as int lets EOF be told apart
    // from real characters and keeps the argument passed to isdigit() valid.
    int ch=getchar();
    for(;ch!=EOF && !isdigit(ch);ch=getchar())if(ch=='-')f=-1;
    for(;ch!=EOF && isdigit(ch);ch=getchar())x=x*10+(ch-'0');
    return x*f;
}
// n: queue length, m: starting position, k: the answer counts the final event
// only while the position is within the first k places.
int n,m,k;
// dp[i][j]: probability of reaching the target final state from the state
// "queue has i people, tracked person is at position j".
double p1,p2,p3,p4,dp[2001][2001];
// Reads n, m, k and the four transition probabilities, solves the cyclic
// recurrence row by row and prints dp[n][m] with five decimals.
int main(){ 
    n=read();m=read();k=read();
    std::cin>>p1>>p2>>p3>>p4;
    // Every dp value gains probability only through the p4 term. With p4 == 0
    // the answer is 0, and the divisions below (by 1-p1 and by 1-A[i]) could
    // be 0/0, so answer directly. Same threshold as the file-level eps.
    const double negligible=1e-12;
    if(p4<negligible){
        printf("%.5lf",0.0);
        return 0;
    }
    // Divide the self-loop (probability p1) out of the other transitions.
    const double moving=1-p1;
    const double p21=p2/moving,p31=p3/moving,p41=p4/moving;
    // Within row i write dp[i][j]=A[j]*dp[i][i]+B[j]. The buffers are allocated
    // once; entries 1..i are overwritten for every row before being read.
    std::vector<double> A(n+1),B(n+1);
    for(int i=1;i<=n;i++){
        A[1]=p21;B[1]=p41;
        for(int j=2;j<=i;j++){
            // p41 is gained only while the position is within the first k places.
            const double gain=(j<=k)?p41:0.0;
            A[j]=A[j-1]*p21;
            B[j]=B[j-1]*p21+gain+p31*dp[i-1][j-1];
        }
        // dp[i][i]=A[i]*dp[i][i]+B[i]  =>  dp[i][i]=B[i]/(1-A[i]).
        dp[i][i]=B[i]/(1-A[i]);
        // With dp[i][i] known the rest of the row follows from the plain recurrence.
        for(int j=1;j<i;j++){
            if(j==1){
                dp[i][j]=p21*dp[i][i]+p41;
            }
            else {
                const double gain=(j<=k)?p41:0.0;
                dp[i][j]=p21*dp[i][j-1]+p31*dp[i-1][j-1]+gain;
            }
        }
    }
    printf("%.5lf",dp[n][m]);
    return 0;
}
posted @ 2022-08-10 13:11  I_N_V  阅读(16)  评论(0编辑  收藏  举报