BZOJ2468 : [中山市选2010]三核苷酸

令d[i]为第i个样本数据,cnt为样本个数,经过化简可得

\[ans=\frac{\sum(d[i]^2)}{cnt}-(\frac{\sum d[i]}{cnt})^2\]

枚举每一种可能的三核苷酸,得到它出现的各个位置,假设当前出现了tot个,第i个的编号为a[i],经过化简可得

\[cnt+=C_{tot}^2\]

\[\sum d[i]+=\sum (a[i+1]-a[i])i(tot-i)\]

\[\sum (d[i]^2)+=tot\sum(a[i]^2)-(\sum a[i])^2\]

时间复杂度$O(n)$。

 

#include<cstdio>
#include<cstring>
#define N 100010
typedef long long ll;
int T,n,i,j,tot[64],q[64][N];ll cnt,sumd,sumd2,s,s2;char a[N];
inline ll C2(ll x){return x*(x-1)/2;}
inline ll sqr(ll x){return x*x;}
double sqr(double x){return x*x;}
double solve(){
  scanf("%s",a+1);n=strlen(a+1);
  for(i=1;i<=n;i++){
    if(a[i]=='A')a[i]=0;
    else if(a[i]=='G')a[i]=1;
    else if(a[i]=='C')a[i]=2;
    else a[i]=3;
  }
  for(cnt=sumd=sumd2=i=0;i<64;i++)tot[i]=0;
  for(i=1;i<n-1;i++)j=a[i]|(a[i+1]<<2)|(a[i+2]<<4),q[j][++tot[j]]=i;
  for(i=0;i<64;i++)if(tot[i]>=2){
    cnt+=C2(tot[i]);
    for(j=1;j<tot[i];j++)sumd+=1LL*(q[i][j+1]-q[i][j])*j*(tot[i]-j);
    for(s=s2=0,j=1;j<=tot[i];j++)s+=q[i][j],s2+=sqr(1LL*q[i][j]);
    sumd2+=s2*tot[i]-sqr(s);
  }
  if(!cnt)return 0;
  return 1.0*sumd2/cnt-sqr(1.0*sumd/cnt);
}
int main(){
  for(scanf("%d",&T);T--;printf("%.6f\n",solve()));
  return 0;
}

  

posted @ 2015-09-17 23:02  Claris  阅读(316)  评论(0编辑  收藏  举报