POJ 3294 Life Forms 二分 + 哈希

终于搞定这题了:先是 TLE,后是 MLE,然后是 WA,AC 得不容易啊。多项式差值取模,神奇的算法。用时 1280ms。

View Code
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<iostream>
#include<algorithm>
#include<string>
#include<vector>
#include<set>
using namespace std;
// Base of the polynomial rolling hash: Fly[] holds its powers and get_sub
// multiplies the running hash by it when the window slides.
#define Prime 113
// Number of buckets in Phash; a key is stored in bucket (key % MOD).
#define MOD 5210
// N:   number of DNA strings in the current test case (read in main).
// yes: reset to 0 by init() and set to 1 by get_hash() when some key's
//      count exceeds N / 2.
int N, yes;
// Input strings; main reads them into DNA[1] .. DNA[N].
char DNA[112][1210];
// NOTE(review): with `using namespace std`, the unqualified name `hash` can be
// ambiguous with std::hash on C++11 and later compilers.
int hash[1020]; // per-length counts of the input strings (filled in main)
// One hash-table entry: a substring hash plus the bookkeeping get_hash needs
// to count how many different strings produced that hash.
struct Anode
{
  unsigned long long key; // hash value of the substring
  int Num;                // index of the string this entry was last counted for
  int cnt;                // number of occurrences counted so far
};

// File-scope scratch entry used by get_hash.
Anode info;
// Bucketed hash table: Phash[key % MOD] holds the entries whose key maps to
// that bucket. Emptied by init() at the start of every solve() call.
vector<Anode>Phash[MOD];
 

// My:    substrings recorded by get_hash during the current solve() call.
// Myset: copy of My taken by the most recent solve() call that succeeded;
//        main prints its contents as the answer.
set<string>Myset, My;

// Fly[i] = Prime^i, filled in main for i = 0 .. 1000 (unsigned 64-bit
// arithmetic, so the values wrap modulo 2^64).
unsigned long long Fly[1010];
//unsigned long long my[1010];

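/*
Algorithm:
1. Binary search on the substring length K in [1, len].
   Problem statement: "you are to find the longest substring that is shared by more than half of them."
2. Hash table: for the current K, hash every length-K substring of every DNA
   string and store the hashes in a bucketed table (an array of vectors).

3. Enumerate every length-K substring of each DNA string.

4. If some substring occurs in more than half of the strings, set l = mid + 1 and keep searching.

*/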
 
void init( )
{
   for( int i = 0; i < MOD; i++)
       Phash[i].clear();
   yes = 0;
}

//key, 字符串号, 当前长度 
int get_hash(int Num, unsigned long long key,int k, char *str)
{
   int x = key % MOD;
   int n = (int) Phash[x].size( );
   int flag = 0;
   for(int i = 0; i < n; i++)
   {
      info = Phash[x][i];
      if( info.key == key )
      {   
          flag = 1;
          if( info.Num !=  Num )
          {
             Phash[x][i].cnt++;
             Phash[x][i].Num = Num;
             if( Phash[x][i].cnt > N / 2 )
             {  
                 yes = 1;
                 My.insert(str);
                //printf("k =%d here:%s\n",k,str);
                 return 1;      
             }   
          }
      }        
      
   }  
   if( !flag )
   {
      info.key = key;
      info.Num = Num;
      info.cnt = 1;
      Phash[x].push_back(info);
   }
   return 0; 
}


/*
 * Pass every length-x substring of DNA[t] to get_hash together with its
 * polynomial rolling hash (base Prime, unsigned 64-bit wraparound).
 * Returns immediately when DNA[t] is shorter than x.
 * Callers pass x >= 1.
 */
void get_sub( int t, int x )
{
   const char *text = DNA[t];
   const int length = strlen(text);
   if( x > length )
       return;

   char window[1010];            // NUL-terminated copy of the current substring
   window[x] = '\0';

   // Hash of the first window: sum over i of code(text[i]) * Prime^(x-1-i),
   // where code('a') == 1.
   unsigned long long rolling = 0;
   for( int pos = 0; pos < x; pos++)
       rolling += (text[pos] - 'a' + 1) * Fly[x-pos-1];
   memcpy(window, text, x);
   get_hash( t, rolling, x, window);

   // Slide the window one character at a time: drop the leading character's
   // term, shift the rest up by one power of Prime, append the new character.
   for( int start = 1; start + x <= length; start++)
   {
       rolling -= (text[start-1] - 'a' + 1) * Fly[x-1];
       rolling *= Prime;
       rolling += text[start + x - 1] - 'a' + 1;
       memcpy(window, text + start, x);
       get_hash( t, rolling, x, window);
   }
}

/*
 * Check whether get_hash flags any substring of length x across the N input
 * strings. On success the recorded substrings are copied from My into Myset
 * and 1 is returned; otherwise Myset is left untouched and 0 is returned.
 */
int solve( int x  )
{
  init( );
  My.clear( );

  int idx = 1;
  while( idx <= N )
  {
      get_sub(idx, x);
      ++idx;
  }

  if( !yes )
      return 0;

  Myset = My;
  return 1;
}



/*
 * Binary search over substring lengths in [l, r]: returns the largest length
 * for which solve() succeeded, or 0 if it never did. Because every success
 * moves the lower bound up, the last successful solve() call (the one that
 * filled Myset) is the one for the returned length.
 */
int find( int l, int r )
{
   int best = 0;
   while( l <= r )
   {
      const int mid = (l + r) / 2;
      if ( !solve(mid) )
      {
         r = mid - 1;
         continue;
      }
      best = mid;
      l = mid + 1;
   }
   return best;
}
 
int main( )
{ 
  int vv = 0; 
  Fly[0] = 1;
  for( int i = 1; i <= 1000; i++)
     Fly[i] = Fly[i-1] * Prime; 
  while( scanf("%d",&N),N)
  {
     int maxn = 0;
     memset(hash, 0, sizeof(hash));
     for( int i = 1; i <= N; i++)
     {
       scanf("%s", DNA[i]);
       int len = strlen(DNA[i]);
       hash[len]++;
       if( len > maxn )
           maxn = len;
     }
     int f = 0;
     //找到有效区间范围
     for( int x = maxn; x >= 1; x--)
     {
        if ( hash[x] > N/2 )
        {
            f = x; 
            break;   
        }
        else
           hash[x-1] += hash[x];
        
     }
     int ans = find(1, maxn);
     if( vv ) puts(" ");
     vv = 1;
     if( ans == 0 )
         puts("?");
     else
     {
      set<string>::iterator its; 
      for( its = Myset.begin(); its != Myset.end(); its++)
         cout<<*its<<endl;
      
     }
  }   
  return 0;
}

posted on 2012-07-22 07:29  more think, more gains  阅读(149)  评论(0编辑  收藏  举报

导航