【原】 POJ 1007 DNA Sorting 逆序数解题报告

http://poj.org/problem?id=1007

求逆序数的方法：
1、此题每个string只有4种字母，所以可以用类似counting sort的方法逆序扫描字符串，并以a[1...3]记录已扫描部分中相应字符组（a[1]：A；a[2]：A、C；a[3]：A、C、G）的个数，从而计算每个字符与其后面字符构成的逆序数。复杂度O(n)。但若输入的字母种类没有限制，就不能靠a[1...3]这样做了：那样每扫描一个字符需要++的数组位很多，需要判断的分支也很多，因此复杂度太高。
2、利用merge sort求逆序数，复杂度O(n*lgn)；再对所有字符串按逆序数排序（这里用multimap简单代替）

Description

One measure of ``unsortedness'' in a sequence is the number of pairs of entries that are out of order with respect to each other. For instance, in the letter sequence ``DAABEC'', this measure is 5, since D is greater than four letters to its right and E is greater than one letter to its right. This measure is called the number of inversions in the sequence. The sequence ``AACEDGG'' has only one inversion (E and D)---it is nearly sorted---while the sequence ``ZWQM'' has 6 inversions (it is as unsorted as can be---exactly the reverse of sorted).
You are responsible for cataloguing a sequence of DNA strings (sequences containing only the four letters A, C, G, and T). However, you want to catalog them, not in alphabetical order, but rather in order of ``sortedness'', from ``most sorted'' to ``least sorted''. All the strings are of the same length.

Input

The first line contains two integers: a positive integer n (0 < n <= 50) giving the length of the strings; and a positive integer m (0 < m <= 100) giving the number of strings. These are followed by m lines, each containing a string of length n.

Output

Output the list of input strings, arranged from ``most sorted'' to ``least sorted''. Since two strings can be equally sorted, output them according to the original order.

Sample Input

10 6

AACATGAAGG

TTTTGGCCAA

TTTGGCCAAA

GATCAGATTT

CCCGGGGGGA

ATCGATGCAT

Sample Output

CCCGGGGGGA

AACATGAAGG

GATCAGATTT

ATCGATGCAT

TTTTGGCCAA

TTTGGCCAAA

1:

   2: #include <stdio.h>

   3: #include <iostream>

   4: #include <string>

   5: #include <map>

   6: #include <fstream>

7:

   8: using namespace std ;

9:

  // Counts the inversions in the first n characters of a DNA string by
  // scanning it from the last character to the first.
  //
  // str : characters to inspect; only 'A', 'C', 'G' and 'T' take part in the
  //       count, any other character is skipped.
  // n   : number of characters of str to scan.
  //
  // Returns the number of pairs (i, j), i < j, where str[i] is a larger
  // letter than str[j]. Runs in a single pass over the n characters.
  __int64 GetInversionCount( char *str, int n )
  {
      // Tallies over the part of the string already visited (to the right
      // of the current position).
      __int64 seenA   = 0 ;   // letters equal to 'A'
      __int64 seenAC  = 0 ;   // letters that are 'A' or 'C'
      __int64 seenACG = 0 ;   // letters that are 'A', 'C' or 'G'
      __int64 inversions = 0 ;

      for( int pos = n - 1 ; pos >= 0 ; --pos )
      {
          const char letter = str[pos] ;

          if( letter == 'A' )
          {
              // 'A' is the smallest letter: it inverts with nothing after it.
              ++seenA ;
              ++seenAC ;
              ++seenACG ;
          }
          else if( letter == 'C' )
          {
              // 'C' inverts with every 'A' to its right.
              inversions += seenA ;
              ++seenAC ;
              ++seenACG ;
          }
          else if( letter == 'G' )
          {
              // 'G' inverts with every 'A' and 'C' to its right.
              inversions += seenAC ;
              ++seenACG ;
          }
          else if( letter == 'T' )
          {
              // 'T' inverts with every 'A', 'C' and 'G' to its right.
              inversions += seenACG ;
          }
      }

      return inversions ;
  }

45:

  46: //**********************************

47:

  // Running inversion total accumulated by Merge. It is only ever added to
  // here, so whoever reads it is responsible for resetting it to 0.
  __int64 inversionNum = 0 ;

  // Merges the two adjacent runs a[lb .. rb-1] and a[rb .. re] into one
  // ascending run, using tmpArr[lb .. re] as scratch space, and copies the
  // result back into a. Both runs are expected to be sorted already (Msort
  // guarantees this).
  //
  // Every time an element of the right run is placed ahead of elements still
  // waiting in the left run, the number of those waiting elements is added to
  // inversionNum. Equal elements are taken from the left run first, so ties
  // never count as inversions.
  void Merge( char *a, char* tmpArr, int lb, int rb, int re )
  {
      const int first = lb ;      // start of the merged range
      const int le = rb - 1 ;     // last index of the left run
      int out = first ;           // next free slot in tmpArr

      for( ; lb <= le && rb <= re ; ++out )
      {
          if( a[rb] < a[lb] )
          {
              // a[rb] jumps over everything left in a[lb .. le].
              inversionNum += le - lb + 1 ;
              tmpArr[out] = a[rb++] ;
          }
          else
          {
              tmpArr[out] = a[lb++] ;
          }
      }

      // At most one of the two runs still has elements; append them.
      for( ; lb <= le ; ++lb, ++out )
          tmpArr[out] = a[lb] ;
      for( ; rb <= re ; ++rb, ++out )
          tmpArr[out] = a[rb] ;

      // Copy the merged range back into the caller's array.
      for( int k = first ; k <= re ; ++k )
          a[k] = tmpArr[k] ;
  }

78:

  79: void Msort( char *a, char *tmpArr, int b, int e )

  80: {

  81:     if(b>=e)

  82:         return ;

  83:     int mid = b+(e-b)/2 ;

  84:     Msort(a,tmpArr,b,mid) ;

  85:     Msort(a,tmpArr,mid+1,e) ;

  86:     Merge(a,tmpArr,b,mid+1,e) ;

  87: }

88:

  89: void MergeSort( char *a , int n )

  90: {

  91:     char *tmpArr = new char[n+1] ;

  92:     Msort(a,tmpArr,0,n-1) ;

  93:     delete []tmpArr ;

  94: }

95:

  96: //**********************************

97:

  98: void run1007()

  99: {

 100:     ifstream in("in.txt");

 101:

 102:     int n,m ;

 103:     string tmpStr ;

 104:     multimap< __int64 , string > strMap ;

 105:     multimap< __int64 , string >::iterator iter ;

 106:

 107:     in>>n>>m ;

 108:

 109:     char *a = new char[n+1] ;  //末尾'0'

 110:

 111:     while( m-- && in>>a )  //不能用gets(a)，不然会从第一行输入的末尾开始，其末尾为换行符，而gets不会读入换行符

 112:     {                       //所以gets读入的第一行字符为空字符

 113:         tmpStr = a ;

 114:         MergeSort(a,n) ;

 115:         strMap.insert( make_pair(inversionNum,tmpStr) ) ;

 116:         inversionNum = 0 ;

 117:     }

 118:

 119:     for( iter=strMap.begin() ; iter!=strMap.end() ; ++iter )

 120:         cout<<iter->second<<endl ;

 121:

 122:     delete []a ;

 123: }

 124:

 125:

 126: void run1007_1()

 127: {

 128:     ifstream in("in.txt");

 129:

 130:     int n,m ;

 131:     string tmp ;

 132:     multimap< __int64 , string > strMap ;

 133:     multimap< __int64 , string >::iterator iter ;

 134:

 135:     in>>n>>m ;

 136:

 137:     char *a = new char[n+1] ;  //末尾'0'

 138:

 139:     while( m-- && in>>a )  //不能用gets(a)，不然会从第一行输入的末尾开始，其末尾为换行符，而gets不会读入换行符

 140:     {                       //所以gets读入的第一行字符为空字符

 141:         /*

 142:         //这样会导致strMap中所有的second都是最后一个读入的字符串

 143:         //因为作为second的a每次输入时都会被改变，其并没有复制

 144:         __int64 count = GetInversionCount(a,n) ;

 145:         strMap.insert( make_pair(count,a) ) ;

 146:         */

 147:         tmp = a ;

 148:         __int64 count = GetInversionCount(a,n) ;

 149:         strMap.insert( make_pair(count,tmp) ) ;

 150:

 151:     }

 152:

 153:     for( iter=strMap.begin() ; iter!=strMap.end() ; ++iter )

 154:         cout<<iter->second<<endl ;

 155:

 156:     delete []a ;

 157: }

posted @ 2010-11-05 14:36 Allen Sun 阅读(393) 评论(0) 编辑收藏举报

Allen Sun的技术博客

SDE LIFE

【原】 POJ 1007 DNA Sorting 逆序数解题报告

公告

Allen Sun的技术博客

SDE LIFE

【原】 POJ 1007 DNA Sorting 逆序数 解题报告

公告

【原】 POJ 1007 DNA Sorting 逆序数解题报告