【原】 POJ 1007 DNA Sorting 逆序数 解题报告
http://poj.org/problem?id=1007
求逆序数的方法:
1、此题每个string只有4种字母,所以可以用类似counting sort的方法:从后往前扫描字符串,用a[1...3]分别记录已扫描部分中"A"、"A或C"、"A或C或G"的个数,每扫描到一个字符,就把其后面比它小的字符个数累加到逆序数中。复杂度O(n)。但若输入的字母种类没有限制,就不能靠a[1...3]这样做了:那样每扫描一个字符需要++的数组位很多,需要判断的分支也很多,因此复杂度太高。
2、利用merge sort求逆序数n*lgn,对所有字符串的逆序数排序(这里用multimap简单代替)
Description
One measure of ``unsortedness'' in a sequence is the number of pairs of entries that are out of order with respect to each other. For instance, in the letter sequence ``DAABEC'', this measure is 5, since D is greater than four letters to its right and E is greater than one letter to its right. This measure is called the number of inversions in the sequence. The sequence ``AACEDGG'' has only one inversion (E and D)---it is nearly sorted---while the sequence ``ZWQM'' has 6 inversions (it is as unsorted as can be---exactly the reverse of sorted).
You are responsible for cataloguing a sequence of DNA strings (sequences containing only the four letters A, C, G, and T). However, you want to catalog them, not in alphabetical order, but rather in order of ``sortedness'', from ``most sorted'' to ``least sorted''. All the strings are of the same length.
Input
The first line contains two integers: a positive integer n (0 < n <= 50) giving the length of the strings; and a positive integer m (0 < m <= 100) giving the number of strings. These are followed by m lines, each containing a string of length n.
Output
Output the list of input strings, arranged from ``most sorted'' to ``least sorted''. If two strings are equally sorted, output them in their original order.
Sample Input
10 6
AACATGAAGG
TTTTGGCCAA
TTTGGCCAAA
GATCAGATTT
CCCGGGGGGA
ATCGATGCAT
Sample Output
CCCGGGGGGA
AACATGAAGG
GATCAGATTT
ATCGATGCAT
TTTTGGCCAA
TTTGGCCAAA
1:
2: #include <stdio.h>
3: #include <iostream>
4: #include <string>
5: #include <map>
6: #include <fstream>
7:
8: using namespace std ;
9:
// Counts the inversions in a DNA string (letters 'A' < 'C' < 'G' < 'T') with a
// single right-to-left scan, so the cost is linear in n.
//
// str : the characters to examine; only str[0..n-1] are read.
// n   : number of characters to examine; n <= 0 yields 0.
// Returns the number of pairs (i, j) with i < j and str[i] > str[j].
// Characters other than 'A', 'C', 'G', 'T' are skipped: they neither count as
// inversions nor affect the running totals.
//
// The result type is `long long` rather than the MSVC-only `__int64` so the
// function also builds with GCC/Clang; on MSVC the two are the same type.
long long GetInversionCount( char *str, int n )
{
    // Running totals over the suffix that has already been scanned.
    long long seenA   = 0 ;   // number of 'A'
    long long seenAC  = 0 ;   // number of 'A' or 'C'
    long long seenACG = 0 ;   // number of 'A', 'C' or 'G'
    long long cnt     = 0 ;

    for( int i=n-1 ; i>=0 ; --i )
    {
        switch( str[i] )
        {
        case 'A' :   // smallest letter: never forms an inversion with its suffix
            ++seenA ;
            ++seenAC ;
            ++seenACG ;
            break ;
        case 'C' :   // inverted against every 'A' to its right
            cnt += seenA ;
            ++seenAC ;
            ++seenACG ;
            break ;
        case 'G' :   // inverted against every 'A' and 'C' to its right
            cnt += seenAC ;
            ++seenACG ;
            break ;
        case 'T' :   // inverted against every 'A', 'C' and 'G' to its right
            cnt += seenACG ;
            break ;
        }
    }
    return cnt ;
}
45:
46: //**********************************
47:
// Running inversion total. Merge() only ever adds to it, so it must be set
// back to 0 before each new string is sorted.
// Declared as `long long` instead of the MSVC-only `__int64` so the code also
// builds with GCC/Clang; on MSVC the two are the same type.
long long inversionNum = 0 ;

// Merges the two adjacent sorted runs a[lb..rb-1] and a[rb..re] into one sorted
// run a[lb..re], adding the number of cross-run inversions to inversionNum.
//
// a      : array holding both runs; overwritten with the merged result.
// tmpArr : scratch buffer, written at indices lb..re (same indices as a).
// lb     : first index of the left run.
// rb     : first index of the right run (the left run ends at rb-1).
// re     : last index of the right run.
void Merge( char *a, char* tmpArr, int lb, int rb, int re )
{
    const int first = lb ;     // remembered for the copy-back below
    const int le    = rb-1 ;   // last index of the left run
    int out = lb ;

    while( lb<=le && rb<=re )
    {
        if( a[lb] <= a[rb] )
        {
            // Equal elements are taken from the left run first, so ties are
            // never counted as inversions.
            tmpArr[out++] = a[lb++] ;
        }
        else
        {
            // a[rb] is smaller than every element still waiting in the left
            // run (a[lb..le]); each of those pairs is one inversion.
            inversionNum += le-lb+1 ;
            tmpArr[out++] = a[rb++] ;
        }
    }

    // At most one of the two runs still has elements; append them unchanged.
    while( lb<=le )
        tmpArr[out++] = a[lb++] ;
    while( rb<=re )
        tmpArr[out++] = a[rb++] ;

    // Copy the merged run back into the caller's array.
    for( int k=first ; k<=re ; ++k )
        a[k] = tmpArr[k] ;
}
78:
79: void Msort( char *a, char *tmpArr, int b, int e )
80: {
81: if(b>=e)
82: return ;
83: int mid = b+(e-b)/2 ;
84: Msort(a,tmpArr,b,mid) ;
85: Msort(a,tmpArr,mid+1,e) ;
86: Merge(a,tmpArr,b,mid+1,e) ;
87: }
88:
89: void MergeSort( char *a , int n )
90: {
91: char *tmpArr = new char[n+1] ;
92: Msort(a,tmpArr,0,n-1) ;
93: delete []tmpArr ;
94: }
95:
96: //**********************************
97:
98: void run1007()
99: {
100: ifstream in("in.txt");
101:
102: int n,m ;
103: string tmpStr ;
104: multimap< __int64 , string > strMap ;
105: multimap< __int64 , string >::iterator iter ;
106:
107: in>>n>>m ;
108:
109: char *a = new char[n+1] ; //末尾'0'
110:
111: while( m-- && in>>a ) //不能用gets(a),不然会从第一行输入的末尾开始,其末尾为换行符,而gets不会读入换行符
112: { //所以gets读入的第一行字符为空字符
113: tmpStr = a ;
114: MergeSort(a,n) ;
115: strMap.insert( make_pair(inversionNum,tmpStr) ) ;
116: inversionNum = 0 ;
117: }
118:
119: for( iter=strMap.begin() ; iter!=strMap.end() ; ++iter )
120: cout<<iter->second<<endl ;
121:
122: delete []a ;
123: }
124:
125:
126: void run1007_1()
127: {
128: ifstream in("in.txt");
129:
130: int n,m ;
131: string tmp ;
132: multimap< __int64 , string > strMap ;
133: multimap< __int64 , string >::iterator iter ;
134:
135: in>>n>>m ;
136:
137: char *a = new char[n+1] ; //末尾'0'
138:
139: while( m-- && in>>a ) //不能用gets(a),不然会从第一行输入的末尾开始,其末尾为换行符,而gets不会读入换行符
140: { //所以gets读入的第一行字符为空字符
141: /*
142: //这样会导致strMap中所有的second都是最后一个读入的字符串
143: //因为作为second的a每次输入时都会被改变,其并没有复制
144: __int64 count = GetInversionCount(a,n) ;
145: strMap.insert( make_pair(count,a) ) ;
146: */
147: tmp = a ;
148: __int64 count = GetInversionCount(a,n) ;
149: strMap.insert( make_pair(count,tmp) ) ;
150:
151: }
152:
153: for( iter=strMap.begin() ; iter!=strMap.end() ; ++iter )
154: cout<<iter->second<<endl ;
155:
156: delete []a ;
157: }