DNA排序
逆序数是用来描述一个序列混乱程度的量。例如,“DAABEC”的逆序数为5,其中D大于它右边的4个字母,E大于它右边的1个字母,4+1=5;又如,“ZWQM”的逆序数为3+2+1+0=6。
现在有许多长度一样的字符串,每个字符串里面只会出现四种字母(A,T,G,C)。要求编写程序,将这些字符串按照它们的逆序数进行排序。
输入:
输入数据有多组,以EOF结束。其中,每组数据:
第一行包括两个正整数,第一个正整数N给出了字符串的长度,第二个正整数M给出了字符串的数量。(1<=N,M<=100)
输出:
输出每组数据,不需要额外空行。
将输入的字符串按照其逆序数进行排序,如果两个字符串的逆序数相等,则按照输入中两者先后顺序进行排序。
Sample Input
10 6
AACATGAAGG
TTTTGGCCAA
TTTGGCCAAA
GATCAGATTT
CCCGGGGGGA
ATCGATGCAT
Sample Output
CCCGGGGGGA
AACATGAAGG
GATCAGATTT
ATCGATGCAT
TTTTGGCCAA
TTTGGCCAAA
Source
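分析:题目要求逆序数相等时保持输入顺序,所以需要稳定的排序算法,这里选择了归并排序。计算逆序数时,原本没想太多,用的是暴力遍历(O(N²));后来看评论,发现了一位大神的一种有趣的算法:从右向左扫描字符串,同时统计已经出现过的 A、C、G 的个数,每遇到一个字母,就把比它小的字母的累计个数加到逆序数上,这样只需 O(N) 即可求出逆序数。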
/*
 * DNA sorting: read groups of equal-length strings over the alphabet
 * {A, C, G, T} and print each group ordered by inversion count
 * ("unsortedness").  Strings with the same inversion count keep their
 * input order, so a stable merge sort is used.
 *
 * Input is a sequence of groups terminated by EOF.  Each group starts with
 * two integers N (string length) and M (number of strings), 1 <= N, M <= 100,
 * followed by M strings.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DNA_MAX_LEN 100               /* largest N allowed by the statement   */
#define DNA_LEN     (DNA_MAX_LEN + 1) /* string storage incl. terminating NUL */
#define DNA_NUM     100               /* largest M allowed by the statement   */

#define BUFFER_SIZE 10000             /* bytes of scratch space for Merge()   */

/* Two-step stringification so DNA_MAX_LEN can be used as a scanf width. */
#define DNA_STR_(x) #x
#define DNA_STR(x)  DNA_STR_(x)

/* One DNA string together with its inversion count. */
typedef struct
{
    int unsortedness;
    char dnaString[DNA_LEN];
}Dna;

/*
 * One input group.  `dna` owns the strings; `pDna` is the array that
 * actually gets sorted, so only pointers are moved around.
 */
typedef struct
{
    int dnaLen;
    int dnaNum;
    Dna dna[DNA_NUM];
    Dna* pDna[DNA_NUM];
}DnaSequence;

DnaSequence DnaSeq;

/*
 * Read one group from stdin into *dnaSeq.
 *
 * Returns 1 if at least one string was read, 0 on EOF or on a malformed /
 * out-of-range header.  On return dnaSeq->dnaNum always equals the number of
 * entries of dna[] / pDna[] that were really filled in, so later stages never
 * touch unset slots even if the input is cut short.
 */
int GetDnaSequence(DnaSequence *dnaSeq)
{
    int i;

    if (scanf("%d %d", &dnaSeq->dnaLen, &dnaSeq->dnaNum) != 2)
    {
        dnaSeq->dnaNum = 0;
        return 0;
    }

    /* Reject sizes that would not fit the fixed-size storage. */
    if (dnaSeq->dnaLen < 1 || dnaSeq->dnaLen > DNA_MAX_LEN ||
        dnaSeq->dnaNum < 1 || dnaSeq->dnaNum > DNA_NUM)
    {
        dnaSeq->dnaNum = 0;
        return 0;
    }

    for (i = 0; i < dnaSeq->dnaNum; i++)
    {
        /* The field width bounds the write to DNA_MAX_LEN chars + NUL. */
        if (scanf("%" DNA_STR(DNA_MAX_LEN) "s", dnaSeq->dna[i].dnaString) != 1)
        {
            break;
        }
        dnaSeq->pDna[i] = &dnaSeq->dna[i];
    }

    dnaSeq->dnaNum = i;  /* shrink to what was actually read */
    return i > 0;
}

/* Print the strings of *dnaSeq in pDna[] order, one per line. */
void PrintDnaSequence(DnaSequence *dnaSeq)
{
    int i;

    for (i = 0; i < dnaSeq->dnaNum; i++)
    {
        printf("%s\n", dnaSeq->pDna[i]->dnaString);
    }
}

/*
 * Compute the inversion count of dna->dnaString and store it in
 * dna->unsortedness.
 *
 * At most dnaLen characters are examined, and never more than the string
 * actually holds.  The string is scanned right to left while counting how
 * many 'A', 'C' and 'G' have been seen so far; each letter contributes the
 * number of strictly smaller letters to its right.  This is O(dnaLen),
 * versus O(dnaLen^2) for comparing every pair.  Characters other than
 * A/C/G/T are ignored.
 */
void CalcUnsortedness(Dna* dna, int dnaLen)
{
    int i;
    int len = 0;
    int seenA = 0, seenC = 0, seenG = 0;

    /* Stop at the NUL or the buffer end so stale bytes are never counted. */
    while (len < dnaLen && len < DNA_LEN && dna->dnaString[len] != '\0')
    {
        len++;
    }

    dna->unsortedness = 0;
    for (i = len - 1; i >= 0; i--)
    {
        switch (dna->dnaString[i])
        {
            case 'A':
                seenA++;
                break;
            case 'C':
                seenC++;
                dna->unsortedness += seenA;
                break;
            case 'G':
                seenG++;
                dna->unsortedness += seenA + seenC;
                break;
            case 'T':
                /* 'T' is the largest letter, so no counter is needed for it. */
                dna->unsortedness += seenA + seenC + seenG;
                break;
            default:
                break;
        }
    }
}

/*
 * Comparison callback for MergeSort(): both arguments point at elements of
 * a `Dna *` array.  Returns <0, 0 or >0 according to the inversion counts.
 */
int SortCmp(const void* elem1, const void* elem2)
{
    const Dna* dna1 = *(Dna* const*)elem1;
    const Dna* dna2 = *(Dna* const*)elem2;

    return (dna1->unsortedness > dna2->unsortedness)
         - (dna1->unsortedness < dna2->unsortedness);
}

char g_mergeBuffer[BUFFER_SIZE];

/*
 * Stable merge of the two adjacent sorted runs [left, mid) and [mid, right]
 * (element indices, `right` inclusive) of `array`, whose elements are
 * elemSize bytes each.  Uses g_mergeBuffer as scratch space; the caller must
 * ensure (right - left + 1) * elemSize <= BUFFER_SIZE.
 */
void Merge(char* array, int elemSize, int left, int mid, int right, int (*cmp)(const void*, const void*))
{
    int i = left;
    int j = mid;
    int bufIdx = 0;

    /* Empty right-hand run: [left, right] is already one sorted run.
       Returning here also avoids reading elements past `right`. */
    if (mid > right)
    {
        return;
    }

    while (i < mid && j <= right)
    {
        /* "<=" takes from the left run on ties, which keeps the sort stable. */
        if (cmp(&array[i * elemSize], &array[j * elemSize]) <= 0)
        {
            memcpy(&g_mergeBuffer[bufIdx], &array[i * elemSize], elemSize);
            i++;
        }
        else
        {
            memcpy(&g_mergeBuffer[bufIdx], &array[j * elemSize], elemSize);
            j++;
        }
        bufIdx += elemSize;
    }

    /* Copy whatever is left of either run. */
    for (; i < mid; i++)
    {
        memcpy(&g_mergeBuffer[bufIdx], &array[i * elemSize], elemSize);
        bufIdx += elemSize;
    }

    for (; j <= right; j++)
    {
        memcpy(&g_mergeBuffer[bufIdx], &array[j * elemSize], elemSize);
        bufIdx += elemSize;
    }

    memcpy(&array[left * elemSize], g_mergeBuffer, (size_t)(right - left + 1) * elemSize);
}

/*
 * Stable bottom-up merge sort of `arrayLen` elements of `elemSize` bytes.
 *
 * Returns 0 on success and -1 if the arguments are invalid or the array does
 * not fit into the BUFFER_SIZE-byte scratch buffer (the array is then left
 * untouched).
 */
int MergeSort(void* array, int arrayLen, int elemSize, int (*cmp)(const void*, const void*))
{
    int loop, left, mid, right;

    if (array == NULL || cmp == NULL || elemSize <= 0)
    {
        return -1;
    }
    if (arrayLen < 2)
    {
        return 0;
    }
    if ((size_t)arrayLen > BUFFER_SIZE / (size_t)elemSize)
    {
        return -1;
    }

    /* `loop` is the current run length: 1, 2, 4, ... */
    for (loop = 1; loop < arrayLen; loop *= 2)
    {
        /* Only merge where a right-hand run exists (mid < arrayLen); a lone
           trailing run is already sorted and is left in place. */
        for (left = 0; left + loop < arrayLen; left += loop * 2)
        {
            mid = left + loop;
            right = mid + loop - 1;
            if (right > arrayLen - 1)
            {
                right = arrayLen - 1;
            }
            Merge((char*)array, elemSize, left, mid, right, cmp);
        }
    }
    return 0;
}

/* Compute every string's inversion count, then sort pDna[] by it (stable). */
void ProcDnaSequence(DnaSequence *dnaSeq)
{
    int i;
    int elemSize = sizeof(dnaSeq->pDna[0]);

    for (i = 0; i < dnaSeq->dnaNum; i++)
    {
        CalcUnsortedness(&dnaSeq->dna[i], dnaSeq->dnaLen);
    }
    /* Cannot fail here: dnaNum <= DNA_NUM pointers always fit the buffer. */
    (void)MergeSort(dnaSeq->pDna, dnaSeq->dnaNum, elemSize, SortCmp);
}

/* Process groups until EOF, printing each sorted group with no extra blank lines. */
int main(void)
{
    while (GetDnaSequence(&DnaSeq))
    {
        ProcDnaSequence(&DnaSeq);
        PrintDnaSequence(&DnaSeq);
    }
    return 0;
}