几种排序的比较- bitmapsort,qsort,set
今天看了编程珠玑的column1,其中Problem 1是假设内存足够大,如何用库函数实现对100w个数的排序,而Problem2是让实现bit vector操作(bset,clear, test),Problem3让实现bitmap sort 并和Problem1的sort做比较。
先来讲一下bit vector,它其实是数据压缩的一种方式。思想是利用每一个bit代表一个数,1个int 有32bit ,故一个int可以表示32个数。举个例子说,现在有一个int a,那么a的低位到高位分别代表0~31(每一位是1或0,1表示有该数,0表示没有);假如现在有集合{1,2,5,8},则a的二进制为(高位)00000000 00000000 00000001 00100110(低位)。bit vector 的操作有bset(i),clear(i),test(i)分别是对数i在整数数组中的设置,清除,和检查。
现在假设最多有10000000个数,那么则需要1+10000000/32 个int来存储这10000000个数。
对于每个数i 设置的操作将第i个bit设为1,第i个bit可以看成是第(i/32)个int里边的第(i%32)位。而已知整数右移一位表示除以2,于是i/32可表示为i>>5,i%32可表示为i与低五位掩码的与,即 i & 0x1F。于是设置操作为a[i>>5] |= 1<<(i & 0x1F);clear(i)操作则是a[i>>5] &=~( 1<<(i & 0x1F));test(i)操作为a[i>>5] & (1<<(i & 0x1F)).用MASK表示0x1F,BITSPERWORD表示32,SHIFT表示5
代码如下:
#define BITSPERWORD 32   /* bits stored in each array element */
#define SHIFT 5          /* i >> SHIFT == i / BITSPERWORD */
#define MASK 0x1F        /* i & MASK  == i % BITSPERWORD (low five bits) */
#define N 10000000       /* values are expected in [0, N) */

/*
 * Bit vector: bit (i & MASK) of a[i >> SHIFT] records whether i is present.
 * The words are unsigned so that setting bit 31 never shifts into a sign bit.
 */
unsigned int a[1 + N / BITSPERWORD];

/* Returns nonzero when bit i lies inside the storage of a[]. */
static int inRange(int i)
{
    return i >= 0 && (i >> SHIFT) <= N / BITSPERWORD;
}

/* Marks i as present. Out-of-range values are ignored. */
void bset(int i)
{
    if (!inRange(i))
        return;
    a[i >> SHIFT] |= 1u << (i & MASK);
}

/* Marks i as absent. Out-of-range values are ignored. */
void clr(int i)
{
    if (!inRange(i))
        return;
    a[i >> SHIFT] &= ~(1u << (i & MASK));
}

/* Returns 1 when i is present, 0 otherwise (including out-of-range i). */
int test(int i)
{
    if (!inRange(i))
        return 0;
    return (a[i >> SHIFT] & (1u << (i & MASK))) != 0;
}
bitmap sort的思想很简单,首先将N个数clear(即设为0),然后对读取的每一个数在数组上设为1,输出的时候将含1的数(test(i))从小到大输出即可。下面给出的是含对文件的操作。不需要对文件操作可自行修改。
code:
1 void bitSort() 2 { 3 int i; 4 for (i =0; i < N; ++i) 5 { 6 clr(i); 7 } 8 FILE *fp1 = NULL;//read file 9 FILE *fp2 = NULL;//write another file 10 11 if(NULL == (fp1 = fopen("data.txt","r"))) 12 throw ("open file failed!"); 13 /*while (scanf("%d",&i) != EOF) 14 { 15 bset(i); 16 }*/ 17 //文件读取改写: 18 while (fscanf(fp1,"%d",&i)!=EOF) 19 { 20 bset(i); 21 } 22 fclose(fp1); 23 fp1 = NULL; 24 fp2 = fopen("bitsortData.txt","w+"); 25 for (i = 0; i < N; ++i) 26 { 27 if (test(i)) 28 { 29 fprintf(fp2,"%d\n",i); 30 } 31 } 32 fclose(fp2); 33 fp2 = NULL; 34 printf("Bitsort complete!\n"); 35 }
为了测试该函数,还需要生成1000000个互不相等的整数。下面给出该函数
1 //生成k个整数,范围0~N,N为上面定义的,并写到文件data.txt 2 void produceRand(int k) 3 { 4 int *buf = (int*)malloc(N * sizeof(int)); 5 if(NULL == buf)return; 6 FILE *fp = NULL; 7 fp = fopen("data.txt","w+"); 8 if (fp == NULL) 9 { 10 throw ("can't open file!"); 11 } 12 int i; 13 for (i = 0; i < N; ++i) 14 { 15 buf[i] = i; 16 } 17 for (i = 0; i < k; ++i) 18 { 19 //生成i~n-1之间的随机数 20 int randi = i + (int)(rand() / (RAND_MAX + 1.0) * (N-1-i)); 21 //swap(i, randi) 22 int tmp = buf[i]; 23 buf[i] = buf[randi]; 24 buf[randi] = tmp; 25 fprintf(fp,"%d\n",buf[i]); 26 } 27 printf("Write file complete\n"); 28 fclose(fp); 29 fp = NULL; 30 free(buf); 31 buf = NULL; 32 }
使用qsort对文件data.txt中的数排序:
/*
 * qsort comparison callback for ints: negative, zero or positive when *x is
 * less than, equal to or greater than *y. Uses comparisons rather than
 * subtraction so that operands of opposite extreme sign cannot overflow.
 */
int intcomp(const void *x, const void *y)
{
    int lhs = *(const int*)x;
    int rhs = *(const int*)y;
    return (lhs > rhs) - (lhs < rhs);
}

/*
 * Reads every integer from data.txt, sorts them with qsort and writes them in
 * ascending order, one per line, to qsortData.txt.
 *
 * Returns silently if memory cannot be allocated; throws a const char*
 * message if either file cannot be opened.
 */
void myQsort()
{
    size_t cap = 1000000; // initial capacity; grows if the file holds more
    size_t n = 0;
    int *a = (int*)malloc(cap * sizeof(int));
    if (a == NULL)
        return;

    FILE *fp1 = fopen("data.txt", "r"); // input file
    if (NULL == fp1)
    {
        free(a);
        throw ("open file failed!");
    }

    // Loop while a conversion succeeds. Comparing against EOF alone would
    // spin forever on a non-numeric token.
    int value;
    while (fscanf(fp1, "%d", &value) == 1)
    {
        if (n == cap)
        {
            // Grow through a temporary so the old block is still owned (and
            // can be freed) if realloc fails.
            size_t newCap = cap * 2;
            int *grown = (int*)realloc(a, newCap * sizeof(int));
            if (grown == NULL)
            {
                fclose(fp1);
                free(a);
                return;
            }
            a = grown;
            cap = newCap;
        }
        a[n++] = value;
    }
    fclose(fp1);
    fp1 = NULL;

    qsort(a, n, sizeof(int), intcomp);

    FILE *fp2 = fopen("qsortData.txt", "w+"); // output file
    if (NULL == fp2)
    {
        free(a);
        throw ("open file failed!");
    }
    for (size_t i = 0; i < n; ++i)
    {
        fprintf(fp2, "%d\n", a[i]);
    }
    fclose(fp2);
    fp2 = NULL;
    printf("Qsort complete!\n");
    free(a);
    a = NULL;
}
使用C++ STL中的set对文件data.txt中的数排序:
/*
 * Reads every integer from data.txt into a std::set and writes the set's
 * contents in ascending order, one per line, to setsortData.txt. Because a
 * set holds unique keys, duplicates appear once in the output.
 *
 * Throws a const char* message if either file cannot be opened.
 */
void setSort()
{
    std::set<int> S;
    int i;
    FILE *fp1 = NULL; // input file
    FILE *fp2 = NULL; // output file

    if (NULL == (fp1 = fopen("data.txt", "r")))
        throw ("open file failed!");

    // Loop while a conversion succeeds. Comparing against EOF alone would
    // spin forever on a non-numeric token.
    while (fscanf(fp1, "%d", &i) == 1)
    {
        S.insert(i);
    }
    fclose(fp1);
    fp1 = NULL;

    fp2 = fopen("setsortData.txt", "w+");
    if (NULL == fp2)
        throw ("open file failed!");

    // Iterating a std::set visits keys in ascending order.
    for (std::set<int>::iterator j = S.begin(); j != S.end(); ++j)
    {
        fprintf(fp2, "%d\n", *j);
    }
    fclose(fp2);
    fp2 = NULL;
    printf("Setsort complete!\n");
}
主函数为:
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <set> 4 #include <time.h> 5 using namespace std; 6 int main() 7 { 8 // produceRand(1000000); 9 clock_t start,finish;//计时 10 double duration; 11 start = clock(); 12 bitSort(); 13 finish = clock(); 14 duration = (double)(finish - start) / CLOCKS_PER_SEC; 15 printf("bitsort time: %lfs\n", duration); 16 17 start = clock(); 18 myQsort(); 19 finish = clock(); 20 duration = (double)(finish - start) / CLOCKS_PER_SEC; 21 printf("qsort time: %lfs\n", duration); 22 23 start = clock(); 24 setSort(); 25 finish = clock(); 26 duration = (double)(finish - start) / CLOCKS_PER_SEC; 27 printf("setsort time: %lfs\n", duration); 28 return 0; 29 }
运行该程序得到三种排序的比较效果:
可以发现bitmap sort最快,使用set 排序最慢。而且bitmap sort使用的空间也少,qsort需要1000000个int,而bitmap sort则只需要1+ 10000000/32个int(约31万个)。可见bitmap sort的强大和高效!