桶排序(BucketSort)
桶排序思想
桶排序的思想就是把数组分到有限的桶中,然后再分别排序,最后将各个桶中的数据有序的合起来,具体如下:
(1)设计好桶的大小和桶的个数
(2)遍历数组,把每个元素放到对应的桶中
(3)对每个桶分别排序(可以递归使用桶排序,也可以使用其它排序)
(4)依次遍历各个桶中的数据,将数据有序地合并起来
复杂度
桶排序利用函数映射关系,减少了几乎所有的比较操作。它把大量数据分割成基本有序的数据块,然后只需要对每个桶中的少量数据使用高效的比较排序即可。
对 n 个关键字进行桶排序的时间复杂度分为两部分:
1、循环计算每个关键字的桶映射函数,这个时间复杂度是O(n).
2、利用比较先进的比较排序算法,对每个桶中的数据进行排序,其时间复杂度是∑ O(Ni*logNi) 。其中Ni 为第i个桶的数据量。
很显然,第(2)部分是桶排序性能好坏的决定因素。基于比较的排序,其平均时间复杂度最好只能达到O(N*logN),所以应该尽可能减少每个桶内数据的数量。
映射函数f(k)能够将N个数据平均的分配到M个桶中,这样每个桶就有[N/M]个数据量。
所以尽量的增大桶的数量,极限情况下每个桶只能得到一个数据,这样就完全避开了桶内数据的“比较”排序操作。这就是一种空间换时间的策略。
总结
对于N个待排数据,M个桶,平均每个桶[N/M]个数据的桶排序平均时间复杂度为:
O(N)+O(M*(N/M)*log(N/M))=O(N+N*(logN-logM))=O(N+N*logN-N*logM)
当N=M时,即极限情况下每个桶只有一个数据时,桶排序的最好效率能够达到O(N)。
桶排序的平均时间复杂度为线性的O(N+C),其中C=N*(logN-logM).
空间复杂度O(N+M),如果输入数据非常庞大,而桶的数量也非常多,则空间代价无疑是昂贵的。
此外,桶排序的稳定性取决于桶内所使用的排序算法:如果元素按原有顺序入桶,并且桶内使用稳定的排序(例如插入排序),那么桶排序是稳定的。
代码实现
简易版
#include <stdio.h>
#include <algorithm>
#include <vector>
using namespace std;

const int bucket_num = 10;        // number of buckets
const int interval = 10;          // width of the value range covered by one bucket
const int maxn = 100 + 10;        // capacity of the input array (max element count)
vector<int> buckets[bucket_num];  // bucket storage, reused by every BucketSort call

/*
 * Distribute the first n elements of arr into buckets, sort each bucket and
 * print the elements in ascending order on one line, followed by a newline.
 *
 * arr itself is not modified. Values whose bucket index would fall outside
 * [0, bucket_num - 1] are placed in the first or last bucket; because the
 * value-to-bucket mapping stays monotonic, the printed sequence is still
 * fully sorted.
 */
void BucketSort(int *arr, int n)
{
    // The buckets are global: discard whatever a previous call left behind.
    for (int i = 0; i < bucket_num; i++) {
        buckets[i].clear();
    }

    for (int i = 0; i < n; i++) {
        int idx = arr[i] / interval;
        // Clamp instead of indexing out of bounds for negative or large values.
        if (idx < 0) {
            idx = 0;
        } else if (idx >= bucket_num) {
            idx = bucket_num - 1;
        }
        buckets[idx].push_back(arr[i]);
    }

    for (int i = 0; i < bucket_num; i++) {
        sort(buckets[i].begin(), buckets[i].end());
        for (size_t j = 0; j < buckets[i].size(); j++) {
            printf("%d ", buckets[i][j]);
        }
    }
    printf("\n");
}

/*
 * Read an element count and that many integers from stdin, then print them
 * sorted. Returns 1 if the count is not in [0, maxn] or the input is malformed.
 */
int main()
{
    int n, arr[maxn];

    printf("数组大小:");
    if (scanf("%d", &n) != 1 || n < 0 || n > maxn) {
        fprintf(stderr, "invalid array size (expected 0..%d)\n", maxn);
        return 1;
    }

    printf("数组:");
    for (int i = 0; i < n; i++) {
        if (scanf("%d", &arr[i]) != 1) {
            fprintf(stderr, "invalid array element\n");
            return 1;
        }
    }

    BucketSort(arr, n);

    return 0;
}
详细版
#include <stdio.h>
#include <stdlib.h>

/* Enum constants are true compile-time constants in C, so arrays sized by
 * them are ordinary arrays rather than VLAs. */
enum {
    NARRAY = 50 + 5, /* capacity of the input array */
    NBUCKET = 5,     /* number of buckets */
    INTERVAL = 10    /* width of the value range covered by one bucket */
};

/* Singly linked list node holding one element of a bucket. */
struct Node
{
    int data;
    struct Node *next;
};

void BucketSort(int arr[], int n);
struct Node *InsertionSort(struct Node *list);
void print(int arr[], int n);
void printBuckets(struct Node *list);
int getBucketIndex(int value);

/* Release every node of every bucket list and reset the list heads. */
static void freeBuckets(struct Node *buckets[])
{
    for (int i = 0; i < NBUCKET; ++i) {
        struct Node *node = buckets[i];
        while (node != NULL) {
            struct Node *next = node->next;
            free(node);
            node = next;
        }
        buckets[i] = NULL;
    }
}

/*
 * Sort the first n elements of arr in ascending order using bucket sort.
 *
 * The bucket contents are printed to stdout before and after the per-bucket
 * sort. If a node cannot be allocated, an error is reported on stderr, all
 * memory is released and arr is left unmodified.
 */
void BucketSort(int arr[], int n)
{
    /* The table is small and fixed-size, so it lives on the stack. */
    struct Node *buckets[NBUCKET] = { NULL };

    /* put items into the buckets by inserting at the head of each list */
    for (int i = 0; i < n; ++i) {
        int pos = getBucketIndex(arr[i]);
        struct Node *current = malloc(sizeof *current);
        if (current == NULL) {
            fprintf(stderr, "BucketSort: out of memory\n");
            freeBuckets(buckets);
            return;
        }
        current->data = arr[i];
        current->next = buckets[pos];
        buckets[pos] = current;
    }

    /* check what's in each bucket */
    for (int i = 0; i < NBUCKET; i++) {
        printf("Bucket[%d] : ", i);
        printBuckets(buckets[i]);
    }

    /* sort each bucket using insertion sort */
    for (int i = 0; i < NBUCKET; ++i) {
        buckets[i] = InsertionSort(buckets[i]);
    }

    /* check what's in each bucket */
    printf("-------------\n");
    printf("Bucktets after sorted\n");

    for (int i = 0; i < NBUCKET; i++) {
        printf("Bucket[%d] : ", i);
        printBuckets(buckets[i]);
    }

    /* put items back into the original array, bucket by bucket */
    for (int j = 0, i = 0; i < NBUCKET; ++i) {
        for (struct Node *node = buckets[i]; node != NULL; node = node->next) {
            arr[j++] = node->data;
        }
    }

    freeBuckets(buckets);
}

/*
 * Sort a linked list in ascending order with insertion sort.
 *
 * list is the head of the list (may be NULL). The nodes are relinked in
 * place and the new head is returned. A node is placed after any already
 * sorted nodes holding an equal value.
 */
struct Node *InsertionSort(struct Node *list)
{
    /* need at least two items to sort */
    if (list == NULL || list->next == NULL) {
        return list;
    }

    struct Node *sorted = list;       /* the first node starts the sorted list */
    struct Node *pending = list->next;
    sorted->next = NULL;

    while (pending != NULL) {
        struct Node *item = pending;
        pending = pending->next;

        /* Walk the link that should point at item; using a pointer to the
         * link handles head, middle and tail insertion uniformly. */
        struct Node **link = &sorted;
        while (*link != NULL && (*link)->data <= item->data) {
            link = &(*link)->next;
        }
        item->next = *link;
        *link = item;
    }
    return sorted;
}

/*
 * Map a value to its bucket index.
 *
 * Values below the first bucket's range map to bucket 0 and values above the
 * last bucket's range map to bucket NBUCKET - 1, so the result is always a
 * valid index and the mapping stays monotonic (which keeps the overall sort
 * correct for any int).
 */
int getBucketIndex(int value)
{
    int index = value / INTERVAL;
    if (index < 0) {
        return 0;
    }
    if (index >= NBUCKET) {
        return NBUCKET - 1;
    }
    return index;
}

/* Print the first n elements of arr on one line, each in a 3-wide field. */
void print(int arr[], int n)
{
    for (int i = 0; i < n; ++i) {
        printf("%3d", arr[i]);
    }
    printf("\n");
}

/* Print every value of a bucket list on one line, each in a 3-wide field. */
void printBuckets(struct Node *list)
{
    for (struct Node *cur = list; cur != NULL; cur = cur->next) {
        printf("%3d", cur->data);
    }
    printf("\n");
}

/*
 * Read an element count and that many integers from stdin, sort them and
 * print the array before and after sorting. Returns 1 if the count is not in
 * [0, NARRAY] or the input is malformed.
 */
int main(void)
{
    int n, array[NARRAY];

    printf("数组大小:");
    if (scanf("%d", &n) != 1 || n < 0 || n > NARRAY) {
        fprintf(stderr, "invalid array size (expected 0..%d)\n", (int)NARRAY);
        return 1;
    }

    printf("数组:");
    for (int i = 0; i < n; i++) {
        if (scanf("%d", &array[i]) != 1) {
            fprintf(stderr, "invalid array element\n");
            return 1;
        }
    }

    printf("Initial array\n");
    print(array, n);
    printf("-------------\n");

    BucketSort(array, n);
    printf("-------------\n");
    printf("Sorted array\n");
    print(array, n);

    return 0;
}
参考链接: