利用堆排序找出数组中前n大的元素

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>
#include <malloc.h>
#include <memory.h>
// Capacity, in elements, of the working arrays (10,000,000 + 1).
#define MAX_SIZE (1000 * 10000 + 1)

// Heap index helpers. Every argument is parenthesized so that expression
// arguments such as LEFT(k + 1) expand correctly.
// Children of node i are 2i and 2i+1. With zero-based arrays this means
// LEFT(0) is 0 (the node itself), so node 0 effectively has the single
// child 1, and every node i >= 1 has children 2i and 2i+1.
#define PARENT(i) ((i) / 2)
#define RIGHT(i) ((i) * 2 + 1)
#define LEFT(i) ((i) * 2)
// Swap the lvalues a and b, using the lvalue t as scratch storage.
// a and b are each evaluated twice, so they must be free of side effects.
#define EXCHANGE(a, b, t) do { (t) = (a); (a) = (b); (b) = (t); } while (0)

// Write cnt pseudo-random integers, separated by single spaces, to the text
// file "test_data_<cnt>.txt" in the current directory.
//
// Each value is rand() % cnt, so it lies in [0, cnt) and values MAY repeat
// (no de-duplication is performed). This function does not seed rand().
//
// cnt: number of values to write; must be smaller than MAX_SIZE, otherwise a
//      message is printed and nothing is written.
//
// Failures (open, write, close) are reported on stdout; nothing is returned.
void gen_test_data(uint32_t cnt)
{
    if (cnt >= MAX_SIZE) {
        printf("cnt too largr\n");
        return;
    }

    char file_name[256];
    // uint32_t is printed through an explicit unsigned long cast so that the
    // conversion specifier always matches the argument type.
    snprintf(file_name, sizeof file_name, "test_data_%lu.txt", (unsigned long)cnt);

    FILE *fp = fopen(file_name, "w");
    if (NULL == fp) {
        printf("open %s error!\n", file_name);
        return;
    }

    // When cnt is 0 the loop body never runs, so the modulo is never
    // evaluated with a zero divisor.
    for (uint32_t n = 0; n < cnt; ++n) {
        const uint32_t value = (uint32_t)rand() % cnt;
        if (fprintf(fp, "%lu ", (unsigned long)value) < 0) {
            printf("write %s error!\n", file_name);
            fclose(fp);
            return;
        }
    }

    // fclose flushes buffered output, so a failure here means lost data.
    if (fclose(fp) != 0) {
        printf("close %s error!\n", file_name);
        return;
    }
    printf("gen %s finished\n", file_name);
}

// Read whitespace-separated decimal integers from "test_data_<data_cnt>.txt"
// into arr.
//
// arr:      destination buffer with room for at least size elements.
// size:     maximum number of elements to store.
// cnt:      out parameter; receives the number of elements actually stored.
//           It is reset to 0 first, so it is 0 on every failure path.
// data_cnt: selects the file name; if it exceeds size, a message is printed
//           and nothing is read.
//
// Reading stops at end of file, at the first token that is not an integer,
// or once size elements have been stored.
void read_data(int32_t arr[],const uint32_t size,uint32_t *cnt,const uint32_t data_cnt)
{
    *cnt = 0;
    if (data_cnt > size) {
        printf("data_cnt too largr\n");
        return;
    }

    char file_name[256];
    snprintf(file_name, sizeof file_name, "test_data_%lu.txt", (unsigned long)data_cnt);

    FILE *fp = fopen(file_name, "r");
    if (NULL == fp) {
        printf("open %s error!\n", file_name);
        return;
    }

    // Drive the loop by fscanf's return value instead of feof(): feof() only
    // becomes true after a read has already failed, which would count one
    // bogus element for an empty file and spin on malformed input.
    int value = 0;
    while (*cnt < size && fscanf(fp, "%d", &value) == 1) {
        arr[*cnt] = (int32_t)value;
        (*cnt)++;
    }
    fclose(fp);
}

// Quicksort: sort arr[low..high] (both bounds inclusive) in place into
// DESCENDING order. An empty or single-element range is left untouched.
// The first element of the range is used as the pivot.
void quick_sort(int32_t arr[],int32_t low,int32_t high)
{
    // The right-hand partition is handled by looping instead of a second
    // recursive call; the left-hand partition is still sorted recursively.
    while (low < high)
    {
        const int32_t pivot = arr[low];
        int32_t left = low;
        int32_t right = high;

        while (left < right)
        {
            // Scan from the right for a value larger than the pivot.
            while (left < right && arr[right] <= pivot)
            {
                --right;
            }
            if (left < right)
            {
                arr[left] = arr[right];
                ++left;
            }
            // Scan from the left for a value not larger than the pivot.
            while (left < right && arr[left] > pivot)
            {
                ++left;
            }
            if (left < right)
            {
                arr[right] = arr[left];
                --right;
            }
        }

        // left == right is the pivot's final position.
        arr[left] = pivot;
        quick_sort(arr, low, left - 1);
        low = left + 1;
    }
}

// Quickselect: rearrange arr[low..high] (inclusive bounds) in place so that
// the topn largest values of that range end up in its first topn positions,
// in no particular order.
// Returns immediately, leaving the array untouched, when the range has fewer
// than two elements or when topn > high.
void get_topn_quick(int32_t arr[],int32_t low,int32_t high,const int32_t topn)
{
    // How many of the largest values must still be placed at the front of
    // the current sub-range.
    int32_t wanted = topn;

    while (low < high && wanted <= high)
    {
        const int32_t pivot = arr[low];
        int32_t left = low;
        int32_t right = high;

        // Partition: values >= pivot move to the left, values < pivot to the right.
        while (left < right)
        {
            while (left < right && arr[right] < pivot)
            {
                right--;
            }
            if (left < right)
            {
                arr[left] = arr[right];
                left++;
            }
            while (left < right && arr[left] >= pivot)
            {
                left++;
            }
            if (left < right)
            {
                arr[right] = arr[left];
                right--;
            }
        }
        arr[left] = pivot;

        // Number of elements in arr[low..left], all of which are >= pivot.
        const int32_t front = left - low + 1;
        if (front == wanted)
        {
            return;
        }
        if (front > wanted)
        {
            // Too many candidates: keep selecting inside the left part.
            high = left - 1;
        }
        else
        {
            // The whole left part qualifies: find the rest on the right.
            low = left + 1;
            wanted -= front;
        }
    }
}

// Sift the element at index i down a max-heap stored in arr[0..size-1]:
// while a child (at LEFT(i) / RIGHT(i)) holds a larger value than the node,
// swap the node with its largest child and continue from that child.
//
// arr:  heap storage.
// size: number of elements that belong to the heap.
// i:    index of the node to sift down.
void max_heapify(int32_t arr[],const uint32_t size,uint32_t i)
{
    for (;;)
    {
        const uint32_t left = LEFT(i);
        const uint32_t right = RIGHT(i);
        uint32_t largest = i;

        if (left < size && arr[left] > arr[largest])
        {
            largest = left;
        }
        if (right < size && arr[right] > arr[largest])
        {
            largest = right;
        }
        if (largest == i)
        {
            return; // heap property holds at i
        }

        // The scratch variable has the element type, so negative values are
        // not routed through an unsigned temporary.
        int32_t tmp = 0;
        EXCHANGE(arr[i], arr[largest], tmp);
        i = largest;
    }
}

// Sift the element at index i down a min-heap stored in arr[0..size-1]:
// while a child (at LEFT(i) / RIGHT(i)) holds a smaller value than the node,
// swap the node with its smallest child and continue from that child.
//
// arr:  heap storage.
// size: number of elements that belong to the heap.
// i:    index of the node to sift down.
void min_heapify(int32_t arr[],const uint32_t size,uint32_t i)
{
    for (;;)
    {
        const uint32_t left = LEFT(i);
        const uint32_t right = RIGHT(i);
        uint32_t smallest = i;

        if (left < size && arr[left] < arr[smallest])
        {
            smallest = left;
        }
        if (right < size && arr[right] < arr[smallest])
        {
            smallest = right;
        }
        if (smallest == i)
        {
            return; // heap property holds at i
        }

        // The scratch variable has the element type, so negative values are
        // not routed through an unsigned temporary.
        int32_t tmp = 0;
        EXCHANGE(arr[i], arr[smallest], tmp);
        i = smallest;
    }
}

// Move the topn largest values of arr[0..arr_size-1] into arr[0..topn-1]
// (in heap order, not sorted) using a min-heap of topn elements.
//
// arr:      array to rearrange in place.
// arr_size: number of elements in arr.
// topn:     how many of the largest values to collect. Values <= 0 make the
//           call a no-op; values above arr_size are clamped to arr_size so
//           the heap never extends past the end of the array.
void get_topn_heap(int32_t arr[], const int32_t arr_size, const int32_t topn)
{
    if (arr == NULL || arr_size <= 0 || topn <= 0)
    {
        return;
    }

    const int32_t heap_size = (topn < arr_size) ? topn : arr_size;
    int32_t tmp = 0;

    // Build a min-heap (priority queue) over arr[0..heap_size-1];
    // arr[0] then holds the smallest of the current candidates.
    for (int32_t i = heap_size / 2; i >= 0; --i)
    {
        min_heapify(arr, (uint32_t)heap_size, (uint32_t)i);
    }

    for (int32_t i = heap_size; i < arr_size; ++i)
    {
        // Not larger than the smallest candidate: cannot be in the top set.
        if (arr[i] <= arr[0])
        {
            continue;
        }
        // Replace the smallest candidate and restore the heap property.
        EXCHANGE(arr[0], arr[i], tmp);
        min_heapify(arr, (uint32_t)heap_size, 0);
    }
}

// Print arr[0..cnt-1] on a single line of stdout, each value right-aligned
// in a field of width 4 and followed by a space, then a newline.
void dump1(int32_t arr[],const uint32_t cnt)
{
    uint32_t idx = 0;
    while (idx < cnt)
    {
        printf("%4d ",arr[idx]);
        idx++;
    }
    printf("\n");
}



// Print arr[start..end-1] (end exclusive) on a single line of stdout, each
// value right-aligned in a field of width 5 and followed by a space, then a
// newline. Only the newline is printed when start >= end.
void dump2(int32_t arr[],const uint32_t start,const uint32_t end)
{
    uint32_t pos = start;
    while (pos < end)
    {
        printf("%5d ",arr[pos]);
        pos++;
    }
    printf("\n");
}

// Benchmark driver.
// 1. Generates test files holding 10, 100, ... values (powers of ten up to
//    MAX_SIZE).
// 2. For each file: loads it, selects the top cnt/10 values once with the
//    min-heap method and once with quickselect, prints the elapsed clock()
//    ticks of each, then sorts both result prefixes and prints "OK" when
//    they are identical or "MISMATCH" otherwise.
// Returns 0 on success and 1 when the working buffers cannot be allocated.
int32_t main(int32_t argc, char *argv[])
{
    (void)argc;
    (void)argv;

    int32_t rc = 1;
    int32_t *arr = malloc(sizeof *arr * MAX_SIZE);
    int32_t *heap = malloc(sizeof *heap * MAX_SIZE);
    int32_t *quick = malloc(sizeof *quick * MAX_SIZE);
    if (arr == NULL || heap == NULL || quick == NULL)
    {
        printf("out of memory\n");
        goto out;
    }

    for (uint32_t gen_cnt = 10; gen_cnt <= MAX_SIZE; gen_cnt *= 10)
    {
        gen_test_data(gen_cnt);
    }

    for (uint32_t data_cnt = 10; data_cnt <= MAX_SIZE; data_cnt *= 10)
    {
        uint32_t cnt = 0;
        read_data(arr, MAX_SIZE, &cnt, data_cnt);
        printf("cnt=%lu\n", (unsigned long)cnt);

        // Nothing to select when fewer than 10 values were loaded (for
        // example because the file could not be read).
        const uint32_t topn = cnt / 10;
        if (topn == 0)
        {
            continue;
        }

        // Copy only the elements that were actually loaded.
        const size_t used_bytes = sizeof *arr * cnt;

        memcpy(heap, arr, used_bytes);
        clock_t start = clock();
        get_topn_heap(heap, (int32_t)cnt, (int32_t)topn);
        printf("heap use time:%ld\n", (long)(clock() - start));
        // Both methods leave the top values unordered; sort the prefixes so
        // they can be compared element by element.
        quick_sort(heap, 0, (int32_t)topn - 1);
        //dump2(heap,0,cnt/10);

        memcpy(quick, arr, used_bytes);
        start = clock();
        get_topn_quick(quick, 0, (int32_t)cnt - 1, (int32_t)topn);
        printf("quick use time:%ld\n", (long)(clock() - start));
        quick_sort(quick, 0, (int32_t)topn - 1);
        //dump2(quick,0,cnt/10);

        if (memcmp(heap, quick, sizeof *heap * topn) == 0)
        {
            printf("OK\n");
        }
        else
        {
            printf("MISMATCH\n");
        }
    }
    rc = 0;

out:
    free(quick);
    free(heap);
    free(arr);
    return rc;
}

函数 get_topn_heap 实现了用最小堆查找数组 arr 中最大的 topn 个数字,并将它们放置在数组中 [0, topn) 的位置

与前面的用快速排序的方法相比,用最小堆的方法效率稍低一些,快速排序方法:http://www.cnblogs.com/tangxin-blog/p/5617736.html

对比数据:

posted @ 2016-06-26 23:44  你好阿汤哥  Views(857)  Comments(0)  Edit  收藏  举报