最大堆即大根堆 -- 求前n小
最小堆即小根堆 -- 求前n大
比如求前n小:堆中始终保存目前见过的最小的n个元素,堆顶是其中的最大值。对于当前元素,和最大堆的堆顶元素(即堆最大值)比较,如果小于堆顶元素,则替换堆顶元素,并调整堆。这样扫描一遍就可以得到最小的n个元素;设总共有N个数,时间复杂度为O(N log n),额外空间只需O(n)。
问题实例:
从100万个数中找出最大的前100个数:
1. 用INT_MIN初始化最小堆的每个元素;
2. 依次读入每个数,如果大于堆顶元素,则替换堆顶元素,并调整堆。
// Heap storage is 1-based: heap[1] is the root and heap[0] is never touched.
// For the node at index i, these give the indices of its two children.
#define LCHILD(i) (2*(i))
#define RCHILD(i) (2*(i)+1)

// Heap kinds passed as the `flg` argument of the functions below.
// The values convert to bool: BIGHEAP -> false, SMALLHEAP -> true.
enum {
    BIGHEAP=0,   // max-heap: the largest element sits at the root
    SMALLHEAP=1, // min-heap: the smallest element sits at the root
};

// Restore the heap property after heap[1] has been overwritten.
//
// Sifts the value stored at the root down through heap[1..n] until neither
// child should be above it. Only the root is repaired; the sub-trees below it
// are expected to be valid heaps already.
//
//   heap - 1-based array with valid elements at indices 1..n.
//   n    - number of elements in the heap.
//   flg  - BIGHEAP for a max-heap, SMALLHEAP for a min-heap.
//
// T needs copy construction/assignment and operators < and >.
// A null heap or n == 0 is a no-op.
template <typename T>
void HeapAdjust(T *heap, size_t n, bool flg)
{
    // With no elements there is no heap[1] to read or write.
    if (!heap || n == 0)
        return;

    const bool maxHeap = (flg == BIGHEAP);
    T root = heap[1];  // value being sifted down; written back exactly once at the end
    size_t i = 1;      // current hole position

    while (LCHILD(i) <= n)
    {
        // Indices are size_t regardless of T, so the template also works for
        // element types that are not integers.
        size_t ex = LCHILD(i);
        const size_t right = RCHILD(i);

        // Pick the child that would have to sit on top: the larger one for a
        // max-heap, the smaller one for a min-heap.
        if (right <= n && (maxHeap ? heap[ex] < heap[right] : heap[ex] > heap[right]))
            ex = right;

        const bool childGoesUp = maxHeap ? (heap[ex] > root) : (heap[ex] < root);
        if (!childGoesUp)
            break;

        heap[i] = heap[ex];
        i = ex;
    }
    heap[i] = root;
}
// Read whitespace-separated integers from `file` and print n of them, one per
// line, in heap order (not sorted).
//
//   file - path of a text file readable with fscanf("%d").
//   n    - how many values to keep.
//   flg  - BIGHEAP keeps the n smallest values (max-heap),
//          SMALLHEAP keeps the n largest values (min-heap).
//
// Every slot starts as a sentinel (INT_MAX for BIGHEAP, INT_MIN for
// SMALLHEAP), so if the file holds fewer than n integers the unused slots are
// printed as that sentinel. Reading stops at end of file or at the first token
// that is not an integer.
//
// Does nothing when n is 0 or `file` is null; reports to stderr and returns
// when the file cannot be opened.
void FindNNumber(const char *file, size_t n, bool flg)
{
    // heap[1] is accessed unconditionally below, so an empty heap is not usable.
    if (!file || n == 0)
        return;

    // Open the file before allocating so the failure path has nothing to free.
    FILE *fp = fopen(file, "r");
    if (!fp)
    {
        fprintf(stderr, "FindNNumber: cannot open %s\n", file);
        return;
    }

    const bool keepSmallest = (flg == BIGHEAP);

    // 1-based heap: index 0 is unused.
    int *heap = new int[n+1];
    for (size_t i=1; i<=n; i++)
        heap[i] = keepSmallest ? INT_MAX : INT_MIN;

    int a;
    while (fscanf(fp, "%d", &a) == 1)
    {
        // The root is the worst value currently kept; only a better value
        // displaces it.
        const bool displacesRoot = keepSmallest ? (a < heap[1]) : (a > heap[1]);
        if (displacesRoot)
        {
            heap[1] = a;
            // Re-sifting is only needed when the root actually changed.
            HeapAdjust(heap, n, flg);
        }
    }
    fclose(fp);

    for (size_t i=1; i<=n; i++)
        printf("%d\n", heap[i]);

    delete[] heap;
}
// Demo entry point: prints 10 values selected from temp.txt, two newlines,
// then 10 values selected from tt.txt.
// Standard C++ requires main to return int.
int main()
{
    // The n smallest numbers (tracked with a max-heap)
    FindNNumber("temp.txt", 10, BIGHEAP);
    printf("\n\n");
    // The n largest numbers (tracked with a min-heap)
    FindNNumber("tt.txt", 10, SMALLHEAP);
    return 0;
}