在线求中位数
在线求第k个数做得多了,在线求中位数也是用堆,一个最大堆,一个最小堆。
思想大概是这样子的:
- 一个最大堆,一个最小堆:最大堆保存较小的那一半数(n 个或 n+1 个),最小堆保存较大的那一半数(n 个或 n+1 个);假设最大堆堆顶元素为n1, 最小堆堆顶元素为n2, 则n1 <= n2;
- 确保两个堆的大小最多只差1. 设最大堆大小为s1, 最小堆大小为s2,则abs(s1-s2) <= 1;
- 对于新来的数m,分情况调整:
- 如果s1== s2, 那么:如果m<= n2, m插入到最大堆,s1= s1+1; 否则插入到最小堆,s2=s2+1;
- 如果s1 > s2, 那么:如果m >= n1, m插入到最小堆,s2=s2+1;否则,n1pop出最大堆,然后push进最小堆,然后m进最大堆,s2=s2+1;
- 如果s1 < s2, 那么:如果m <=n2, m插入到最大堆,s1=s1+1;否则,n2pop出最小堆,然后push进最大堆,然后m进最小堆,s1=s1+1;
- 每次调整之后读取中位数:如果s1 == s2, 中位数为(n1 + n2) / 2;否则中位数就是元素较多的那个堆的堆顶。
代码如下:
namespace {

// Max-heap holding the smaller half of the values seen so far.
typedef std::priority_queue<int, std::vector<int>, std::less<int> > MaxHeap;
// Min-heap holding the larger half of the values seen so far.
typedef std::priority_queue<int, std::vector<int>, std::greater<int> > MinHeap;

// Inserts `value` into the two-heap structure and returns the median of all
// values inserted so far.
//
// Invariants kept across calls (both hold trivially for two empty heaps):
//   * lower.top() <= upper.top() whenever both heaps are non-empty;
//   * the heap sizes differ by at most one.
//
// With an odd count the median is the top of the larger heap; with an even
// count it is the mean of the two tops.
double insertAndGetMedian(MaxHeap& lower, MinHeap& upper, int value) {
  if (lower.size() == upper.size()) {
    // Equal sizes: the receiving heap becomes the larger one, so its top is
    // the median. `upper` is empty only when both heaps are empty.
    if (upper.empty() || value <= upper.top()) {
      lower.push(value);
      return lower.top();
    }
    upper.push(value);
    return upper.top();
  }

  if (lower.size() > upper.size()) {
    // `lower` has one extra element, so it is non-empty; `upper` must grow.
    if (value >= lower.top()) {
      upper.push(value);
    } else {
      // `value` belongs to the smaller half: hand the current maximum of that
      // half over to `upper` first, then take `value` in its place.
      upper.push(lower.top());
      lower.pop();
      lower.push(value);
    }
  } else {
    // Mirror case: `upper` has one extra element, so `lower` must grow.
    if (value <= upper.top()) {
      lower.push(value);
    } else {
      lower.push(upper.top());
      upper.pop();
      upper.push(value);
    }
  }

  // Sizes are equal again. Add in double so the sum of two ints cannot
  // overflow before the division.
  return (static_cast<double>(lower.top()) + upper.top()) / 2.0;
}

}  // namespace

// Demo driver: generates random integers, prints them on one line, then feeds
// them one at a time into the two-heap structure and prints the running
// median after each insertion as "<index>-th: <median>".
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;

  const int kSampleCount = 10000;  // number of random inputs to stream
  const int kValueBound = 1000;    // inputs are rand() % kValueBound

  std::vector<int> nums;
  nums.reserve(kSampleCount);
  std::srand(static_cast<unsigned int>(std::time(NULL)));
  for (int i = 0; i < kSampleCount; ++i) {
    nums.push_back(std::rand() % kValueBound);
    std::cout << nums.back() << " ";
  }
  std::cout << '\n';

  MaxHeap lowerHalf;
  MinHeap upperHalf;
  for (std::size_t i = 0; i < nums.size(); ++i) {
    const double median = insertAndGetMedian(lowerHalf, upperHalf, nums[i]);
    // '\n' instead of endl: same text, without a flush on every line.
    std::cout << i << "-th: " << median << '\n';
  }

  return 0;
}