topk问题的解决思路
topk问题:在n个数中取出前k大的数
实现思路
# 思路1:快排,排序后切片出最大的k个;时间复杂度: O(nlogn) + O(k),其中O(k)可忽略不计
# 思路2:冒泡、选择排序,循环k趟,只获取前k个大数; 时间复杂度: O(kn)
# 思路3:插入排序,维护k长度的有序列表,其他元素挨个和这个列表最小的数比较,小的丢弃,大的插入排序进去;时间复杂度:O(kn)
# 思路4:堆排序,建k长度的小根堆,其他元素挨个和堆顶元素比较,大的替换堆顶元素再向下调整为小根堆。最后挨个出数;时间复杂度:O(nlogk)
代码实现
快排实现
def quick_sort(li, left, right):
    """Sort ``li[left..right]`` (both bounds inclusive) in place, ascending.

    Each round calls ``partition`` to put one pivot in its final position,
    then sorts the two sides of that pivot.

    Args:
        li: Mutable sequence of mutually comparable items.
        left: Index of the first element of the range to sort.
        right: Index of the last element of the range to sort.

    Returns:
        None. ``li`` is modified in place.
    """
    while left < right:
        mid = partition(li, left, right)
        # Recurse only into the smaller side and keep looping over the larger
        # one. Every recursive call then covers at most half of the current
        # range, so the call stack stays logarithmic even when the pivot
        # choice is bad (e.g. already-sorted input), instead of growing
        # linearly and hitting the interpreter's recursion limit.
        if mid - left < right - mid:
            quick_sort(li, left, mid - 1)
            left = mid + 1
        else:
            quick_sort(li, mid + 1, right)
            right = mid - 1
def partition(li, left, right):
    """Partition ``li[left..right]`` in place around its first element.

    The value initially stored at ``li[left]`` is taken as the pivot. When the
    function returns, the pivot sits at the returned index, everything to its
    left inside the range is ``<=`` the pivot and everything to its right is
    ``>=`` the pivot.

    Args:
        li: Mutable sequence of mutually comparable items.
        left: Index of the first element of the range.
        right: Index of the last element of the range.

    Returns:
        The final index of the pivot.
    """
    pivot = li[left]
    lo, hi = left, right
    while lo < hi:
        # Move the right cursor leftwards over items that may stay on the
        # right-hand side of the pivot.
        while lo < hi and li[hi] >= pivot:
            hi -= 1
        # li[lo] is currently a free slot; drop the small item into it.
        li[lo] = li[hi]
        # Move the left cursor rightwards over items that may stay on the
        # left-hand side of the pivot.
        while lo < hi and li[lo] <= pivot:
            lo += 1
        # li[hi] is now the free slot; drop the large item into it.
        li[hi] = li[lo]
    # Both cursors met on the remaining free slot: that is the pivot's place.
    li[lo] = pivot
    return lo
# Test: sort a shuffled list with quick_sort, then slice off the 5 largest.
import random  # random.shuffle is used below and is not imported elsewhere

li = list(range(10))
random.shuffle(li)
print('排序前', li)
quick_sort(li, 0, len(li) - 1)
print('排序后', li)
# The list is ascending after sorting, so the top-k are the last k items.
topk = li[-5:]
print('topk', topk)
# output:
排序前 [7, 2, 1, 6, 5, 4, 0, 3, 9, 8]
排序后 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
topk [5, 6, 7, 8, 9]
冒泡排序实现
def topk(li, k):
    """Bubble the ``k`` largest items of ``li`` to its tail, in place.

    Runs ``k`` bubble-sort passes. After pass number ``p`` (0-based) the
    largest remaining item has been swapped into position
    ``len(li) - 1 - p``, so the last ``k`` slots end up holding the ``k``
    largest items in ascending order. The rest of the list is only
    partially ordered.

    Args:
        li: Mutable sequence of mutually comparable items.
        k: Number of passes to run, i.e. how many largest items to settle.

    Returns:
        None. Read the result from ``li[-k:]``.
    """
    size = len(li)
    for finished in range(k):
        # Slots after this boundary already hold their final values.
        boundary = size - 1 - finished
        for pos in range(boundary):
            nxt = pos + 1
            if li[pos] > li[nxt]:
                li[pos], li[nxt] = li[nxt], li[pos]
# Test: run 5 bubble passes on a shuffled list; the top-k end up at the tail.
import random  # random.shuffle is used below and is not imported elsewhere

li = list(range(10))
random.shuffle(li)
print('排序前', li)
topk(li, 5)
print('排序后', li)
print('topk', li[-5:])
# output:
排序前 [8, 9, 7, 2, 4, 5, 3, 0, 1, 6]
排序后 [2, 3, 0, 1, 4, 5, 6, 7, 8, 9]
topk [5, 6, 7, 8, 9]
选择排序实现
def topk(li, k):
    """Move the ``k`` largest items of ``li`` to its front, in place.

    Runs up to ``k`` selection-sort passes. Pass ``i`` finds the largest item
    in ``li[i:]`` and swaps it into position ``i``, so the first ``k`` slots
    end up holding the ``k`` largest items in descending order.

    Args:
        li: Mutable sequence of mutually comparable items.
        k: How many largest items to select. Values larger than ``len(li)``
            are treated as ``len(li)``; values ``<= 0`` leave ``li`` as is.

    Returns:
        None. Read the result from ``li[:k]``.
    """
    size = len(li)
    # Never run more passes than there are items: pass i reads li[i], which
    # would be out of range once i reaches len(li).
    for i in range(min(k, size)):
        max_index = i
        for j in range(i + 1, size):
            # Strict comparison keeps the earliest of several equal maxima.
            if li[j] > li[max_index]:
                max_index = j
        li[i], li[max_index] = li[max_index], li[i]
# Test: run 5 selection passes on a shuffled list; the top-k end up in front.
import random  # random.shuffle is used below and is not imported elsewhere

li = list(range(10))
random.shuffle(li)
print('排序前', li)
topk(li, 5)
print('排序后', li)
print('topk', li[:5])
# output:
排序前 [6, 9, 0, 3, 1, 7, 5, 4, 8, 2]
排序后 [9, 8, 7, 6, 5, 0, 1, 4, 3, 2]
topk [9, 8, 7, 6, 5]
插入排序实现
# 待补充......
堆排序实现
def sift(li, low, high):
    """Sift ``li[low]`` down a min-heap stored in ``li[low..high]``.

    The heap uses the array layout where the children of index ``i`` are
    ``2 * i + 1`` and ``2 * i + 2``. The value at ``low`` is lifted out, and
    smaller children are moved up into the gap until the value fits; indices
    greater than ``high`` are never read or written.

    Args:
        li: Mutable sequence holding the heap.
        low: Index of the subtree root whose value should be sifted down.
        high: Index of the last element that belongs to the heap.

    Returns:
        None. ``li`` is modified in place.
    """
    hole = low
    child = 2 * hole + 1
    value = li[low]
    while child <= high:
        sibling = child + 1
        # Follow the smaller of the two children, if a right child exists.
        if sibling <= high and li[sibling] < li[child]:
            child = sibling
        if li[child] < value:
            # The child is smaller than the value being placed: move it up
            # and continue one level deeper.
            li[hole] = li[child]
            hole = child
            child = 2 * hole + 1
            continue
        # Neither child is smaller, so the value belongs in the current gap.
        break
    li[hole] = value
def topk(li, k):
    """Return the ``k`` largest items of ``li`` in descending order.

    Keeps a min-heap of size ``k``: the heap root is the smallest of the
    current candidates, so any later item larger than the root replaces it.
    ``li`` itself is not modified; the heap is built on a copy of its first
    ``k`` items.

    Args:
        li: Sequence of mutually comparable items.
        k: How many largest items to return. Values larger than ``len(li)``
            are treated as ``len(li)``.

    Returns:
        A new list with the selected items, largest first. Empty when
        ``k <= 0`` or ``li`` is empty.
    """
    # Clamp k so the heap bounds passed to sift never exceed the real heap
    # size, and bail out early when there is nothing to select (heap[0]
    # would not exist below).
    k = min(k, len(li))
    if k <= 0:
        return []

    # Build a min-heap from the first k items, sifting from the last parent
    # node back to the root.
    heap = li[0:k]
    for i in range(k // 2 - 1, -1, -1):
        sift(heap, i, k - 1)

    # Replace the root with every later item that beats it, then restore the
    # heap property.
    for i in range(k, len(li)):
        if li[i] > heap[0]:
            heap[0] = li[i]
            sift(heap, 0, k - 1)

    # Pop items one by one: swap the current minimum to the end of the
    # shrinking heap, which leaves the list in descending order.
    for i in range(k - 1, -1, -1):
        heap[0], heap[i] = heap[i], heap[0]
        sift(heap, 0, i - 1)
    return heap
# Test: pick the 5 largest items of a shuffled list with the heap-based topk.
import random  # random.shuffle is used below and is not imported elsewhere

li = list(range(10))
random.shuffle(li)
print('排序前', li)
heap = topk(li, 5)
print('topk', heap)
# output:
排序前 [9, 1, 0, 6, 2, 4, 7, 3, 8, 5]
topk [9, 8, 7, 6, 5]