1、问题
什么是 Top K 问题?简单来说就是在一堆数据里面找到前 K 大(当然也可以是前 K 小)的数。这是一个非常经典的算法问题,不论是面试中还是实际开发中,都非常典型。
2、解决方案
2.1、直接排序
#include <iostream>
#include <vector>
std::vector<int> GetLeastNumbers(std::vector<int> input, int k) {
std::vector<int> ans;
if (k > input.size() || input.size() == 0 || k == 0) {
return ans;
}
std::sort(input.begin(), input.end());
for (int i = 0; i < k; i++) {
ans.push_back(input[i]);
}
return ans;
}
int main() {
std::vector<int> arr = {61, 17, 29, 22, 34, 60, 72, 21, 50, 1, 62};
std::vector<int> ans = GetLeastNumbers(arr, 3);
for (int i = 0; i < ans.size(); i++)
std::cout << ans[i] << ' ';
std::cout << std::endl;
return 0;
}
时间复杂度: O(n*log(n))
2.2、局部排序
#include <iostream>
#include <vector>
std::vector<int> GetLeastNumbers(std::vector<int> input, int k) {
if (k > input.size() || input.size() == 0 || k == 0) {
return input;
}
for (int i = 0; i < k; i++) {
for (int j = 0; j < input.size() - 1 - i; j++) {
if (input[j] > input[j + 1])
std::swap(input[j], input[j + 1]);
}
}
std::vector<int>::const_iterator start = input.end() - k;
std::vector<int>::const_iterator end = input.end();
std::vector<int> anc(start, end);
return anc;
}
int main() {
std::vector<int> arr = {61, 17, 29, 22, 34, 60, 72, 21, 50, 1, 62};
std::vector<int> ans = GetLeastNumbers(arr, 3);
for (int i = 0; i < ans.size(); i++)
std::cout << ans[i] << ' ';
std::cout << std::endl;
return 0;
}
时间复杂度: O(n*k)
分析:冒泡,将全局排序优化为了局部排序,非TopK的元素是不需要排序的,节省了计算资源。
2.3、堆排序
#include <iostream>
#include <vector>
using namespace std;
void BuildMaxHeap(vector<int> &input, vector<int> &heap, int k) {
for (int i = 0; i < k; i++) {
heap.push_back(input[i]);
}
sort(heap.begin(), heap.end());
reverse(heap.begin(), heap.end());
}
void ShiftMaxDown(vector<int> &heap) {
int cur_root = 0;
int cur_left = 2 * cur_root + 1;
int cur_right = 2 * cur_root + 2;
while (true) {
if (cur_left < heap.size() && heap[cur_root] < heap[cur_left]) {
int temp = heap[cur_root];
heap[cur_root] = heap[cur_left];
heap[cur_left] = temp;
cur_root = cur_left;
cur_left = 2 * cur_root + 1;
cur_right = 2 * cur_root + 2;
} else if (cur_right < heap.size() && heap[cur_root] < heap[cur_right]) {
int temp = heap[cur_root];
heap[cur_root] = heap[cur_right];
heap[cur_right] = temp;
cur_root = cur_right;
cur_left = 2 * cur_root + 1;
cur_right = 2 * cur_root + 2;
} else {
break;
}
}
}
vector<int> GetLeastNumbers(vector<int> input, int k) {
vector<int> ans;
if (k > input.size() || input.size() == 0 || k == 0) {
return ans;
}
BuildMaxHeap(input, ans, k);
for (int i = k; i < input.size(); i++) {
if (ans[0] > input[i]) {
ans[0] = input[i];
ShiftMaxDown(ans);
}
}
return ans;
}
int main() {
std::vector<int> arr = {61, 17, 29, 22, 34, 60, 72, 21, 50, 1, 62};
std::vector<int> ans = GetLeastNumbers(arr, 3);
for (int i = 0; i < ans.size(); i++)
std::cout << ans[i] << ' ';
std::cout << std::endl;
return 0;
}
时间复杂度: O(n*log(k))
分析:不一定要用堆排序来处理这个,其他排序算法也可以做到这个时间复杂度~