多线程处理N维度topk问题demo--[c++]

问题

-对多维度特征分别进行 top-k 排序:每个维度使用一个 C++ 优先队列(std::priority_queue)实现的最小堆,堆中只保留当前最大的 k 个元素。


/*
----------------------------------
Version    : ??
File Name :     dm2.py
Description :   Multi-threaded top-k demo: one min-heap per dimension keeps the k largest values
Author  :       xijun1
Email   :      
Date    :       2018/12/7
-----------------------------------
Change Activity  :   2018/12/7
-----------------------------------
__author__ = 'xijun1'
*/
#include <iostream>
#include <queue>
#include <unordered_map>
#include <future>
using namespace std;

/**
 * Per-dimension top-k collector.
 *
 * Holds one min-heap per dimension. Every heap stores (id, score) pairs ordered
 * by score, so the smallest retained score is always at the top and can be
 * evicted once a heap grows past max_topk entries.
 *
 * The class does no locking of its own. It also performs no bounds checking on
 * the dimension index `k`: callers must pass 0 <= k < getdim().
 */
class MinHeapUtil {
private:
    /// (id, score) entry stored in the heaps.
    using Entry = std::pair<unsigned int, int>;

    /// Orders entries so that the one with the smallest score is on top (min-heap).
    struct HeapGreatcompare
    {
        bool operator()(const Entry& node_a, const Entry& node_b) const {
            return node_a.second > node_b.second;
        }
    };

    using Heap = std::priority_queue<Entry, std::vector<Entry>, HeapGreatcompare>;

    /// One heap per dimension; sized by setShape().
    std::vector<Heap> pq_mat;
    /// Maximum number of entries kept per dimension. Zero until setShape() is
    /// called, so getMaxTopK() never reads an indeterminate value.
    unsigned int max_topk = 0;

public:
    /// Sets the number of dimensions and the per-dimension capacity.
    bool setShape(const int dim, const unsigned int max_topk);

    /// Number of dimensions currently allocated.
    int getdim() const {
        return static_cast<int>(this->pq_mat.size());
    }

    /// Per-dimension capacity given to setShape() (0 if never called).
    int getMaxTopK() const {
        return static_cast<int>(this->max_topk);
    }

    /// Number of entries currently held for dimension k.
    std::size_t getSize(int k) const {
        return this->pq_mat[k].size();
    }

    /// Offers every entry of `data` to dimension k.
    int push_back(std::vector<Entry>& data, int k);

    // NOTE: the two single-entry overloads below differ only in by-value vs.
    // non-const reference. A temporary selects the by-value overload; a call
    // with a non-const lvalue is ambiguous between the two.
    /// Offers one entry to dimension k.
    int push_back(Entry word_prob, int k);
    /// Offers one entry to dimension k.
    int push_back(Entry& word_prob, int k);

    /**
     * Removes and returns the entry with the smallest score in dimension k.
     * Precondition: getSize(k) > 0 (top()/pop() on an empty heap is undefined).
     */
    Entry get_itop(int k) {
        Heap& heap = this->pq_mat[k];
        Entry word_prob = heap.top();
        heap.pop();
        return word_prob;
    }
};

/**
 * Sets the number of dimensions and the per-dimension capacity.
 *
 * @param dim       number of per-dimension heaps to hold; must be >= 0
 * @param max_topk  maximum number of entries kept in each heap
 * @return true on success; false, with the object left unchanged, when dim is
 *         negative (a negative value would otherwise be converted to an
 *         enormous unsigned size and make resize() throw)
 */
bool MinHeapUtil::setShape(const int dim, const unsigned  int max_topk) {
    if (dim < 0) {
        return false;
    }
    this->pq_mat.resize(static_cast<std::size_t>(dim));
    this->max_topk = max_topk;
    return true;
}
 /**
  * Offers every (id, score) entry of `data` to the heap of dimension k, keeping
  * at most max_topk entries with the largest scores.
  *
  * Entries are stored unchanged, matching the single-entry overloads; the
  * earlier version swapped first/second, so the heap ended up ordering by id.
  *
  * @param data  entries to offer; not modified
  * @param k     dimension index, 0 <= k < getdim() (not checked)
  * @return always 0
  */
 int  MinHeapUtil::push_back(std::vector< std::pair<unsigned int, int> > &data ,int k) {
    if (this->max_topk == 0) {
        return 0;  // nothing may be retained
    }
    auto &heap = this->pq_mat[ k ];
    for (auto const& word_prob : data) {
        if (heap.size() < this->max_topk) {
            heap.push(word_prob);
        } else if (heap.top().second < word_prob.second) {
            // Heap is full: replace the current minimum only when the new score beats it.
            heap.pop();
            heap.push(word_prob);
        }
    }
    return 0;
}
/**
 * Offers one (id, score) entry to the heap of dimension k, keeping at most
 * max_topk entries with the largest scores.
 *
 * The entry is stored when the heap is not yet full, or when its score is
 * strictly greater than the smallest retained score (which is then evicted).
 *
 * @param word_prob  entry to offer; `second` is the score used for ordering
 * @param k          dimension index, 0 <= k < getdim() (not checked)
 * @return always 0
 */
int MinHeapUtil::push_back(std::pair<unsigned int, int> word_prob, int k) {
    if (this->max_topk == 0) {
        // Nothing may be retained. Returning here also avoids calling top()
        // on an empty heap, which is undefined behaviour.
        return 0;
    }
    auto &heap = this->pq_mat[k];
    if (heap.size() < this->max_topk) {
        heap.push(word_prob);
    } else if (heap.top().second < word_prob.second) {
        heap.pop();
        heap.push(word_prob);
    }
    return 0;
}


/**
 * Inserts one (id, score) entry into the heap of dimension k and, if the heap
 * then holds more than max_topk entries, evicts the entry on top (the one with
 * the smallest score).
 *
 * @param word_prob  entry to insert; it is copied, not modified
 * @param k          dimension index (not bounds-checked)
 * @return always 0
 */
int MinHeapUtil::push_back(std::pair<unsigned int, int> &word_prob, int k) {
    auto &heap = this->pq_mat[k];
    heap.push(word_prob);
    const bool over_capacity = heap.size() > this->max_topk;
    if (over_capacity) {
        // Trim back down to max_topk entries.
        heap.pop();
    }
    return 0;
}
// Demo input shared by main() and its worker tasks: key -> one int value per dimension.
// main() fills it before launching the workers; the workers only read from it.
std::unordered_map<unsigned int , std::vector< int > > test_data;
int  main() {
    MinHeapUtil top_words ;

    top_words.setShape(5,4);
    int total_size = 5;

//    MinHeapUtil.push_back(std::pair<unsigned int, int>(8,2),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(7,3),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(6,4),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(5,5),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(4,6),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(3,7),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(2,4),0);
//    MinHeapUtil.push_back(std::pair<unsigned int, int>(1,8),0);
    for (int j = 0; j < 1000000; ++j) {
        std::vector< int > tmp(5);
        for (int i = 0; i < 5; ++i) {
            tmp[i] = j*5 + i;
        }
        test_data[j] = std::move(tmp);
    }
    struct save_func
    {
        void operator () (int tn  ,int thread_nums,int total_size , MinHeapUtil *top_words )
        {
            std::cout<<tn <<" : "<< thread_nums<<" : "<<total_size<<std::endl;
            for (int i = tn; i < total_size ; i+=thread_nums) {
                for ( auto word_key : test_data) {
                    //std::cout<<word_key.first;
                    top_words->push_back( std::pair<unsigned int, int>(word_key.first, word_key.second[ i ] ) , i );
                }
            }
            std::cout << "func " << tn << std::endl;
        }
    } func;
    unsigned thread_nums = 8;
    std::vector<std::future< void >> muliti_topword_threads;

    for (unsigned tn = 0; tn < thread_nums; ++tn)
        muliti_topword_threads.emplace_back(
                std::async(std::launch::async, func, (int)tn,(int)thread_nums,total_size,&top_words));

    for (unsigned tn = 0; tn < thread_nums; ++tn)
        muliti_topword_threads[tn].get();

    for (int k = 0; k < total_size; ++k) {
        size_t range = top_words.getSize( k );
        for (int i = 0; i < range; ++i) {
            auto temp = top_words.get_itop( k );
            std::cout <<"k:"<<k<<"  ( " << temp.first << "     " << temp.second << ")\n";
        }
    }
    return 0;
}



--结果(各工作线程同时向 std::cout 输出,因此开头几行的内容会相互交错)

01 :  : 88 :  : 55

2 : 8 : 5
3 : 8 : 5
4 : 8 : 5
5 : 8 : 5
func 5
6 : 8 : 5
func 6
7 : 8 : 5
func 7
func 1
func 4
func 0
func 2
func 3
k:0  ( 999996     4999980)
k:0  ( 999997     4999985)
k:0  ( 999998     4999990)
k:0  ( 999999     4999995)
k:1  ( 999996     4999981)
k:1  ( 999997     4999986)
k:1  ( 999998     4999991)
k:1  ( 999999     4999996)
k:2  ( 999996     4999982)
k:2  ( 999997     4999987)
k:2  ( 999998     4999992)
k:2  ( 999999     4999997)
k:3  ( 999996     4999983)
k:3  ( 999997     4999988)
k:3  ( 999998     4999993)
k:3  ( 999999     4999998)
k:4  ( 999996     4999984)
k:4  ( 999997     4999989)
k:4  ( 999998     4999994)
k:4  ( 999999     4999999)
posted @ 2018-12-12 11:40  龚细军  阅读(508)  评论(0编辑  收藏  举报