PageRank 算法入门
http://blog.csdn.net/midgard/article/details/7061721
这篇文章很不错。
// Link-graph node type and PageRank driver declaration.
// Presumably saved as PageRank.h: the accompanying implementation listing
// includes a header of that name and defines the PageRank members declared here.
#ifndef PAGERANK_H_
#define PAGERANK_H_

#include <cstddef>
#include <iostream>
#include <set>
#include <string>
#include <vector>

// NOTE(review): a using-directive in a header leaks into every file that
// includes it. It is kept only because existing includers use unqualified
// standard names (vector, cout); new code should qualify with std::.
using namespace std;

// One web page in the link graph.
//
// Each node keeps two sets of non-owning pointers: the pages that link to it
// (in-links) and the pages it links to (out-links). Nodes never delete the
// nodes they point at; whoever allocates the nodes owns them.
class Node {
public:
    // name: label used when printing.  pr: initial PageRank value (default 1).
    explicit Node(const std::string& name, double pr = 1)
        : name_(name), page_rank_(pr) {
    }

    // No user-written destructor: the sets release their own storage and the
    // pointed-to nodes are not owned by this object.

    // Records the edge node -> this on BOTH endpoints.
    // A null pointer is ignored; repeated insertions are no-ops because
    // std::set stores each pointer at most once.
    void InsertLinkdInNode(Node* node) {
        if (!node) {
            return;
        }
        linkin_nodes_.insert(node);
        node->InsertLinkOutNode(this);
    }

    // Records the edge this -> node on this endpoint only.
    void InsertLinkOutNode(Node* node) {
        if (!node) {
            return;
        }
        linkout_nodes_.insert(node);
    }

    // Records the edge node -> this on this endpoint only. Used as the second
    // half of InsertLinkOutNode1. When called on its own, `node` ends up with
    // no matching out-link; CalcRank() skips such sources.
    void InsertLinkdInNode1(Node* node) {
        if (!node) {
            return;
        }
        linkin_nodes_.insert(node);
    }

    // Records the edge this -> node on BOTH endpoints.
    void InsertLinkOutNode1(Node* node) {
        if (!node) {
            return;
        }
        linkout_nodes_.insert(node);
        node->InsertLinkdInNode1(this);
    }

    // Current PageRank value.
    double GetPageRank() const {
        return page_rank_;
    }

    // Overwrites the PageRank value.
    void SetPageRank(double pr) {
        page_rank_ = pr;
    }

    // Sum over every in-linking node of (its rank / its out-link count).
    // The damping factor is NOT applied here.
    double CalcRank() const {
        double sum = 0;
        for (std::set<Node*>::const_iterator it = linkin_nodes_.begin();
             it != linkin_nodes_.end(); ++it) {
            const Node* source = *it;
            const std::size_t out_degree = source->GetOutBoundNum();
            if (out_degree == 0) {
                // Only reachable when an in-link was added without the
                // matching out-link; skipping avoids a division by zero.
                continue;
            }
            sum += source->GetPageRank() / static_cast<double>(out_degree);
        }
        return sum;
    }

    // Number of distinct pages this page links to.
    std::size_t GetOutBoundNum() const {
        return linkout_nodes_.size();
    }

    // Number of distinct pages linking to this page.
    std::size_t GetInBoundNum() const {
        return linkin_nodes_.size();
    }

    // Writes the node's name and current rank to standard output.
    void PrintNode() const {
        cout << "Node:" << name_ << " 's pagerank is: " << page_rank_ << endl;
    }

private:
    string name_;
    set<Node*> linkin_nodes_;   // pages linking to this one (not owned)
    set<Node*> linkout_nodes_;  // pages this one links to (not owned)
    double page_rank_;
};

// Iterative PageRank driver. Member functions are defined in a separate
// implementation file.
class PageRank {
public:
    // q: damping factor; the original author's note says it must be < 1.
    PageRank(double q = 0.85);
    ~PageRank(void);

    // Runs n update passes over every node in `nodes`.
    void Calc(vector<Node*> & nodes, int n);

    // Recomputes and stores the rank of a single node; returns the new rank.
    double Calc(Node* node);

    // Prints every node followed by the sum of all ranks.
    void PrintPageRank(vector<Node*> & nodes);

private:
    double q_;  // damping factor
};

#endif  // PAGERANK_H_
1 #include "PageRank.h" 2 #include <iostream> 3 4 PageRank::PageRank(double q) : 5 q_(q) { 6 // q_ must < 1 7 } 8 9 PageRank::~PageRank(void) { 10 } 11 12 // 迭代计算n次 13 void PageRank::Calc(vector<Node*> & nodes, int n) { 14 for (int i = 0; i < n; ++i) { 15 vector<Node*>::const_iterator citr = nodes.begin(); 16 for (; citr != nodes.end(); ++citr) { 17 Node * node = *citr; 18 Calc(node); 19 } 20 } 21 } 22 23 void PageRank::PrintPageRank(vector<Node*> & nodes) { 24 double total_pr = 0; 25 vector<Node*>::const_iterator citr = nodes.begin(); 26 for (; citr != nodes.end(); ++citr) { 27 Node * node = *citr; 28 node->PrintNode(); 29 total_pr += node->GetPageRank(); 30 } 31 cout << "Total PR:" << total_pr << endl; 32 } 33 34 double PageRank::Calc(Node * node) { 35 double pr = node->CalcRank(); 36 if (pr < 0.00000000000000000000001 && pr > -0.00000000000000000000001) //pr == 0 37 { 38 pr = 1 - q_; 39 } else { 40 pr = pr * q_ + 1 - q_; 41 } 42 node->SetPageRank(pr); 43 return pr; 44 }
1 #include <iostream> 2 #include <vector> 3 #include <string> 4 #include <map> 5 #include "PageRank.h" 6 7 using namespace std; 8 9 void InitGraph(vector<Node*> & nodes) { // 邻接表存储方式 10 // example 1 11 Node * a = new Node("A"); 12 Node * b = new Node("B"); 13 Node * c = new Node("C"); 14 Node * d = new Node("D"); 15 nodes.push_back(a); 16 nodes.push_back(b); 17 nodes.push_back(c); 18 nodes.push_back(d); 19 // link in node 20 // a <- b, c, d 21 a->InsertLinkdInNode(b); 22 a->InsertLinkdInNode(c); 23 a->InsertLinkdInNode(d); 24 // b <- d 25 d->InsertLinkOutNode1(b); 26 //b->InsertLinkdInNode(d); 27 // c <- b, d 28 c->InsertLinkdInNode(b); 29 c->InsertLinkdInNode(d); 30 } 31 32 void TestPageRank() { 33 // build graph 34 vector<Node*> nodes; 35 InitGraph(nodes); 36 PageRank pr; 37 // 迭代计算5次 pagerank 38 pr.Calc(nodes, 80); 39 pr.PrintPageRank(nodes); 40 } 41 42 int main(int argc, const char ** argv) { 43 TestPageRank(); 44 return 0; 45 }