C# HttpWebRequest 从google服务器获取google的PageRank PR值

首先,本文的实现参考了这篇:

http://www.codeproject.com/KB/aspnet/Google_Pagerank.aspx

简述一下原理:

获取PR值,通过向google服务器发送一个http请求来实现。

http://toolbarqueries.google.com.hk/search?client=navclient-auto&hl=en&ch=6771535612&ie=UTF-8&oe=UTF-8&features=Rank&q=info:http%3A%2F%2Fwww.codeproject.com%2F

在浏览器中输入上面的链接,google服务器会返回一个字符串。

上面的链接返回的是:Rank_1:1:6

要实现对任意链接的PR值的查询,关键是要正确构造请求的URL。其中ch=6771535612这一项很重要,它是请求网址的hash值,这个hash值的算法在上面的那篇文章中有详细的说明。

这里将主要的代码转贴在下面:

        // Initial value loaded into the third hash state word (c) by GoogleCH.
        private const UInt32 GOOGLE_MAGIC = 0xE6359A60;

/// <summary>
/// Scrambles the three 32-bit state words in place. Each of the nine steps
/// subtracts the other two words from one word and then XORs in a shifted
/// copy of a neighbouring word.
/// </summary>
/// <param name="a">First state word; read and overwritten.</param>
/// <param name="b">Second state word; read and overwritten.</param>
/// <param name="c">Third state word; read and overwritten.</param>
private static void _mix(ref UInt32 a, ref UInt32 b, ref UInt32 c)
{
    // The subtractions are meant to wrap modulo 2^32. Marking the body
    // 'unchecked' keeps that true even when the project is compiled with
    // arithmetic overflow checking enabled, where they would otherwise throw.
    unchecked
    {
        a -= b; a -= c; a ^= c >> 13;
        b -= c; b -= a; b ^= a << 8;
        c -= a; c -= b; c ^= b >> 13;

        a -= b; a -= c; a ^= c >> 12;
        b -= c; b -= a; b ^= a << 16;
        c -= a; c -= b; c ^= b >> 5;

        a -= b; a -= c; a ^= c >> 3;
        b -= c; b -= a; b ^= a << 10;
        c -= a; c -= b; c ^= b >> 15;
    }
}

/// <summary>
/// Computes the checksum string that is sent as the <c>ch</c> query parameter
/// for a URL. The text "info:" is prepended to <paramref name="url"/>, the
/// resulting characters are folded into three 32-bit words twelve at a time,
/// and the words are scrambled with <c>_mix</c>.
/// </summary>
/// <param name="url">The URL to hash, without the "info:" prefix.</param>
/// <returns>The character '6' followed by the decimal value of the final hash word.</returns>
public static string GoogleCH(string url)
{
    url = string.Format("info:{0}", url);

    int length = url.Length;
    int k = 0;          // index of the first character not yet consumed
    int len = length;   // number of characters still to consume

    UInt32 a = 0x9E3779B9;
    UInt32 b = 0x9E3779B9;
    UInt32 c = GOOGLE_MAGIC;

    // All arithmetic below is expected to wrap modulo 2^32, and a character
    // shifted left by 24 may produce a negative int before the cast. Running
    // it 'unchecked' prevents an OverflowException when the project is built
    // with arithmetic overflow checking enabled.
    // NOTE(review): each char is combined as if it were one byte; characters
    // above 0xFF would spill into neighbouring positions - presumably the
    // input is expected to be ASCII, confirm with callers.
    unchecked
    {
        // Consume full 12-character groups: four characters per state word,
        // lowest position in the lowest byte.
        while (len >= 12)
        {
            a += (UInt32)(url[k + 0] + (url[k + 1] << 8) + (url[k + 2] << 16) + (url[k + 3] << 24));
            b += (UInt32)(url[k + 4] + (url[k + 5] << 8) + (url[k + 6] << 16) + (url[k + 7] << 24));
            c += (UInt32)(url[k + 8] + (url[k + 9] << 8) + (url[k + 10] << 16) + (url[k + 11] << 24));
            _mix(ref a, ref b, ref c);
            k += 12;
            len -= 12;
        }

        c += (UInt32)length;

        // Fold in the remaining 0..11 characters. Every case chains to the
        // next lower one, so a tail of N characters runs cases N down to 1.
        switch (len)
        {
            case 11:
                c += (UInt32)(url[k + 10] << 24);
                goto case 10;
            case 10:
                c += (UInt32)(url[k + 9] << 16);
                goto case 9;
            case 9:
                c += (UInt32)(url[k + 8] << 8);
                goto case 8;
            /* the first byte of c is reserved for the length */
            case 8:
                b += (UInt32)(url[k + 7] << 24);
                goto case 7;
            case 7:
                b += (UInt32)(url[k + 6] << 16);
                goto case 6;
            case 6:
                b += (UInt32)(url[k + 5] << 8);
                goto case 5;
            case 5:
                b += (UInt32)(url[k + 4]);
                goto case 4;
            case 4:
                a += (UInt32)(url[k + 3] << 24);
                goto case 3;
            case 3:
                a += (UInt32)(url[k + 2] << 16);
                goto case 2;
            case 2:
                a += (UInt32)(url[k + 1] << 8);
                goto case 1;
            case 1:
                a += (UInt32)(url[k + 0]);
                break;
            default:
                /* case 0: nothing left to add */
                break;
        }

        _mix(ref a, ref b, ref c);
    }

    return string.Format("6{0}", c);
}

  

再给出一个调用的用例供参考:

            try
            {
                // Build the request URL. The checksum is computed over the raw
                // URL text, while the copy placed in the query string is
                // percent-encoded so that characters such as '&', '#' or '/'
                // in the target URL cannot break the query.
                string checksum = GoogleCH(txtUrl.Text);
                string query = string.Format(@"http://toolbarqueries.google.com/search?client=navclient-auto&ch={0}&features=Rank&q=info:{1}", checksum, System.Uri.EscapeDataString(txtUrl.Text));

                // Send the request and obtain the response.
                request = (HttpWebRequest)WebRequest.Create(query);
                response = (HttpWebResponse)request.GetResponse();

                if (response == null)
                {
                    txtResponse.Text = "response==NULL";
                    return;
                }

                // Read the whole body before parsing so that the rank text
                // cannot be split across two reads, and dispose the response
                // and its stream as soon as the body has been read.
                string body;
                using (response)
                using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
                {
                    body = reader.ReadToEnd();
                }

                // Show what the server returned; this is the only feedback the
                // user gets when the body contains no rank.
                txtResponse.Text = body;

                // Extract and display the PR value. Only assign a number when
                // the expected pattern is present and parses as an int.
                Match match = Regex.Match(body, "Rank_1:[0-9]:([0-9]+)");
                int rank;
                if (match.Success && int.TryParse(match.Groups[1].Value, out rank))
                {
                    txtPR.Text = rank.ToString();
                }
                else
                {
                    txtPR.Text = "";
                }
            }
            catch (System.UriFormatException)
            {
                txtResponse.Text = "无效的URL";
            }
            catch (WebException ex)
            {
                // Network failures and HTTP error statuses are reported by
                // GetResponse as WebException; show the message instead of
                // letting the exception escape.
                txtResponse.Text = ex.Message;
            }

  

posted @ 2011-05-29 22:27  OYJJ  阅读(786)  评论(0)  编辑  收藏  举报