C# HttpWebRequest 从google服务器获取google的PageRank PR值
首先,本文的实现参考了这篇:
http://www.codeproject.com/KB/aspnet/Google_Pagerank.aspx
简述一下原理:
获取PR值,通过向google服务器发送一个http请求来实现。
在浏览器中输入上面的链接,google服务器会返回一个字符串
上面的链接返回的是:Rank_1:1:6
要实现对任意链接的PR值的查询,关键要正确构造请求的URL,ch=6771535612这一项很重要,它是请求网址的hash值,这个hash值的算法在上面的那篇博文中就有详细的说明。
这里将主要的代码转贴在下面:
// Seed value: GoogleCH uses this as the starting value of its third hash lane (c).
private const UInt32 GOOGLE_MAGIC = 0xE6359A60;
/// <summary>
/// Scrambles three 32-bit hash lanes in place using nine rounds of
/// subtract / xor-shift steps. Each round updates one lane from the other two.
/// </summary>
/// <param name="a">First lane; read and overwritten.</param>
/// <param name="b">Second lane; read and overwritten.</param>
/// <param name="c">Third lane; read and overwritten.</param>
private static void _mix(ref UInt32 a, ref UInt32 b, ref UInt32 c)
{
    // The subtractions are expected to wrap modulo 2^32. Marking the block
    // 'unchecked' keeps that true even when the assembly is compiled with
    // arithmetic overflow checking turned on (otherwise they would throw).
    unchecked
    {
        a -= b; a -= c; a ^= c >> 13;
        b -= c; b -= a; b ^= a << 8;
        c -= a; c -= b; c ^= b >> 13;

        a -= b; a -= c; a ^= c >> 12;
        b -= c; b -= a; b ^= a << 16;
        c -= a; c -= b; c ^= b >> 5;

        a -= b; a -= c; a ^= c >> 3;
        b -= c; b -= a; b ^= a << 10;
        c -= a; c -= b; c ^= b >> 15;
    }
}
/// <summary>
/// Computes the checksum string used as the <c>ch</c> query parameter for a URL.
/// The text <c>"info:" + url</c> is hashed 12 bytes at a time into three
/// 32-bit lanes, and the final third lane is returned in decimal with a
/// leading <c>'6'</c>.
/// </summary>
/// <param name="url">The URL to compute the checksum for.</param>
/// <returns>The character <c>'6'</c> followed by the decimal value of the hash.</returns>
public static string GoogleCH(string url)
{
    url = string.Format("info:{0}", url);

    // The packing below gives every input unit an 8-bit lane (<< 8, << 16, << 24).
    // Feeding UTF-16 chars directly lets any character above 0xFF spill into the
    // neighbouring lanes, so hash the UTF-8 bytes instead. Pure-ASCII input
    // produces exactly the same bytes, length and checksum as before.
    byte[] data = System.Text.Encoding.UTF8.GetBytes(url);
    int length = data.Length;
    int k = 0;
    int len = length;

    UInt32 a, b;
    UInt32 c = GOOGLE_MAGIC;
    a = b = 0x9E3779B9;

    // All additions are meant to wrap modulo 2^32; 'unchecked' keeps the hash
    // working when the assembly is built with overflow checking enabled.
    unchecked
    {
        // Consume full 12-byte groups: three little-endian 32-bit words each.
        while (len >= 12)
        {
            a += (UInt32)data[k + 0] + ((UInt32)data[k + 1] << 8) + ((UInt32)data[k + 2] << 16) + ((UInt32)data[k + 3] << 24);
            b += (UInt32)data[k + 4] + ((UInt32)data[k + 5] << 8) + ((UInt32)data[k + 6] << 16) + ((UInt32)data[k + 7] << 24);
            c += (UInt32)data[k + 8] + ((UInt32)data[k + 9] << 8) + ((UInt32)data[k + 10] << 16) + ((UInt32)data[k + 11] << 24);
            _mix(ref a, ref b, ref c);
            k += 12;
            len -= 12;
        }

        // The low byte of c is reserved for the total length, which is why the
        // trailing bytes 8..10 are shifted one byte higher than in the main loop.
        c += (UInt32)length;

        // Fold in the 0..11 leftover bytes. Each guard mirrors one step of the
        // original fall-through ladder (case 11 down to case 1).
        if (len >= 11) { c += (UInt32)data[k + 10] << 24; }
        if (len >= 10) { c += (UInt32)data[k + 9] << 16; }
        if (len >= 9) { c += (UInt32)data[k + 8] << 8; }
        if (len >= 8) { b += (UInt32)data[k + 7] << 24; }
        if (len >= 7) { b += (UInt32)data[k + 6] << 16; }
        if (len >= 6) { b += (UInt32)data[k + 5] << 8; }
        if (len >= 5) { b += (UInt32)data[k + 4]; }
        if (len >= 4) { a += (UInt32)data[k + 3] << 24; }
        if (len >= 3) { a += (UInt32)data[k + 2] << 16; }
        if (len >= 2) { a += (UInt32)data[k + 1] << 8; }
        if (len >= 1) { a += (UInt32)data[k + 0]; }

        _mix(ref a, ref b, ref c);
    }

    return string.Format("6{0}", c);
}
再给出一个调用的用例供参考:
try
{
    // Build the request URL. The target URL is escaped so that characters such
    // as '&', '#' or '+' inside it cannot break or truncate the query string.
    string checksum = GoogleCH(txtUrl.Text);
    string query = string.Format(@"http://toolbarqueries.google.com/search?client=navclient-auto&ch={0}&features=Rank&q=info:{1}", checksum, Uri.EscapeDataString(txtUrl.Text));

    // Send the request and obtain the response.
    request = (HttpWebRequest)HttpWebRequest.Create(query);
    response = (HttpWebResponse)request.GetResponse();
    if (response == null)
    {
        txtResponse.Text = "response==NULL";
        return;
    }

    txtResponse.Text = "";

    // Dispose the response and its stream when done, and read the whole body
    // before matching so the "Rank_..." token cannot be split across read chunks.
    using (response)
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
    {
        string body = reader.ReadToEnd();

        // Extract and display the PR value. If the body carries no rank token,
        // txtPR is left untouched instead of throwing a FormatException.
        Match match = Regex.Match(body, "Rank_1:[0-9]:([0-9]+)");
        int rank;
        if (match.Success && int.TryParse(match.Groups[1].Value, out rank))
        {
            txtPR.Text = rank.ToString();
        }
    }
}
catch (System.UriFormatException)
{
    txtResponse.Text = "无效的URL";
}