在网上找了几天,能免费下载到的手机归属地数据库信息都不是很全,不过网站http://www.ip138.com:8080/search.asp上查询到的数据还是比较全面,可惜拿不到它的数据库,只能想想其它办法了。
方法一:入侵此网站,将此网站数据拿来?
这可是违法的行为,行不通……打住。
方法二:在此网站将所有号码段一个个进行查询,记下相应信息?
算了下,联通与移动现在好像有160000个号码段,一个个手工去查,这可要命……
方法三:是否可写一个程序,来采集这些数据呢?
这方法当然可行,先分析下页面源码:
查询页面源码:

…
<FORM action="" method=post name="mobileform" onsubmit="return checkMobile();">
<TR bgColor=#eff1f3 class=tdc>
<TD align=middle width=130 noswap>手机号码(段) </TD>
<TD align=middle width=*><INPUT class=tdc name="mobile" maxLength="11">
<INPUT name="action" type="hidden" value=mobile> <INPUT class=bdtj name=B1 type=submit value="查 询">
</TD>
</TR>
</FORM>

…
结果显示页面源码:
…
<TABLE width=349 border="1" align="center" cellpadding="4" bordercolor=#3366cc style="border-collapse: collapse">
<TR>
<TD colspan=2 class=tdc1 align=center height=24 >++ ip138.com查询结果 ++</TD>
</TR>
<TR class=tdc bgcolor=#EFF1F3>
<TD width="130" align="center" noswap>您查询的手机号码段</TD>
<TD width=* align="center" class=tdc2>1592031</TD>
</TR>
<TR class=tdc bgcolor=#EFF1F3>
<TD width="130" align="center" noswap>卡号归属地</TD>
<TD width=* align="center" class=tdc2>广东 广州</TD>
</TR>
<TR class=tdc bgcolor=#EFF1F3>
<TD width="130" align="center" noswap>卡 类 型</TD>
<TD width=* align="center" class=tdc2>移动预付费卡</TD>
</TR>
<TR class=tdc bgcolor=#EFF1F3>
<TD align="center">区 号</TD>
<TD align="center" class=tdc2>020</TD>
</TR>
<TR class=tdc bgcolor=#EFF1F3>
<TD align="center">邮 编</TD>
<TD align="center" class=tdc2>510000 <a href="http://alexa.ip138.com/post/" target="_blank">更详细的..</a></TD></TR>
</TABLE>
…
发现只要将手机号码段Post到http://www.ip138.com:8080/search.asp即可获取相应信息。
代码实现:
1.web request post类
/// <summary>
/// Helper for sending a form-encoded HTTP POST and reading the response
/// body back as text.
/// </summary>
public class WebClientHelper
{
    // Browser-like User-Agent header sent with every request.
    const string UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";

    // Content type of an HTML form submission.
    const string FormContentType = "application/x-www-form-urlencoded";

    // Encoding used to turn the post data into bytes. With ASCII, any
    // non-ASCII character in the post data is replaced by '?'.
    const string RequestEncodingName = "ascii";

    // Encoding used to decode the response body.
    // NOTE(review): on .NET Core / .NET 5+ the gb2312 code page is only
    // available after registering CodePagesEncodingProvider - confirm the
    // target runtime.
    const string ResponseEncodingName = "gb2312";

    /// <summary>
    /// Posts <paramref name="postData"/> to <paramref name="url"/> and returns
    /// the response body decoded as gb2312 text.
    /// </summary>
    /// <param name="postData">The already form-encoded request body, e.g. "a=1&amp;b=2".</param>
    /// <param name="url">The target URL; it must resolve to an HTTP(S) request.</param>
    /// <returns>The full response body as a string.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="postData"/> is null.</exception>
    /// <exception cref="ApplicationException">
    /// <paramref name="url"/> does not produce an <see cref="HttpWebRequest"/>.
    /// </exception>
    /// <remarks>
    /// Exceptions raised while sending the request or reading the response
    /// propagate to the caller unchanged, with their original stack trace.
    /// </remarks>
    public static string PostDataToUrl(string postData, string url)
    {
        if (postData == null)
        {
            throw new ArgumentNullException("postData");
        }

        // Create the request; anything other than an HTTP request is rejected.
        HttpWebRequest httpRequest = WebRequest.Create(url) as HttpWebRequest;
        if (httpRequest == null)
        {
            throw new ApplicationException(
                string.Format("Invalid url string: {0}", url));
        }

        // Fill in the basic request information.
        httpRequest.UserAgent = UserAgent;
        httpRequest.ContentType = FormContentType;
        httpRequest.Method = "POST";

        // Write the body. The using block closes the request stream even when
        // Write throws.
        byte[] body = Encoding.GetEncoding(RequestEncodingName).GetBytes(postData);
        httpRequest.ContentLength = body.Length;
        using (Stream requestStream = httpRequest.GetRequestStream())
        {
            requestStream.Write(body, 0, body.Length);
        }

        // Send the request and read the whole response. The response, its
        // stream and the reader are all disposed on every path so the
        // underlying connection is released.
        using (WebResponse response = httpRequest.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(
            responseStream, Encoding.GetEncoding(ResponseEncodingName)))
        {
            return reader.ReadToEnd();
        }
    }
}

2.数据采集类
/// <summary>
/// Extracts the phone-number-segment details from the HTML of a lookup
/// result page.
/// </summary>
public class AcquisitionHelper
{
    // Number of <TD> cells a recognised result page must contain (after the
    // anchor tags have been stripped). Any other count is treated as
    // "not a result page".
    const int ExpectedCellCount = 12;

    // Positions, within the matched <TD> cells, of the value cells.
    const int NumberCellIndex = 3;
    const int LocationCellIndex = 5;
    const int CardTypeCellIndex = 7;
    const int AreaCodeCellIndex = 9;
    const int ZipCodeCellIndex = 11;

    // Matches one <TD ...>text</TD> cell whose content holds no nested tag.
    // Only IgnoreCase is needed: the pattern contains no whitespace, '#',
    // '^' or '$', so IgnorePatternWhitespace and Multiline had no effect.
    private static readonly Regex CellRegex = new Regex(
        "<TD[^<]*>(?<text>[^<]*)</TD>",
        RegexOptions.IgnoreCase);

    // Matches a whole <a ...>...</a> element that has no nested tag.
    private static readonly Regex AnchorRegex = new Regex(
        "<a[^<]*</a>",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Parses the HTML of a lookup result page into a <c>MobileInfo</c>.
    /// </summary>
    /// <param name="html">The HTML text of the result page.</param>
    /// <returns>
    /// The parsed information, or <c>null</c> when <paramref name="html"/> is
    /// null/empty or does not contain exactly the expected number of cells.
    /// </returns>
    public static MobileInfo Acquisition(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return null;
        }

        // One of the <td> cells contains an <a> element, which would stop the
        // cell pattern from matching it; remove anchors first.
        string withoutAnchors = AnchorRegex.Replace(html, "");

        MatchCollection cells = CellRegex.Matches(withoutAnchors);
        if (cells.Count != ExpectedCellCount)
        {
            return null;
        }

        // NOTE(review): the original instantiated "NumInfo" here while the
        // variable and return type are MobileInfo; assumed to be a leftover
        // from a rename and that MobileInfo has a public parameterless
        // constructor - confirm.
        MobileInfo info = new MobileInfo();
        info.Number = CellText(cells[NumberCellIndex]);
        info.Location = CellText(cells[LocationCellIndex]);
        info.CardType = CellText(cells[CardTypeCellIndex]);
        info.AreaCode = CellText(cells[AreaCodeCellIndex]);
        info.ZipCode = CellText(cells[ZipCodeCellIndex]);
        return info;
    }

    // Returns the text of a matched cell with non-breaking spaces (entity or
    // raw U+00A0) turned into plain spaces and surrounding whitespace removed.
    private static string CellText(Match cell)
    {
        return cell.Groups["text"].Value
            .Replace("&nbsp;", " ")
            .Replace("\u00A0", " ")
            .Trim();
    }
}

3.测试一下
/// <summary>
/// Demo entry point: looks up one phone-number segment and prints its
/// location.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string postData = "mobile=1592031&action=mobile";
    string html = WebClientHelper.PostDataToUrl(postData, "http://www.ip138.com:8080/search.asp");

    // Acquisition returns null when the page is not a recognised result page.
    MobileInfo info = AcquisitionHelper.Acquisition(html);
    if (info == null)
    {
        Console.WriteLine("Unable to parse the lookup result.");
        return;
    }

    Console.WriteLine(info.Location);
    // ... (use the remaining fields as needed)
}
有了以上两个工具类,我们就很容易去遍历所有手机号码段,收集到手机号码归属信息了。
PS:号码段数量很多,记得采用多线程来加快采集速度。