个人原创、欢迎转载、转载请注明出处、http://www.cnblogs.com/zetee/articles/3482085.html
采集器概貌,如下:
最近做一个项目,功能类似于CNZZ站长统计,要求显示IP所在的省份、市区以及运营商等信息。把网上的纯真IP数据库下载下来一看,发现没有运营商信息,省市区数据也很少,居然还有“XXX网吧”这样的条目,哥瞬间倒了。数据没有标准化、比较杂乱,IP段也不连续,总体来说达不到要求。
在百度上找啊找,找到淘宝Ip地址库,官方介绍的相当诱人,准确率高,数据质量有保障,提供国家、省、市、县、运营商全方位信息,信息维度广,格式规范,但是限制每秒10次的访问(这个比较无语)。
淘宝IP地址库,提供API http://ip.taobao.com/
接口说明
1. 请求接口(GET):
http://ip.taobao.com/service/getIpInfo.php?ip=[ip地址字串]
2. 响应信息:
(json格式的)国家 、省(自治区或直辖市)、市(县)、运营商
3. 返回数据格式:
{"code":0,"data":{"ip":"210.75.225.254","country":"\u4e2d\u56fd","area":"\u534e\u5317",
"region":"\u5317\u4eac\u5e02","city":"\u5317\u4eac\u5e02","county":"","isp":"\u7535\u4fe1",
"country_id":"86","area_id":"100000","region_id":"110000","city_id":"110000",
"county_id":"-1","isp_id":"100017"}}
其中code的值的含义为,0:成功,1:失败。
1 :IP转换
准备好工具,后面就好办了。IPHelper 提供了 IP <-> byte[] <-> long 之间的各种转换。
/// <summary>
/// Converts IPv4 addresses between three representations: dotted-decimal
/// string ("a.b.c.d"), 4-byte array (most significant octet first) and a
/// numeric value stored in a <see cref="long"/>.
/// </summary>
public class IPHelper
{
    /// <summary>Number of octets in an IPv4 address.</summary>
    private const int OctetCount = 4;

    /// <summary>
    /// Converts a dotted-decimal IPv4 string to its numeric value.
    /// </summary>
    /// <param name="ip">Address in the form "a.b.c.d".</param>
    /// <returns>The address as a number in the range 0..4294967295.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="ip"/> is null.</exception>
    /// <exception cref="FormatException">The string does not have exactly four numeric parts.</exception>
    /// <exception cref="OverflowException">A part is outside 0..255.</exception>
    public static long Ip2Long(string ip)
    {
        return Bytes2Long(Ip2Bytes(ip));
    }

    /// <summary>
    /// Converts a numeric IPv4 value to its dotted-decimal string.
    /// </summary>
    /// <param name="ipLong">Numeric address; only the low 32 bits are used.</param>
    /// <returns>The address in the form "a.b.c.d".</returns>
    public static string Long2Ip(long ipLong)
    {
        return Bytes2Ip(Long2Bytes(ipLong));
    }

    /// <summary>
    /// Splits a numeric IPv4 value into four octets, most significant first.
    /// </summary>
    /// <param name="ipvalue">Numeric address; bits above the low 32 are ignored.</param>
    /// <returns>A new 4-element array.</returns>
    public static byte[] Long2Bytes(long ipvalue)
    {
        byte[] octets = new byte[OctetCount];
        for (int i = 0; i < OctetCount; i++)
        {
            // Octet i (counted from the least significant end) goes to the
            // mirrored position so that the array reads left to right.
            octets[OctetCount - 1 - i] = (byte)((ipvalue >> (8 * i)) & 0xFF);
        }
        return octets;
    }

    /// <summary>
    /// Combines four octets (most significant first) into a numeric value.
    /// </summary>
    /// <param name="bt">Exactly four octets.</param>
    /// <returns>The address as a number in the range 0..4294967295.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bt"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="bt"/> does not contain exactly four bytes.</exception>
    public static long Bytes2Long(byte[] bt)
    {
        EnsureFourOctets(bt, "bt");

        long value = 0;
        foreach (byte octet in bt)
        {
            // Shift what has been accumulated one octet to the left and append.
            value = (value << 8) | octet;
        }
        return value;
    }

    /// <summary>
    /// Parses a dotted-decimal IPv4 string into four octets.
    /// </summary>
    /// <param name="ip">Address in the form "a.b.c.d".</param>
    /// <returns>A new 4-element array, most significant octet first.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="ip"/> is null.</exception>
    /// <exception cref="FormatException">The string does not have exactly four numeric parts.</exception>
    /// <exception cref="OverflowException">A part is outside 0..255.</exception>
    public static byte[] Ip2Bytes(string ip)
    {
        if (ip == null)
        {
            throw new ArgumentNullException("ip");
        }

        string[] parts = ip.Split('.');
        if (parts.Length != OctetCount)
        {
            // Previously extra parts were silently dropped and missing parts
            // surfaced as IndexOutOfRangeException.
            throw new FormatException("An IPv4 address must have exactly four dot-separated parts: '" + ip + "'.");
        }

        byte[] octets = new byte[OctetCount];
        for (int i = 0; i < OctetCount; i++)
        {
            // Invariant culture: the text is machine data, not user-facing.
            octets[i] = byte.Parse(
                parts[i],
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture);
        }
        return octets;
    }

    /// <summary>
    /// Formats four octets (most significant first) as a dotted-decimal string.
    /// </summary>
    /// <param name="bytes">Exactly four octets.</param>
    /// <returns>The address in the form "a.b.c.d".</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="bytes"/> does not contain exactly four bytes.</exception>
    public static string Bytes2Ip(byte[] bytes)
    {
        EnsureFourOctets(bytes, "bytes");

        return string.Format(
            System.Globalization.CultureInfo.InvariantCulture,
            "{0}.{1}.{2}.{3}",
            bytes[0],
            bytes[1],
            bytes[2],
            bytes[3]);
    }

    /// <summary>
    /// Throws unless <paramref name="octets"/> is a non-null array of exactly four bytes.
    /// </summary>
    private static void EnsureFourOctets(byte[] octets, string paramName)
    {
        if (octets == null)
        {
            throw new ArgumentNullException(paramName);
        }
        if (octets.Length != OctetCount)
        {
            throw new ArgumentException("An IPv4 address must consist of exactly four bytes.", paramName);
        }
    }
}
2 :多线程疯狂获取IP
/// <summary>
/// Starts collection by starting every thread in <c>ThreadList</c>.
/// </summary>
private void StratCollect()
{
    foreach (Thread worker in ThreadList)
    {
        worker.Start();
    }
}

/// <summary>
/// Atomically increments the shared <c>CurrentCollectIP</c> counter and
/// returns the incremented value, so concurrent callers each receive a
/// distinct number.
/// </summary>
/// <returns>The numeric IP the caller should collect next.</returns>
private long GetCurrentIp()
{
    // NOTE(review): Interlocked.Increment returns the value *after* the
    // increment, so the counter's initial value itself is never handed out.
    // Confirm it is initialised to (first IP - 1).
    return System.Threading.Interlocked.Increment(ref CurrentCollectIP);
}

/// <summary>
/// Collection loop run on a worker thread: keeps claiming the next numeric
/// IP and collecting it until the claimed value exceeds <c>EndIP</c>.
/// A failure for one IP is logged and does not stop the loop.
/// </summary>
private void GetTaobaoData()
{
    for (long ipValue = GetCurrentIp(); ipValue <= EndIP; ipValue = GetCurrentIp())
    {
        try
        {
            CaptureTaobaoIPData(ipValue);
        }
        catch (Exception ex)
        {
            // Separator added: without it the numeric IP and a message that
            // starts with a digit run together and cannot be told apart.
            TextLog.SetString(ipValue + ": " + ex.Message);
        }
    }
}

/// <summary>
/// Queries the API for one IP, deserializes the JSON response and appends
/// the resulting record to the list shown in the grid.
/// </summary>
/// <param name="currentipLong">Numeric value of the IP to look up.</param>
/// <exception cref="InvalidOperationException">
/// The response is empty, reports a non-zero <c>code</c>, or carries no <c>data</c>.
/// </exception>
private void CaptureTaobaoIPData(long currentipLong)
{
    string ip = IPHelper.Long2Ip(currentipLong);
    string url = string.Format(UrlFomat, ip);
    string js = HttpHelper.HttpRequest(url, Encoding.UTF8);

    TaobaoJsonData result = Newtonsoft.Json.JsonConvert.DeserializeObject<TaobaoJsonData>(js);

    // The API signals success with code 0; anything else (or a missing
    // payload) must not reach the grid as if it were a valid record.
    if (result == null || result.code != 0 || result.data == null)
    {
        string reason = result == null ? "empty response" : "code=" + result.code;
        throw new InvalidOperationException("IP lookup failed for " + ip + " (" + reason + ")");
    }

    taobaoIPdata m = result.data;
    m.ipLong = currentipLong;

    // Update the UI: Invoke marshals the delegate onto the thread that owns the form.
    this.Invoke(new Action<taobaoIPdata>(record =>
    {
        taobaoIPdataList.Add(record);
        // NOTE(review): re-assigning the same list instance may not make the
        // grid show the new row, depending on the list type - confirm.
        this.dgv.DataSource = taobaoIPdataList;
    }), m);
}
3: 通过Http请求获取Json结果,并反序列化成对象
http请求这个相当简单。网上一大把,这里主要说一下json序列化,在这里本人建议采用Newtonsoft.Json.dll 下载地址: http://json.codeplex.com/ 性能和兼容性达到最好
/// <summary>
/// Minimal helper for issuing blocking HTTP GET requests.
/// </summary>
public class HttpHelper
{
    /// <summary>
    /// Sends a GET request to <paramref name="url"/> and returns the whole
    /// response body decoded with <paramref name="encoding"/>.
    /// </summary>
    /// <param name="url">Absolute HTTP/HTTPS URL to request.</param>
    /// <param name="encoding">Encoding used to decode the response body.</param>
    /// <returns>The response body as text.</returns>
    /// <remarks>
    /// Exceptions raised while creating the request, getting the response or
    /// reading the body propagate to the caller with their original stack trace.
    /// </remarks>
    public static string HttpRequest(string url, Encoding encoding)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = 6 * 1000; // milliseconds
        request.Method = "GET";

        // Dispose response, stream and reader deterministically: an undisposed
        // response keeps its connection checked out, and callers run this from
        // many threads against a single host.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, encoding))
        {
            return reader.ReadToEnd();
        }
    }
}
// Deserialize the JSON text in `js` into a TaobaoJsonData envelope and take its `data` member.
1 taobaoIPdata m = Newtonsoft.Json.JsonConvert.DeserializeObject<TaobaoJsonData>(js).data;
/// <summary>
/// One IP record as returned by the Taobao API. Property names match the
/// keys of the JSON <c>data</c> object.
/// </summary>
public partial class taobaoIPdata
{
    /// <summary>IP as a long integer.</summary>
    public long ipLong { get; set; }

    /// <summary>IP address.</summary>
    public string ip { get; set; }

    /// <summary>Country.</summary>
    public string country { get; set; }

    /// <summary>Country id.</summary>
    public string country_id { get; set; }

    /// <summary>Area.</summary>
    public string area { get; set; }

    /// <summary>Area id.</summary>
    public string area_id { get; set; }

    /// <summary>Region.</summary>
    public string region { get; set; }

    /// <summary>Region id.</summary>
    public string region_id { get; set; }

    /// <summary>City.</summary>
    public string city { get; set; }

    /// <summary>City id.</summary>
    public string city_id { get; set; }

    /// <summary>County.</summary>
    public string county { get; set; }

    /// <summary>County id.</summary>
    public string county_id { get; set; }

    /// <summary>Provider (ISP).</summary>
    public string isp { get; set; }

    /// <summary>Provider (ISP) id.</summary>
    public string isp_id { get; set; }
}

/// <summary>
/// JSON envelope returned by the Taobao API: a result code plus the record.
/// </summary>
public partial class TaobaoJsonData
{
    /// <summary>Result code from the response.</summary>
    public int code { get; set; }

    /// <summary>The IP record carried by the response.</summary>
    public taobaoIPdata data { get; set; }
}
4:插入到数据库中。。。剩下的自己随便搞啦
源码下载:淘宝IP获取器.rar