随笔 - 9,  文章 - 1,  评论 - 33,  阅读 - 18306
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
        /// <summary>
        /// 读取URL数据内容
        /// </summary>
        /// <param name="url">网址</param>
        /// <returns>网站文本内容</returns>
        public string HttpGetText(string url)
        {
            HttpWebRequest Request = (HttpWebRequest)HttpWebRequest.Create(url);
            Request.Method = "GET";
            Request.ContentType = @"application/x-www-form-urlencoded";
            Request.Accept = @"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
            Request.Headers.Add(HttpRequestHeader.AcceptLanguage, @"Accept-Language:zh-CN,zh;q=0.8");
            Request.UserAgent = @"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0";
            Request.Proxy = proxy;
            var stream = Request.GetResponse().GetResponseStream();
            var read = new StreamReader(stream);
            var json = read.ReadToEnd();
            read.Close();
            stream.Close();
 
            System.Diagnostics.Debug.WriteLine("".PadRight(20, '='));
            System.Diagnostics.Debug.WriteLine(json);
            System.Diagnostics.Debug.WriteLine("".PadRight(20, '='));
            return json;
        }
 
/// <summary>
/// Container for the proxy entries collected by the application.
/// </summary>
public class IPs
{
    /// <summary>
    /// A single proxy entry.
    /// </summary>
    public class proxy
    {
        /// <summary>IP address of the proxy, kept as text.</summary>
        public string ip;

        /// <summary>Port number of the proxy.</summary>
        public int port;

        /// <summary>Address text associated with the proxy.</summary>
        public string address;

        /// <summary>Speed figure for the proxy.</summary>
        public int speed;

        /// <summary>How long the proxy has lasted, in minutes.</summary>
        public int life;

        /// <summary>Time at which the proxy was checked.</summary>
        public DateTime check_time;
    }

    /// <summary>All proxy entries; starts out as an empty list.</summary>
    public List<proxy> items = new List<proxy>();
}
        private void button1_Click(object sender, EventArgs e)
        {
            var html= HttpGetText("http://www.xicidaili.com/nt");
            int i1= html.IndexOf("<table id=\"ip_list\">");
            int i2= html.IndexOf("</table>");
            string ip_list = html.Substring(i1, i2 - i1+ "</table>".Length);
            var find = new Regex(@"<tr.*?>\s*?<td.*?>.*?</td>\s*?<td.*?>(?<ip>.*?)</td>\s*?<td.*?>(?<port>.*?)</td>\s*?<td.*?>\s*?<a.*?>(?<address>.*?)</a>\s*?</td>.*?width:(?<speed>.*?)%.*?<td>(?<life>.*?)</td>.*?<td>(?<check_time>.*?)</td>.*?</tr>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
            var ips= find.Matches(ip_list);
            listView1.BeginUpdate();
            foreach (System.Text.RegularExpressions.Match item in ips) {
                try
                {
                    var ip = new IPs.proxy();
                    ListViewItem lvi = new ListViewItem(item.Groups["ip"].Value); ip.ip = item.Groups["ip"].Value;
                    lvi.SubItems.Add(item.Groups["port"].Value); ip.port = Convert.ToInt32(item.Groups["port"].Value);
                    lvi.SubItems.Add(item.Groups["address"].Value); ip.address = item.Groups["address"].Value;
                    lvi.SubItems.Add(item.Groups["speed"].Value); ip.speed = Convert.ToInt32(item.Groups["speed"].Value);
                    lvi.SubItems.Add(item.Groups["life"].Value); ip.life = conv(item.Groups["life"].Value);
                    lvi.SubItems.Add(item.Groups["check_time"].Value); ip.check_time = Convert.ToDateTime(item.Groups["check_time"].Value);
                    listView1.Items.Add(lvi);
                    IPaddress.items.Add(ip);
                }
                catch {
                    LogAdd("转换IP地址信息出错 " + item.Value);
                }
            }
            listView1.EndUpdate();
            int conv(string life) {
                int a = 1;
                if (life.Contains("天")) {
                    a = 60 * 24;
                    life = life.Replace("天", "");
                }else if (life.Contains("分钟"))
                {
                    a =1;
                    life = life.Replace("分钟", "");
                }
                else if (life.Contains("小时"))
                {
                    a = 60;
                    life = life.Replace("小时", "");
                }
                return Convert.ToInt32(life)*a;
            }
        }

  

关键代码就是获取指定网页里的IP代理信息,然后用正则表达式提取出来

本来想着直接将 HTML 转换为 XML,谁知该网页写得不标准,转换不成功

只有用正则来查找了,效果不错~

代码运行环境:VS2017(代码中使用了 C# 7.0 的局部函数)

当然老版本也可以,将局部函数代码放到外部即可。

效果图:

 

关键代码部分:

1
2
3
4
5
6
// Download the page and cut out the <table id="ip_list"> ... </table> fragment.
var html = HttpGetText("http://www.xicidaili.com/nt");
int i1 = html.IndexOf("<table id=\"ip_list\">");
// Start searching at i1 so a </table> that appears earlier on the page is not picked up.
int i2 = html.IndexOf("</table>", i1);
string ip_list = html.Substring(i1, i2 - i1 + "</table>".Length);
// One match per <tr>; the named groups capture ip, port, address, speed, life and check_time.
var find = new Regex(@"<tr.*?>\s*?<td.*?>.*?</td>\s*?<td.*?>(?<ip>.*?)</td>\s*?<td.*?>(?<port>.*?)</td>\s*?<td.*?>\s*?<a.*?>(?<address>.*?)</a>\s*?</td>.*?width:(?<speed>.*?)%.*?<td>(?<life>.*?)</td>.*?<td>(?<check_time>.*?)</td>.*?</tr>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
var ips = find.Matches(ip_list);

  推荐一个正则表达式的教程网址及学习工具:

http://deerchao.net/tutorials/regex/regex.htm#charclass

 

我本人也记不住正则表达式,需要用的时候现查。

posted on   fxyc87  阅读(780)  评论(4)  编辑  收藏  举报
编辑推荐:
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 单线程的Redis速度为什么快?
· SQL Server 2025 AI相关能力初探
· 展开说说关于C#中ORM框架的用法!
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?

< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5
点击右上角即可分享
微信分享提示