爬虫获取网页开发者模式 Network 信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using OpenQA.Selenium;
using OpenQA.Selenium.DevTools;
using OpenQA.Selenium.DevTools.V85.Network;
using DevToolsSessionDomains = OpenQA.Selenium.DevTools.V85.DevToolsSessionDomains;
 
// 2023.11.20 author by Zingu ft. NewBing
 
namespace PuppeteerSharp_Test
{
    public class Demo
    {
        // 构造方法按需设置
        public Demo() { }
 
        private List<Dictionary<string, string>> _responses = new List<Dictionary<string, string>>();
 
        // 线程锁
        private object obj_lock = new object();
 
 
        public async Task<List<Tuple<string, string>>> GetListAsync(string url)
        {
            List<Tuple<string, string>> ls = new List<Tuple<string, string>>();
            using (IWebDriver driver = new OpenQA.Selenium.IE.InternetExplorerDriver())
            {
                driver.Navigate().GoToUrl(url);
 
                Thread.Sleep(500);
                var videoElements1 = driver.FindElement(By.ClassName("course-list"));
               var videoElements=  videoElements1.FindElements(By.ClassName("course-link"));
                foreach (var item in videoElements)
                {
                    var aElement = item.FindElement(By.TagName("a"));
                    string href = aElement.GetAttribute("href");
                    string title = aElement.GetAttribute("title");
                    ls.Add(new Tuple<string, string>(href, title));
                }
            }
            return ls;
        }
 
 
        /// <summary>
        /// 主要使用逻辑, 异步方法
        /// </summary>
        /// <returns></returns>
        public async Task RunAsync()
        {
            // 初始化一个驱动, 本例中未设置 options 参数
            // var dr = new OpenQA.Selenium.Chrome.ChromeDriver();
             var dr = new OpenQA.Selenium.Edge.EdgeDriver();
            // 初始化 session
            var session = dr.GetDevToolsSession(85);
            // 初始化 domains
            var domains = session.GetVersionSpecificDomains<DevToolsSessionDomains>();
            // 设置 Network 为 Enable
            await domains.Network.Enable(new OpenQA.Selenium.DevTools.V85.Network.EnableCommandSettings());
            // 订阅 接收 Response 事件
            domains.Network.ResponseReceived += Network_ResponseReceived;
            // 打开目标网站
            dr.Url = "https://open.163.com/newview/movie/free?pid=MA5T0OVML&mid=MA5T1488U";
            // 设置 获取 Response body 的参数
            var cmd = new OpenQA.Selenium.DevTools.V85.Network.GetResponseBodyCommandSettings();
 
            await Task.Delay(500);
            foreach (var item in GetRequestUrl())
            {
                Console.WriteLine(item);
            }
            /*
            // 获取 RequestId 加入参数中
            cmd.RequestId = GetRequestId();
            // rlt 是最后获得的 Response body
            var rlt = domains.Network.GetResponseBody(cmd).GetAwaiter().GetResult();
            // 输出得到的结果
            //Console.WriteLine(rlt.Body);
            */
            // 设置 Network 为不可用
            await domains.Network.Disable();
            // 关闭驱动
            dr.Quit();
            Console.ReadLine();
        }
 
        private List<string> GetRequestUrl()
        {
            List<string> rlt =new List<string>();
            List<Dictionary<string, string>> box;
            // responses List 要加锁
            lock (obj_lock)
            {
                box = _responses.ToList();
            }
            foreach (var u in box)
            {
                // 结合需求设置
                // if (u["url"] != null&& (u["url"].Contains(".mp4")|| u["url"].Contains(".srt")))// <筛选的条件>
                if (u["url"] != null && (u["url"].Contains(".mp4") || u["url"].Contains(".srt")))
                    rlt.Add(u["url"]);     
            }
            return rlt;
        }
 
 
        /// <summary>
        /// 获取 RequestId
        /// </summary>
        /// <returns></returns>
        private string GetRequestId()
        {
            string rlt = "";
            List<Dictionary<string, string>> box;
            // responses List 要加锁
            lock (obj_lock)
            {
                box = _responses.ToList();
            }
            foreach (var u in box)
            {
                // 结合需求设置
                // if (u["url"] != null&& (u["url"].Contains(".mp4")|| u["url"].Contains(".srt")))// <筛选的条件>
                if (u["url"] != null && (u["url"].Contains(".mp4") || u["url"].Contains(".srt")))
                    Console.WriteLine(u["url"]);
                if (u["url"] != null )// <筛选的条件>
                {
                    rlt = u["requestId"];
                }
                else
                {
                    continue;
                }
            }
            return rlt;
        }
 
        /// <summary>
        /// 接收 Response 事件
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="e"></param>
        private void Network_ResponseReceived(object sender, OpenQA.Selenium.DevTools.V85.Network.ResponseReceivedEventArgs e)
        {
            var dic = new Dictionary<string, string>();
            // e 中是接收到的 Response 信息, 本例中只需求 requestId 和 url
            dic.Add("requestId", e.RequestId);
            dic.Add("url", e.Response.Url);
            // responses List 要加锁
            lock (obj_lock)
            {
                _responses.Add(dic);
 
            }
        }
    }
}

  

posted @   后跳  阅读(118)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 单元测试从入门到精通
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律
点击右上角即可分享
微信分享提示