浏览器自动化的一些体会8 HttpWebRequest的几个问题

前面说过了,httpWebRequest的好处在于轻量,不需要界面,缺点在于无法执行javascript。这里再归纳一些问题。

1. 设置代理

1) HttpWebRequest不支持https协议的代理服务器(即代理地址本身以https://开头的情况),但可以通过http代理访问https网站;也就是说用不了某些vpn,你懂的。

2) 一般的写法:

// Build the request for the target URL. WebRequest.Create is cast to
// HttpWebRequest so the HTTP-specific members are available.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

// Send the request through the proxy at proxyUrl. The second argument (true)
// is WebProxy's bypass-on-local flag.
request.Proxy = new WebProxy(proxyUrl, true); // e.g. http://123.123.123.123:80

3) 使用Pac(自动配置代理脚本):

这个比较麻烦,需要win32 api,下面是一个类以及调用方法,有详细的注释,不用说是抄来的:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/// <summary>
/// P/Invoke declarations (constants, structures and functions) for the WinHTTP
/// auto-proxy API, used to resolve the proxy for a URL from a PAC file.
/// </summary>
public class Win32Api
{
    #region AutoProxy Constants
    /// <summary>
    /// Applies only when setting proxy information
    /// </summary>
    public const int WINHTTP_ACCESS_TYPE_DEFAULT_PROXY = 0;
    /// <summary>
    /// Internet accessed through a direct connection
    /// </summary>
    public const int WINHTTP_ACCESS_TYPE_NO_PROXY = 1;
    /// <summary>
    /// Internet accessed using a proxy
    /// </summary>
    public const int WINHTTP_ACCESS_TYPE_NAMED_PROXY = 3;
    /// <summary>
    /// Attempt to automatically discover the URL of the
    /// PAC file using both DHCP and DNS queries to the local network.
    /// </summary>
    public const int WINHTTP_AUTOPROXY_AUTO_DETECT = 0x00000001;
    /// <summary>
    /// Download the PAC file from the URL in the WINHTTP_AUTOPROXY_OPTIONS structure.
    /// </summary>
    public const int WINHTTP_AUTOPROXY_CONFIG_URL = 0x00000002;
    /// <summary>
    /// Executes the Web Proxy Auto-Discovery (WPAD) protocol in-process instead of
    /// delegating to an out-of-process WinHTTP AutoProxy Service, if available.
    /// This flag must be combined with one of the other flags
    /// </summary>
    public const int WINHTTP_AUTOPROXY_RUN_INPROCESS = 0x00010000;
    /// <summary>
    /// By default, WinHTTP is configured to fall back to auto-discover a proxy
    /// in-process. If this fallback behavior is undesirable in the event that
    /// an out-of-process discovery fails, it can be disabled using this flag.
    /// </summary>
    public const int WINHTTP_AUTOPROXY_RUN_OUTPROCESS_ONLY = 0x00020000;
    /// <summary>
    /// Use DHCP to locate the proxy auto-configuration file.
    /// </summary>
    public const int WINHTTP_AUTO_DETECT_TYPE_DHCP = 0x00000001;
    /// <summary>
    /// Use DNS to attempt to locate the proxy auto-configuration file at a
    /// well-known location on the domain of the local computer
    /// </summary>
    public const int WINHTTP_AUTO_DETECT_TYPE_DNS_A = 0x00000002;
    #endregion

    #region Proxy Structures
    /// <summary>
    /// The structure is used to indicate to the WinHttpGetProxyForURL
    /// function whether to specify the URL of the Proxy Auto-Configuration
    /// (PAC) file or to automatically locate the URL with DHCP or DNS
    /// queries to the network
    /// </summary>
    [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)]
    public struct WINHTTP_AUTOPROXY_OPTIONS
    {
        /// <summary>
        /// Mechanisms should be used to obtain the PAC file
        /// </summary>
        [MarshalAs(UnmanagedType.U4)]
        public int dwFlags;
        /// <summary>
        /// If dwflags includes the WINHTTP_AUTOPROXY_AUTO_DETECT flag,
        /// then dwAutoDetectFlags specifies what protocols are to be
        /// used to locate the PAC file. If both the DHCP and DNS auto
        /// detect flags are specified, then DHCP is used first;
        /// if no PAC URL is discovered using DHCP, then DNS is used.
        /// If dwflags does not include the WINHTTP_AUTOPROXY_AUTO_DETECT
        /// flag, then dwAutoDetectFlags must be zero.
        /// </summary>
        [MarshalAs(UnmanagedType.U4)]
        public int dwAutoDetectFlags;
        /// <summary>
        /// If dwflags includes the WINHTTP_AUTOPROXY_CONFIG_URL flag, the
        /// lpszAutoConfigUrl must point to a null-terminated Unicode string
        /// that contains the URL of the proxy auto-configuration (PAC) file.
        /// If dwflags does not include the WINHTTP_AUTOPROXY_CONFIG_URL flag,
        /// then lpszAutoConfigUrl must be NULL.
        /// </summary>
        public string lpszAutoConfigUrl;
        /// <summary>
        /// Reserved for future use; must be NULL.
        /// </summary>
        public IntPtr lpvReserved;
        /// <summary>
        /// Reserved for future use; must be zero.
        /// </summary>
        [MarshalAs(UnmanagedType.U4)]
        public int dwReserved;
        /// <summary>
        /// Specifies whether the client's domain credentials should be automatically
        /// sent in response to an NTLM or Negotiate Authentication challenge when
        /// WinHTTP requests the PAC file.
        /// If this flag is TRUE, credentials should automatically be sent in response
        /// to an authentication challenge. If this flag is FALSE and authentication
        /// is required to download the PAC file, the WinHttpGetProxyForUrl fails.
        /// </summary>
        [MarshalAs(UnmanagedType.Bool)] // Win32 BOOL (4 bytes); made explicit
        public bool fAutoLoginIfChallenged;
    }

    /// <summary>
    /// The structure contains the session or default proxy configuration.
    /// </summary>
    /// <remarks>
    /// NOTE(review): WinHTTP documents that the caller releases lpszProxy and
    /// lpszProxyBypass with GlobalFree. With string-typed fields the interop
    /// marshaler decides how the returned buffers are released; confirm this is
    /// acceptable, or switch to IntPtr fields read with Marshal.PtrToStringUni.
    /// The field types are left unchanged here so existing callers keep compiling.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Unicode)]
    public struct WINHTTP_PROXY_INFO
    {
        /// <summary>
        /// Unsigned long integer value that contains the access type
        /// </summary>
        [MarshalAs(UnmanagedType.U4)]
        public int dwAccessType;
        /// <summary>
        /// Pointer to a string value that contains the proxy server list
        /// </summary>
        public string lpszProxy;
        /// <summary>
        /// Pointer to a string value that contains the proxy bypass list
        /// </summary>
        public string lpszProxyBypass;
    }
    #endregion

    #region WinHttp
    /// <summary>
    /// This function implements the Web Proxy Auto-Discovery (WPAD) protocol
    /// for automatically configuring the proxy settings for an HTTP request.
    /// The WPAD protocol downloads a Proxy Auto-Configuration (PAC) file,
    /// which is a script that identifies the proxy server to use for a given
    /// target URL. PAC files are typically deployed by the IT department within
    /// a corporate network environment. The URL of the PAC file can either be
    /// specified explicitly or WinHttpGetProxyForUrl can be instructed to
    /// automatically discover the location of the PAC file on the local network.
    /// </summary>
    /// <param name="hSession">The WinHTTP session handle returned by the WinHttpOpen function</param>
    /// <param name="lpcwszUrl">A pointer to a null-terminated Unicode string that contains the
    /// URL of the HTTP request that the application is preparing to send.</param>
    /// <param name="pAutoProxyOptions">A pointer to a WINHTTP_AUTOPROXY_OPTIONS structure that
    /// specifies the auto-proxy options to use.</param>
    /// <param name="pProxyInfo">A pointer to a WINHTTP_PROXY_INFO structure that receives the
    /// proxy setting. This structure is then applied to the request handle using the
    /// WINHTTP_OPTION_PROXY option.</param>
    /// <returns>TRUE on success; on failure, FALSE, and the error code is available
    /// through Marshal.GetLastWin32Error (SetLastError is enabled).</returns>
    [DllImport("winhttp.dll", SetLastError = true, CharSet = CharSet.Unicode)]
    [return: MarshalAs(UnmanagedType.Bool)]
    public static extern bool WinHttpGetProxyForUrl(
        IntPtr hSession,
        string lpcwszUrl,
        ref WINHTTP_AUTOPROXY_OPTIONS pAutoProxyOptions,
        ref WINHTTP_PROXY_INFO pProxyInfo);

    /// <summary>
    /// The function initializes, for an application, the use of WinHTTP
    /// functions and returns a WinHTTP-session handle
    /// </summary>
    /// <param name="pwszUserAgent">A pointer to a string variable that contains the name of the
    /// application or entity calling the WinHTTP functions.</param>
    /// <param name="dwAccessType">Type of access required. This can be one of the
    /// WINHTTP_ACCESS_TYPE_* values</param>
    /// <param name="pwszProxyName"> A pointer to a string variable that contains the name of the
    /// proxy server to use when proxy access is specified by setting dwAccessType to
    /// WINHTTP_ACCESS_TYPE_NAMED_PROXY. The WinHTTP functions recognize only CERN type proxies for HTTP.
    /// If dwAccessType is not set to WINHTTP_ACCESS_TYPE_NAMED_PROXY, this parameter must be set
    /// to WINHTTP_NO_PROXY_NAME</param>
    /// <param name="pwszProxyBypass">A pointer to a string variable that contains an optional list
    /// of host names or IP addresses, or both, that should not be routed through the proxy when
    /// dwAccessType is set to WINHTTP_ACCESS_TYPE_NAMED_PROXY. The list can contain wildcard characters.
    /// Do not use an empty string, because the WinHttpOpen function uses it as the proxy bypass list.
    /// If this parameter specifies the "&lt;local&gt;" macro as the only entry, this function bypasses
    /// any host name that does not contain a period. If dwAccessType is not set to WINHTTP_ACCESS_TYPE_NAMED_PROXY,
    /// this parameter must be set to WINHTTP_NO_PROXY_BYPASS.</param>
    /// <param name="dwFlags">Unsigned long integer value that contains the flags that indicate various options
    /// affecting the behavior of this function</param>
    /// <returns>Returns a valid session handle if successful, or NULL otherwise</returns>
    [DllImport("winhttp.dll", SetLastError = true, CharSet = CharSet.Unicode)]
    public static extern IntPtr WinHttpOpen(
        string pwszUserAgent,
        int dwAccessType,
        IntPtr pwszProxyName,
        IntPtr pwszProxyBypass,
        int dwFlags
        );

    /// <summary>
    /// The function closes a single HINTERNET handle
    /// </summary>
    /// <param name="hInternet">Valid HINTERNET handle to be closed.</param>
    /// <returns>Returns TRUE if the handle is successfully closed, or FALSE otherwise</returns>
    [DllImport("winhttp.dll", SetLastError = true, CharSet = CharSet.Unicode)]
    [return: MarshalAs(UnmanagedType.Bool)]
    public static extern bool WinHttpCloseHandle(IntPtr hInternet);

    #endregion

    /// <summary>
    /// Returns the Win32 error code saved by the runtime for the most recent
    /// P/Invoke call declared with SetLastError = true (every WinHTTP import in
    /// this class is declared that way).
    /// </summary>
    /// <remarks>
    /// Kept under the original name and signature for existing callers. It no
    /// longer P/Invokes kernel32's GetLastError directly: from managed code that
    /// value can be overwritten by Win32 calls the runtime itself makes between
    /// the failing call and the query, so the runtime-saved value is read instead.
    /// </remarks>
    /// <returns>The saved Win32 error code.</returns>
    public static int GetLastError()
    {
        return Marshal.GetLastWin32Error();
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/// <summary>
/// Resolves the proxy to use for <paramref name="DestinationUrl"/> by asking
/// WinHTTP to evaluate the PAC file located at <paramref name="PacUri"/>.
/// </summary>
/// <param name="DestinationUrl">URL of the request that is about to be sent.</param>
/// <param name="PacUri">URL of the proxy auto-configuration (PAC) file.</param>
/// <returns>
/// The lpszProxy value WinHTTP filled in (a proxy server list) on success;
/// null if the WinHTTP session could not be opened or the lookup failed, in
/// which case the Win32 error code is written to the console.
/// </returns>
private string getProxyForUrlUsingPac(string DestinationUrl, string PacUri)
{
    IntPtr WinHttpSession = Win32Api.WinHttpOpen("User", Win32Api.WINHTTP_ACCESS_TYPE_DEFAULT_PROXY, IntPtr.Zero, IntPtr.Zero, 0);

    // WinHttpOpen returns NULL on failure; there is nothing to query or close then.
    if (WinHttpSession == IntPtr.Zero)
    {
        Console.WriteLine("Error: {0}", System.Runtime.InteropServices.Marshal.GetLastWin32Error());
        return null;
    }

    try
    {
        Win32Api.WINHTTP_AUTOPROXY_OPTIONS ProxyOptions = new Win32Api.WINHTTP_AUTOPROXY_OPTIONS();
        Win32Api.WINHTTP_PROXY_INFO ProxyInfo = new Win32Api.WINHTTP_PROXY_INFO();

        // Explicit PAC URL mode. dwAutoDetectFlags must stay zero because
        // WINHTTP_AUTOPROXY_AUTO_DETECT is not part of dwFlags.
        ProxyOptions.dwFlags = Win32Api.WINHTTP_AUTOPROXY_CONFIG_URL;
        ProxyOptions.dwAutoDetectFlags = 0;
        ProxyOptions.lpszAutoConfigUrl = PacUri;

        // Get Proxy
        bool IsSuccess = Win32Api.WinHttpGetProxyForUrl(WinHttpSession, DestinationUrl, ref ProxyOptions, ref ProxyInfo);

        // Capture the error code immediately: the WinHttpCloseHandle call in the
        // finally block is also a SetLastError P/Invoke and would overwrite it.
        int LastError = System.Runtime.InteropServices.Marshal.GetLastWin32Error();

        if (IsSuccess)
        {
            return ProxyInfo.lpszProxy;
        }

        Console.WriteLine("Error: {0}", LastError);
        return null;
    }
    finally
    {
        // Always release the session handle, including when an exception escapes.
        Win32Api.WinHttpCloseHandle(WinHttpSession);
    }
}

  使用时,request.Proxy = new WebProxy(getProxyForUrlUsingPac(url, pac)); 注意:getProxyForUrlUsingPac失败时返回null;成功时返回的是代理服务器列表,可能包含多个代理(以分号或空白分隔),必要时先拆分出单个地址再传给WebProxy。

这里要注意一点,HttpWebRequest设置代理后,不要设置太多的Http Header,否则容易出问题。

2. 读取CookieContainer里的cookie

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// Enumerate every cookie held by `cookie` (presumably a CookieContainer; it is
// declared outside this fragment) by reading its private fields via reflection.
BindingFlags privateInstanceField = BindingFlags.NonPublic |
                                    BindingFlags.GetField |
                                    BindingFlags.Instance;

// Private "m_domainTable" field: a Hashtable whose keys are used below as host names.
Hashtable domainTable = (Hashtable)cookie.GetType().InvokeMember(
    "m_domainTable",
    privateInstanceField,
    null,
    cookie,
    new object[] { });

foreach (var domainKey in domainTable.Keys)
{
    // Strip one leading '.' so the key can be used as the host part of a URL.
    String host = (string)domainKey;
    if (host[0] == '.')
    {
        host = host.Substring(1);
    }

    // Each table entry carries a private "m_list" SortedList; its keys are
    // appended to the host below as the URL path.
    object domainEntry = domainTable[domainKey];
    SortedList pathList = (SortedList)domainEntry.GetType().InvokeMember(
        "m_list",
        privateInstanceField,
        null,
        domainEntry,
        new object[] { });

    foreach (var pathKey in pathList.Keys)
    {
        String address = "https://" + host + (string)pathKey;
        Uri target = new Uri(address);
        foreach (Cookie c in cookie.GetCookies(target))
        {
            // Read the cookie's Name, Value and other properties here.
            // The URL above is built for https only; it is straightforward to
            // extend this to query the http scheme as well.
        }
    }
}

  

posted @   平静寄居者  阅读(429)  评论(0编辑  收藏  举报
编辑推荐:
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律
点击右上角即可分享
微信分享提示