Python3 urllib库常用方法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
'''
    GET请求方式
    POST请求方式
    超时timeout,异常处理
    响应类型(响应码,响应头...)
    POST请求添加Headers
    代理方法
    cookie添加 读取
    ---------- parse 包下 -----------
    urlparse 解析网址
    urlunparse 拼接网址
    urlencode GET参数化(比较有用)
     
'''
 
import urllib.request
import urllib.parse
 
# -------------------------- GET 方式(不加data)
# response = urllib.request.urlopen('http://www.baidu.com')
# print(response.read().decode('utf-8'))
 
# -------------------------  POST方式 (加data)
# data = bytes(urllib.parse.urlencode({'word': 'hello'}), encoding='utf8')
# response = urllib.request.urlopen('http://httpbin.org/post', data=data)
# print(response.read().decode('utf8'))
 
# -------------------------  超时timeout
# response = urllib.request.urlopen('http://httpbin.org/get', timeout=1)
# print(response.read())
# import socket
# import urllib.error
# try:
#     response = urllib.request.urlopen('http://httpbin.org/get', timeout=0.1)
# except urllib.error.URLError as e:
#     if isinstance(e.reason, socket.timeout):
#         print('TIME OUT')
 
 
# --------------------------- 响应类型
# response = urllib.request.urlopen('http://www.python.org')
# print(type(response))  # <class 'http.client.HTTPResponse'>
 
# --------------------------  状态码 响应头
# response = urllib.request.urlopen('http://www.python.org')
# print(response.status)  # 响应码
# print(response.getheaders())  # 响应头
# print(response.getheader('Server'))  # 响应的服务
 
# 获取响应内容
# response = urllib.request.urlopen('http://www.python.org')
# print(response.read().decode('utf-8'))  # read() 获取bytes类型
 
# ------------------------------  加入Headers, 发送一个POST 请求
# from urllib import parse, request
# url = 'http://httpbin.org/post'
# headers = {
#     'User-Agent': 'Mozilla/4.0 (compatible;MSIE 5.5; Windows NT)',
#     'Host': 'httpbin.org'
# }
# form_data = {
#     'name': 'Germey'
# }
# data = bytes(parse.urlencode(form_data), encoding='utf8')
# req = request.Request(url=url, data=data, headers=headers, method='POST')
# response = request.urlopen(req)
# print(response.read().decode('utf-8'))
# ------------------------- 代理
# proxy_handler = urllib.request.ProxyHandler({
#     'http': 'http://127.0.0.1:2222',
#     'https': 'https://127.0.0.1:2211'
# })
# opener = urllib.request.build_opener(proxy_handler)
# response = opener.open('http://www.baidu.com')
# print(response.read())
 
 
 
 
# ------------------------- cookie
# import http.cookiejar, urllib.request
# cookie = http.cookiejar.CookieJar()
# handler = urllib.request.HTTPCookieProcessor(cookie)
# opener = urllib.request.build_opener(handler)
# response = opener.open('http://www.baidu.com')
# for item in cookie:
#     print(item.name+'='+item.value)  # 打印key-value
############ cookie 保存为txt
# import http.cookiejar, urllib.request
# filename = 'cookie.txt'
# cookie = http.cookiejar.LWPCookieJar(filename)
# handler = urllib.request.HTTPCookieProcessor(cookie)
# opener = urllib.request.build_opener(handler)
# response = opener.open('http://www.baidu.com')
# cookie.save(ignore_discard=True, ignore_expires=True)
############## 读取cookie
# import http.cookiejar, urllib.request
# cookie = http.cookiejar.LWPCookieJar()
# cookie.load('cookie.txt',ignore_discard=True ,ignore_expires=True)
# handler = urllib.request.HTTPCookieProcessor(cookie)
# opener = urllib.request.build_opener(handler)
# response = opener.open('http://www.baidu.com')
# print(response.read().decode('utf-8'))
 
 
 
 
############# 异常处理
# from urllib import request, error
# try:
#     response = request.urlopen('http://wwwwwwww.com')
# except error.URLError as e:
#     print(e.reason, 'xxx')
############# 异常处理2
# from urllib import request, error
# try:
#     response = request.urlopen('http://www.baidu.com/aa/aass')
# except error.HTTPError as e:
#     print(e.reason, e.code, e.headers, sep='\n')
# except error.URLError as e:
#     print(e.reason)
# else:
#     print('Request Successfully')
###########  异常处理3
# import socket, urllib.request, urllib.error
# try:
#     response = urllib.request.urlopen('https://www.baidu.com',timeout=0.01)
# except urllib.error.URLError as e:
#     print(type(e.reason))
#     if isinstance(e.reason, socket.timeout):  # 判断异常是什么类型
#         print('TIME OUT')
 
 
 
 
# --------------------- URL解析
from urllib.parse import urlparse
# 一个参数
# result = urlparse('http://www.baidu.com/index.html;user?id=5#comment')
# print(type(result), result)
# 指定默认协议: 如果url中没有协议, 则使用scheme参数(https); 如果url自带协议, 就用url带的
# result = urlparse(scheme='https',url='http://www.baidu.com/index.html;user?id=5#comment')
# print(type(result), result)
 
# allow_fragments=False 一般不会用: 不单独解析锚点(fragment), #XXXX 会保留在前一个部分中(有查询参数时归入 query, 没有则再往前归入 params 或 path)
# result = urlparse(allow_fragments=False, url='http://www.baidu.com/index.html;user?id=5#comment')
# print(result)
 
# ---------------------------------- urlunparse  拼接网站
# from urllib.parse import urlunparse
# data = ['http', 'www.baidu.com', 'index.html', 'user', 'a=1', 'comment']
# print(urlunparse(data))
# ----------------------------     urljoin
from urllib.parse import urljoin
# 拼接
# print(urljoin('http://www.baidu.com', 'Faq.html'))
# 以第二个为基准
# print(urljoin('http://www.baidu.com', 'https://www.baidu.com/aaa'))
# 拼接
# print(urljoin('http://www.baidu.com', '?a=1'))
 
 
 
################    urlencode  参数化get请求参数
# from urllib.parse import urlencode
# params = {
#     'name': 'kaige',
#     'age': '22'
# }
# base_url = 'http://www.baidu.com?'
# url = base_url+ urlencode(params)
# print(url)

  

posted @   qukaige  阅读(264)  评论(0)  编辑  收藏  举报
编辑推荐:
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· 阿里巴巴 QwQ-32B真的超越了 DeepSeek R-1吗?
· 【译】Visual Studio 中新的强大生产力特性
· 【设计模式】告别冗长if-else语句:使用策略模式优化代码结构
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
点击右上角即可分享
微信分享提示