urllib基本使用1

urllib是Python标准库的一部分,不需要额外安装(下载)即可使用

 1 # 使用urllib来获取百度首页的源码
 2 import urllib.request
 3 
 4 # 1、定义一个url
 5 url = 'http://www.baidu.com'
 6 
 7 # 2、模拟浏览器向服务器发送请求
 8 response = urllib.request.urlopen(url)
 9 
10 # 3、获取响应中的页面的源码
11 # read方法返回的是字节形式的二进制数据
12 content = response.read().decode('utf-8')
13 
14 print(content)
15 
16 # print(type(response))
17 # HTTPResponse类型
18 
19 # response.read()
20 # 一个字节一个字节的读
21 # response.read(5)
22 # 一次读5个字节
23 
24 # response.readline()
25 # 一次读一行
26 
27 # response.readlines()
28 # 一行一行读直至读完
29 
30 # response.getcode()
31 # 状态码
32 
33 # response.geturl()
34 # 访问的url地址
35 
36 # response.getheaders()
37 # 获取响应头

urllib下载

# Download a resource to a local file with urllib.
import urllib.request

# Page to download.
url_page = 'http://www.baidu.com'

# urlretrieve(url, filename): fetch `url` and save the body under `filename`.
# NOTE: the docs describe urlretrieve as a legacy interface that may be
# deprecated in the future, but it remains the simplest one-liner download.
urllib.request.urlretrieve(url_page, 'baidu.html')

# The very same call downloads images or videos — only the URL and the
# target filename change.

定制对象

 1 import urllib.request
 2 
 3 url = 'https://www.baidu.com'
 4 
 5 # url的组成
 6 # http/https  www.baidu.com  80/443                            #
 7 # 协议            主机        端口号       路径      参数      锚点
 8 
 9 
10 headers = {
11     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0'
12 }
13 request = urllib.request.Request(url=url, headers=headers)
14 response = urllib.request.urlopen(request)
15 content = response.read().decode('utf-8')
16 print(content)

编解码

  get请求

 1 import urllib.request
 2 import urllib.parse
 3 
 4 url = 'https://www.baidu.com/s?wd='
 5 name = urllib.parse.quote("周杰伦")
 6 # 多参数 https://www.baidu.com/s?wd=周杰伦&sex=男
 7 data = {
 8     'wd': '周杰伦',
 9     'sex': ''
10 }
11 params = urllib.parse.urlencode(data)
12 print(params)

  post请求

 1 import urllib.request
 2 import urllib.parse
 3 
 4 
 5 url = 'https://fanyi.baidu.com/sug'
 6 headers = {
 7     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0'
 8 }
 9 data = {
10     'kw': '蜘蛛'
11 }
12 # post请求的参数 必须进行编码(字节码)
13 data = urllib.parse.urlencode(data).encode("utf-8")
14 
15 request = urllib.request.Request(url=url, data=data, headers=headers)
16 response = urllib.request.urlopen(request)
17 content = response.read().decode('utf-8')
18 print(content)

  cookie

 1 import urllib.request
 2 import urllib.parse
 3 
 4 url = 'https://fanyi.baidu.com/v2transapi?from=en&to=zh'
 5 
 6 headers = {"Accept": "*/*",
 7            # "Accept-Encoding": "gzip, deflate, br",
 8            "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
 9             "Connection": "keep-alive", "Content-Length": "136",
10             "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
11             "Cookie": "BAIDUID=5157A4D583A89C37A7545DADAB81726C:FG=1; BIDUPSID=36874D3B080FE4D3FE00F130757B1DCB; PSTM=1618275809; __yjs_duid=1_5c8c0adae28380c2c87efcd8b022c3551618281414735; MCITY=-289%3A179%3A; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1648619861,1649214103,1649658988,1649730956; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; APPGUIDE_10_0_2=1; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1649730956; ab_sr=1.0.1_MWVlMDU1ZjZmYWEwNzE5N2UxMzJmMjkwZWQ4Y2U5ZTU3NWQ2NzE1M2YxYWYxYzc3M2Y5NWM5MzQ5YzU2YTRkMTZkNTUyYzQzOWViMjJkMzdiNGQxZjAyNTYxYmRlN2Q5MTcxODg4NDFjYWMxM2I0ZThjZGZmM2YxNTNmNGJlYzNkZDczODgxMzAzMTM1NTlhYTk3ZmYxZGY2ODBkZTMzMw==",
12              "Host": "fanyi.baidu.com", "Origin": "https://fanyi.baidu.com", "Referer": "https://fanyi.baidu.com/"
13             , "Sec-Fetch-Dest": "empty", "Sec-Fetch-Mode": "cors", "Sec-Fetch-Site": "same-origin"
14             , "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0"
15             , "X-Requested-With": "XMLHttpRequest"
16          }
17 
18 data = {
19     "from": "en",
20     "to": "zh",
21     "query": "spider",
22     "transtype": "realtime",
23     "simple_means_flag": "3",
24     "sign": "63766.268839",
25     "token": "c45b7821850766d1e62222dc6115e145",
26     "domain": "common"
27 }
28 
29 data = urllib.parse.urlencode(data).encode("utf-8")
30 
31 request = urllib.request.Request(url=url, data=data, headers=headers)
32 response = urllib.request.urlopen(request)
33 content = response.read().decode('utf-8')
34 print(content)
35 
36 import json
37 print(json.loads(content))

 

posted @ 2022-04-12 11:14  归零19  阅读(58)  评论(0编辑  收藏  举报