urllib_1_基本使用
"""
.-''-.
.--. _..._ .' .-. )
|__| .' '. / .' / /
.--..-,.--. . .-. . (_/ / /
| || .-. | | ' ' | / /
| || | | | _ _ | | | | / / _ _
| || | | || ' / | | | | | . ' | ' / |
| || | '-.' | .' | | | | | / / _.-').' | .' |
|__|| | / | / | | | | | .' ' _.'.-'' / | / |
| | | `'. | | | | | / /.-'_.' | `'. |
|_| ' .'| '/| | | | / _.' ' .'| '/
`-' `--' '--' '--'( _.-' `-' `--'
Created on 2023/3/23 20:54.
@Author: haifei
"""
import time
import urllib.request
if __name__ == '__main__':
    # Basic urllib usage: fetch one page, decode it and print it, then report
    # how long the whole round trip took.
    start = time.time()
    url = 'http://www.baidu.com'

    # Using the response as a context manager guarantees the underlying
    # connection is closed even if read() raises part-way through.
    with urllib.request.urlopen(url) as response:
        raw_body = response.read()

    # read() yields bytes; decode them as UTF-8 text before printing.
    content = raw_body.decode('utf-8')
    print(content)

    print('It takes', time.time() - start, "seconds.")
urllib_2_一个类型和六个方法
"""
.-''-.
.--. _..._ .' .-. )
|__| .' '. / .' / /
.--..-,.--. . .-. . (_/ / /
| || .-. | | ' ' | / /
| || | | | _ _ | | | | / / _ _
| || | | || ' / | | | | | . ' | ' / |
| || | '-.' | .' | | | | | / / _.-').' | .' |
|__|| | / | / | | | | | .' ' _.'.-'' / | / |
| | | `'. | | | | | / /.-'_.' | `'. |
|_| ' .'| '/| | | | / _.' ' .'| '/
`-' `--' '--' '--'( _.-' `-' `--'
Created on 2023/3/23 21:01.
@Author: haifei
"""
import time
from urllib import request
# Inspect the object returned by urlopen(): its type, status code, final URL
# and response headers.

# The clock starts before the request so that the report at the bottom covers
# the actual work instead of an empty interval.
start = time.time()

url = "http://irun2u.top"

# The context manager closes the connection once the response has been
# inspected, even if one of the calls below raises.
with request.urlopen(url) as response:
    print(type(response))
    # NOTE(review): the Python docs list getcode()/geturl()/getheaders() as
    # deprecated since 3.9 in favour of .status/.url/.headers; they are kept
    # because this script exists to demonstrate these methods.
    print(response.getcode())
    print(response.geturl())
    print(response.getheaders())

if __name__ == '__main__':
    print('It takes', time.time() - start, "seconds.")
urllib_3_下载
"""
.-''-.
.--. _..._ .' .-. )
|__| .' '. / .' / /
.--..-,.--. . .-. . (_/ / /
| || .-. | | ' ' | / /
| || | | | _ _ | | | | / / _ _
| || | | || ' / | | | | | . ' | ' / |
| || | '-.' | .' | | | | | / / _.-').' | .' |
|__|| | / | / | | | | | .' ' _.'.-'' / | / |
| | | `'. | | | | | / /.-'_.' | `'. |
|_| ' .'| '/| | | | / _.' ' .'| '/
`-' `--' '--' '--'( _.-' `-' `--'
Created on 2023/3/23 21:16.
@Author: haifei
"""
import time
from urllib import request
# Download a web page, an image and a video to ./download/ with urlretrieve().
import os

# The clock starts before the downloads so that the report at the bottom
# covers the actual work instead of an empty interval.
start = time.time()

# urlretrieve() opens the target file directly and does not create missing
# parent directories, so make sure the output folder exists first.
os.makedirs('./download', exist_ok=True)

# NOTE(review): the image/video links below carry sec=/auth_key= query values
# that look time-limited -- confirm they still resolve before relying on them.

# 1) A plain HTML page.
url_page = 'http://irun2u.top'
request.urlretrieve(url_page, './download/irun2utop.html')

# 2) An image (same call, keyword-argument form).
url_img = 'https://gimg2.baidu.com/image_search/src=http%3A%2F%2Fsafe-img.xhscdn.com%2Fbw1%2F91239c50-d064-4ec1-b998-1e5f979c9c46%3FimageView2%2F2%2Fw%2F1080%2Fformat%2Fjpg&refer=http%3A%2F%2Fsafe-img.xhscdn.com&app=2002&size=f9999,10000&q=a80&n=0&g=0n&fmt=auto?sec=1682170811&t=53fd80c95575efcc38e04269a4addf3f'
request.urlretrieve(url=url_img, filename='./download/lisa.jpg')

# 3) A video file.
url_video = 'https://vd4.bdstatic.com/mda-kg0pcztgi0rucsza/v1-cae/sc/mda-kg0pcztgi0rucsza.mp4?v_from_s=hkapp-haokan-nanjing&auth_key=1679580855-0-0-293c71bb38a72b92a305768a159a1da1&bcevod_channel=searchbox_feed&pd=1&cd=0&pt=3&logid=2655222603&vid=10392909521055706475&abtest=107353_1&klogid=2655222603'
request.urlretrieve(url_video, './download/lisa.mp4')

if __name__ == '__main__':
    print('It takes', time.time() - start, "seconds.")
urllib_4_请求对象定制
"""
.-''-.
.--. _..._ .' .-. )
|__| .' '. / .' / /
.--..-,.--. . .-. . (_/ / /
| || .-. | | ' ' | / /
| || | | | _ _ | | | | / / _ _
| || | | || ' / | | | | | . ' | ' / |
| || | '-.' | .' | | | | | / / _.-').' | .' |
|__|| | / | / | | | | | .' ' _.'.-'' / | / |
| | | `'. | | | | | / /.-'_.' | `'. |
|_| ' .'| '/| | | | / _.' ' .'| '/
`-' `--' '--' '--'( _.-' `-' `--'
Created on 2023/3/23 21:52.
@Author: haifei
"""
import time
from urllib import request
# Customising the request object: fetch the same URL twice, first with
# urllib's defaults and then with a browser User-Agent header, and print both
# bodies so they can be compared.

# The clock starts before the requests so that the report at the bottom
# covers the actual work instead of an empty interval.
start = time.time()

url = 'https://www.baidu.com'

# Attempt 1: bare urlopen(), no custom headers.
with request.urlopen(url) as response:
    content = response.read().decode('utf-8')
print(content)

# User-Agent based anti-crawling.
# The User-Agent (UA) is a special header string that lets the server identify
# the client's operating system and version, CPU type, browser and version,
# browser engine, rendering engine, browser language, browser plug-ins, etc.
# UA list for reference: https://blog.csdn.net/Uridis/article/details/86558811
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
}

# Attempt 2: urlopen() accepts a Request object, which is how extra headers
# are attached to the call.
request2 = request.Request(url=url, headers=headers)
with request.urlopen(request2) as response2:
    content2 = response2.read().decode('utf-8')
print(content2)

if __name__ == '__main__':
    print('It takes', time.time() - start, "seconds.")
urllib_5_get请求的quote方法
"""
.-''-.
.--. _..._ .' .-. )
|__| .' '. / .' / /
.--..-,.--. . .-. . (_/ / /
| || .-. | | ' ' | / /
| || | | | _ _ | | | | / / _ _
| || | | || ' / | | | | | . ' | ' / |
| || | '-.' | .' | | | | | / / _.-').' | .' |
|__|| | / | / | | | | | .' ' _.'.-'' / | / |
| | | `'. | | | | | / /.-'_.' | `'. |
|_| ' .'| '/| | | | / _.' ' .'| '/
`-' `--' '--' '--'( _.-' `-' `--'
Created on 2023/3/23 22:13.
@Author: haifei
"""
import time
from urllib import request, parse
# GET request with a non-ASCII query value: percent-encode a single value
# with parse.quote() and append it to the search URL.

# The clock starts before the request so that the report at the bottom covers
# the actual work instead of an empty interval.
start = time.time()

# quote() percent-encodes the text so it can be placed in a URL.
name = parse.quote('周杰伦')
print(name)

url = 'https://www.baidu.com/s?ie=UTF-8&wd=' + name
print(url)

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
}

request2 = request.Request(url=url, headers=headers)
# The context manager closes the connection even if read()/decode() raises.
with request.urlopen(request2) as response:
    content = response.read().decode('utf-8')
print(content)

if __name__ == '__main__':
    print('It takes', time.time() - start, "seconds.")
urllib_6_get请求的urlencode方法
"""
.-''-.
.--. _..._ .' .-. )
|__| .' '. / .' / /
.--..-,.--. . .-. . (_/ / /
| || .-. | | ' ' | / /
| || | | | _ _ | | | | / / _ _
| || | | || ' / | | | | | . ' | ' / |
| || | '-.' | .' | | | | | / / _.-').' | .' |
|__|| | / | / | | | | | .' ' _.'.-'' / | / |
| | | `'. | | | | | / /.-'_.' | `'. |
|_| ' .'| '/| | | | / _.' ' .'| '/
`-' `--' '--' '--'( _.-' `-' `--'
Created on 2023/3/23 22:59.
@Author: haifei
"""
import time
from urllib import parse, request
# GET request with several query parameters: compare quoting each value by
# hand with encoding a whole dict in one call via parse.urlencode().

# The clock starts before the work so that the report at the bottom covers it
# instead of an empty interval.
start = time.time()

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
}

# Variant 1: quote() every value and glue the pairs together manually.
# This URL is only printed for comparison; it is not requested.
url = 'https://www.baidu.com/s?ie=UTF-8&wd=' + parse.quote('周杰伦') + '&sex=' + parse.quote('男')
print('url: ' + url)

# Variant 2: urlencode() builds the "key=value&key=value" string from a dict.
# Also only printed, not requested.
data = {
    'wd': '周杰伦',
    'sex': '男'
}
url2 = 'https://www.baidu.com/s?ie=UTF-8&' + parse.urlencode(data)
print('url2: ' + url2)

# Variant 3: the URL that is actually fetched below.
base_url = 'https://www.baidu.com/s?'
base_data = {
    'wd': 'Lisa',
    'sex': '女',
    'location': '南韩'
}
new_data = parse.urlencode(base_data)
new_url = base_url + new_data
print(new_url)

request2 = request.Request(url=new_url, headers=headers)
# Bind the response in a `with` block so the connection is closed; a chained
# urlopen(...).read() would leave it open until garbage collection.
with request.urlopen(request2) as response:
    content = response.read().decode('utf-8')
print(content)

if __name__ == '__main__':
    print('It takes', time.time() - start, "seconds.")
urllib_7_post请求百度翻译之普通翻译
"""
.-''-.
.--. _..._ .' .-. )
|__| .' '. / .' / /
.--..-,.--. . .-. . (_/ / /
| || .-. | | ' ' | / /
| || | | | _ _ | | | | / / _ _
| || | | || ' / | | | | | . ' | ' / |
| || | '-.' | .' | | | | | / / _.-').' | .' |
|__|| | / | / | | | | | .' ' _.'.-'' / | / |
| | | `'. | | | | | / /.-'_.' | `'. |
|_| ' .'| '/| | | | / _.' ' .'| '/
`-' `--' '--' '--'( _.-' `-' `--'
Created on 2023/3/24 23:02.
@Author: haifei
"""
import json
import time
from urllib import request, parse
# POST request against the Baidu Translate suggestion endpoint: send a
# form-encoded keyword, then parse the JSON body that comes back.

# The clock starts before the request so that the report at the bottom covers
# the actual work instead of an empty interval.
start = time.time()

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
}
url = "https://fanyi.baidu.com/sug"

form_fields = {
    'kw': 'spider'
}
# POST bodies must be bytes: urlencode() builds the form string and
# encode() turns it into UTF-8 bytes.
data = parse.urlencode(form_fields).encode('utf-8')
print(data)

# Supplying `data` to Request is what makes urllib issue a POST.
_request = request.Request(url=url, data=data, headers=headers)
print(_request)

# The context manager closes the connection even if read()/decode() raises.
with request.urlopen(_request) as response:
    print(response)
    content = response.read().decode("utf-8")
print(content)
print(type(content))

# SECURITY: this used to be eval(content). eval() on a network response runs
# whatever expression the server (or anyone on the path) returns, and it also
# fails on the JSON literals true/false/null. json.loads() yields the same
# dict for valid JSON without executing anything.
dic_content = json.loads(content)
print(type(dic_content))

# First entry of the 'data' list in the reply.
# NOTE(review): assumes the reply always has a non-empty 'data' list -- confirm.
first_entry = dic_content.get('data')[0]
print(first_entry)
print(type(first_entry))
print(first_entry.get('v'))

json_content = json.loads(content)
print(json_content)
print(type(json_content))

if __name__ == '__main__':
    print('It takes', time.time() - start, "seconds.")
urllib_8_post请求百度翻译之详细翻译
"""
.-''-.
.--. _..._ .' .-. )
|__| .' '. / .' / /
.--..-,.--. . .-. . (_/ / /
| || .-. | | ' ' | / /
| || | | | _ _ | | | | / / _ _
| || | | || ' / | | | | | . ' | ' / |
| || | '-.' | .' | | | | | / / _.-').' | .' |
|__|| | / | / | | | | | .' ' _.'.-'' / | / |
| | | `'. | | | | | / /.-'_.' | `'. |
|_| ' .'| '/| | | | / _.' ' .'| '/
`-' `--' '--' '--'( _.-' `-' `--'
Created on 2023/3/24 23:02.
@Author: haifei
"""
import json
import time
from urllib import request, parse
# POST request against the Baidu Translate detailed-translation endpoint,
# replaying the headers and form fields of a captured browser request.

# The clock starts before the request so that the report at the bottom covers
# the actual work instead of an empty interval.
start = time.time()

# NOTE(review): 'Acs-Token' and 'Cookie' are values copied from a browser
# session and appear to embed timestamps -- presumably they expire and must be
# refreshed; also confirm it is acceptable to keep them in source control.
#
# There is intentionally no 'Content-Length' entry: urllib derives it from the
# encoded body, whereas a hard-coded number is only right for one exact
# payload and would corrupt the request as soon as a form field changes.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Acs-Token': '1679672667898_1679672667576_SlMSHXMJiE5lO9O3mCbWXoLpxMKuOCuCrmVe6FIg/IKZBgeYHKHsmtqdpt/0wzm4lRYtqHhwdh5bF9qEEols1QlVyi8FUOJsMsWtaiq3LlPe4Bg3rUMLI26ka8WrCqkw4jVHdLC+W6gtaUPft3vRHGatTpVwSwiI1qNsvjl+N7fs0qf1mF//0C3ea6IoZ4/nE1uWLWTzqHkt0TIw/FJlHUt7oNn+5fyrKP1nUBSKU00xpi+awI/Zsv7tlLLNyxrt0+ePrjepVLzrK9kEHr9zNU2Cpqox3Kc88rMb61Vuc8+YJWV4FVvyQZ1+6wQ7aPd+QuAx0RyEXTqU1YoVXFVKbeZviLGgI1POh9075YP89vo=',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': 'BIDUPSID=B9B52F7273A4D0A02F4224DF0FE584E9; PSTM=1644560257; ZFY=WJR0yuV2wnPtrVSkigGW9zh6r:BS3wlaNLebcRmDOrT4:C; BAIDUID=131FA2C2E20EDCC5307B724B1B8D1609:FG=1; BAIDUID_BFESS=131FA2C2E20EDCC5307B724B1B8D1609:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1679671060; APPGUIDE_10_0_2=1; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; ab_sr=1.0.1_NGRlOTg4M2IyMmZjMDhkNWQzYWQ2N2EzZmIxYzY3YzVhNTE4YTZmNGNjZTZiZTU4NTQ1ZThhYWNlNjU5Y2YyYWZmZDMyZTAwYjUxMzJjMWExMjVkYzQyZmU4MzVhN2JiZDVkNDBhMjEzYzJmNjZkMTJkODg4ZWNmNGY5YjNlMGRlMWM5NGU0NjE4ZDJiOTc2YTQzNDk5ZTBmYmI4NWU0NQ==; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1679672667',
    'Host': 'fanyi.baidu.com',
    'Origin': 'https://fanyi.baidu.com',
    'Referer': 'https://fanyi.baidu.com/',
    'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
    'X-Requested-With': 'XMLHttpRequest',
}

url = "https://fanyi.baidu.com/v2transapi?from=en&to=zh"

# NOTE(review): 'sign' and 'token' look tied to this specific query/session --
# presumably they must be regenerated if 'query' changes; verify.
form_fields = {
    'from': 'en',
    'to': 'zh',
    'query': 'girl',
    'simple_means_flag': '3',
    'sign': '780982.985479',
    'token': '9d0251d64cfa1d98e5aab063d19cd487',
    'domain': 'common',
}
# POST bodies must be bytes: urlencode() builds the form string and
# encode() turns it into UTF-8 bytes.
data = parse.urlencode(form_fields).encode('utf-8')

_request = request.Request(url=url, data=data, headers=headers)
# The context manager closes the connection even if read()/decode() raises.
with request.urlopen(_request) as response:
    content = response.read().decode('utf-8')

# Print the raw text first, then the parsed structure.
print(content)
print(json.loads(content))

if __name__ == '__main__':
    print('It takes', time.time() - start, "seconds.")
https://www.bilibili.com/video/BV1Db4y1m7Ho
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· Docker 太简单,K8s 太复杂?w7panel 让容器管理更轻松!