展开
拓展 关闭
订阅号推广码
GitHub
视频
公告栏 关闭

python爬虫入门

  • 将百度页面保存到本地
# Download the Baidu home page and save its HTML to a local file.
from urllib.request import urlopen

url = "http://www.baidu.com/"

# Use the response as a context manager so the underlying connection is
# closed even if reading or decoding fails.
with urlopen(url) as r:
    b = r.read().decode("utf-8")

# Write the decoded page to a.html in the current working directory.
with open("a.html", mode="w", encoding="utf-8") as f:
    f.write(b)

print("OK")
  • 安装库
pip install requests
  • 获取网页源码
# Fetch a Sogou search-results page and print its HTML source.
import requests as req

url = "https://sogou.com/web?query=刘德华"

# Send a browser-like User-Agent header with the request.
# NOTE(review): presumably needed because the site rejects the default
# client User-Agent -- confirm.
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76"
    ),
}

# The context manager closes the response even if printing fails; the
# timeout keeps the script from hanging forever on an unresponsive server.
with req.get(url, headers=head, timeout=10) as res:
    print(res.text)
  • 发送 POST 请求
# Interactive loop: read a word from the user, POST it to the Baidu Fanyi
# suggestion endpoint and print every value of each returned entry.
import requests as req

url = "https://fanyi.baidu.com/sug"

# Send a browser-like User-Agent header with every request.
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76"
    ),
}

while True:
    dc = input("请输入要翻译的单词(q退出):")
    if dc == "q":
        # The user asked to quit.
        break

    # Form payload: the word to look up goes in the "kw" field.
    data = {"kw": dc}

    # Close each response as soon as its JSON body has been parsed; the
    # timeout prevents one stalled request from freezing the prompt.
    with req.post(url, headers=head, data=data, timeout=10) as res:
        result = res.json()["data"]  # list of suggestion entries

    for d in result:
        # Only the values are printed, so iterate values directly.
        for v in d.values():
            print(v)
  • 返回 JSON 数据
# Query the Douban movie chart endpoint with GET parameters and print the
# decoded JSON response.
import requests as req

url = "https://movie.douban.com/j/chart/top_list"

# Send a browser-like User-Agent header with the request.
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76"
    ),
}

# Query-string parameters; requests URL-encodes them (e.g. the ":" in
# interval_id) when building the final URL.
data = {
    "type": 24,
    "interval_id": "100:90",
    "action": "",
    "start": 0,
    "limit": 20,
}

# The context manager releases the connection once the body is parsed;
# the timeout avoids hanging on an unresponsive server.
with req.get(url, params=data, headers=head, timeout=10) as res:
    redic = res.json()

print(redic)
  • 获取数据保存到本地
# Fetch the Douban movie chart as raw text, cut it into one chunk per
# record at the "is_watched" key, and append each chunk as a line to a
# local text file.
import requests as req

url = "https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action=&start=0&limit=20"

# Send a browser-like User-Agent header with the request.
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76"
    ),
}

# The context manager closes the response even if a later step fails; the
# timeout avoids hanging on an unresponsive server.
with req.get(url, headers=head, timeout=10) as res:
    data = res.text

# Each record ends with an "is_watched" field, so splitting on that key
# gives one piece per record plus a trailing remainder.
data1 = data.split("is_watched")

with open("C:\\work\\a.txt", mode="a", encoding="utf-8") as f:
    # First piece: drop the two leading characters (presumably the
    # opening `[{` of the JSON array) and the two trailing characters
    # that precede the split key.
    a = data1[0]
    b = a[2:]
    c = b[0:-2]
    f.write(f"{c}\n")

    # Middle pieces: skip the trailing remainder instead of assuming
    # exactly 20 records, so a shorter response no longer raises
    # IndexError. For a full 20-record response this writes the same
    # pieces (1..19) as before.
    for a1 in data1[1:-1]:
        # Drop the 10 leading characters left over from the previous
        # record's tail (presumably `":false},{` -- TODO confirm) and
        # the two trailing characters before the next split key.
        b1 = a1[10:]
        c1 = b1[0:-2]
        f.write(f"{c1}\n")

print("完成")
posted @   DogLeftover  阅读(31)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?
历史上的今天:
2022-11-10 idea使用token
点击右上角即可分享
微信分享提示