展开
拓展 关闭
订阅号推广码
GitHub
视频
公告栏 关闭

Python爬虫入门

  • 将百度页面保存到本地
from urllib.request import urlopen

# Download the page at `url` and save its decoded text to a local HTML file.
url = "http://www.baidu.com/"
# The context manager closes the HTTP response even if read()/decode() raises.
with urlopen(url) as r:
    b = r.read().decode('utf-8')
with open("a.html", mode="w", encoding="utf-8") as f:
    f.write(b)
print("OK")
  • 安装库
# Install the third-party `requests` package that the following snippets import.
pip install requests
  • 获取网页源码
import requests as req

# Fetch the search-result page at `url` and print its HTML source.
url = "https://sogou.com/web?query=刘德华"
# Send a desktop-browser User-Agent header with the request.
head = {
    "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76"
}
# The timeout keeps the script from waiting forever on an unresponsive server;
# the context manager closes the response even if printing fails.
with req.get(url, headers=head, timeout=10) as res:
    print(res.text)
  • 发送POST请求
import requests as req

# Interactive lookup: POST each word the user enters to `url` as the "kw"
# form field and print every value found in the "data" list of the JSON reply.
url = "https://fanyi.baidu.com/sug"
head = {
    "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76"
}
while True:
    dc = input("请输入要翻译的单词(q退出):")
    if dc == "q":
        break
    data = {
        "kw": dc
    }
    # One response is created per loop iteration; close each one promptly
    # instead of leaking it, and bound the wait with a timeout.
    with req.post(url, headers=head, data=data, timeout=10) as res:
        result = res.json()["data"]  # returned data
    for d in result:  # iterate over the returned entries
        # Only the values are printed, so iterate values() rather than items().
        for v in d.values():
            print(v)
  • 返回JSON数据
import requests as req

# Request the chart listing at `url` and print the decoded JSON response.
url = "https://movie.douban.com/j/chart/top_list"
head = {
    "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76"
}

# Passed to requests via `params=`, i.e. sent as the URL query string.
data = {
    "type": 24,
    "interval_id": "100:90",
    "action": "",
    "start": 0,
    "limit": 20,
}
# The timeout bounds the wait on the server; the context manager closes the
# response once the JSON body has been decoded.
with req.get(url, params=data, headers=head, timeout=10) as res:
    redic = res.json()
print(redic)
  • 获取数据保存到本地
import requests as req

# Fetch the chart listing at `url` and append it to a local text file, one
# line per chunk of the response text split on the "is_watched" key.
url = "https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action=&start=0&limit=20"
head = {
    "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.76"
}
# Read the body and release the connection before doing any file I/O; the
# timeout keeps the script from hanging on an unresponsive server.
with req.get(url, headers=head, timeout=10) as res:
    data = res.text
data1 = data.split("is_watched")
with open("C:\\work\\a.txt", mode="a", encoding="utf-8") as f:
    # First chunk: drop its 2 leading and 2 trailing characters.
    c = data1[0][2:-2]
    f.write(f"{c}\n")
    # Walk the chunks that are actually present instead of a fixed count, so
    # a response with fewer than 20 records no longer raises IndexError.
    # The final chunk (the text after the last "is_watched") is skipped.
    # NOTE(review): presumably that last chunk is only the tail that closes
    # the JSON array — confirm against a live response.
    for a1 in data1[1:-1]:
        # Later chunks: drop 10 leading and 2 trailing characters.
        c1 = a1[10:-2]
        f.write(f"{c1}\n")
print("完成")
posted @ 2023-11-10 15:12  DogLeftover  阅读(31)  评论(0)  编辑  收藏  举报