几个爬虫实例

爬红楼梦小说#

#http://www.shicimingju.com/book/hongloumeng.html
import requests
from bs4 import BeautifulSoup
ret=requests.get('https://www.shicimingju.com/book/hongloumeng.html')
ret.encoding='utf-8'

soup=BeautifulSoup(ret.text,'lxml')
li_list=soup.find(class_='book-mulu').find('ul').find_all('li')
with open('hlm.txt','w',encoding='utf-8') as f:
    for li in li_list:
        content=li.find('a').text
        url='https://www.shicimingju.com'+li.find('a').get('href')
        f.write(content)
        f.write('\n')
        res_content=requests.get(url)
        res_content.encoding = 'utf-8'
        res_content.encoding=res_content.apparent_encoding
        soup2=BeautifulSoup(res_content.text,'lxml')
        content_detail=soup2.find(class_='chapter_content').text
        f.write(content_detail)
        f.write('\n')
        print(content,'写入了')

爬肯德基门店#

# http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword
import requests

header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
}
data = {
    'cname': '',
    'pid': 20,
    'keyword': '浦东',
    'pageIndex': 1,
    'pageSize': 10
}
ret = requests.post('http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword', data=data, headers=header)
print(ret.json())