Python 爬取文本
一、给定 URL，提取小说文本
import re
import requests
from bs4 import BeautifulSoup

if __name__ == '__main__':
    # Download one web page, pull out its title and paragraph text, and save
    # them to a local plain-text file.
    url = 'http://www.jinyongwang.com/shen/781.html'
    out_path = 'C:\\Users\\acm\\Desktop\\novel.txt'

    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status() aborts on an HTTP error instead of
    # parsing and saving an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Force the body to be decoded as UTF-8 rather than the guessed charset.
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    # Elements matching the CSS selector '#title' (may be an empty list).
    title = soup.select('#title')
    # Every <p> element on the page, in document order.
    body = soup.select('p')

    # Write with an explicit UTF-8 encoding so the result does not depend on
    # the platform default (e.g. GBK on Chinese Windows cannot encode '\xa0',
    # which is common in scraped HTML). The 'with' block guarantees the file
    # is flushed and closed even if a write fails.
    with open(out_path, mode='w', encoding='utf-8') as myfile:
        # Skip the title line instead of crashing with IndexError when the
        # page has no '#title' element.
        if title:
            myfile.write(title[0].text + '\n')
        # The first three <p> elements are skipped -- presumably non-story
        # boilerplate on this page; verify against the page markup.
        for paragraph in body[3:]:
            # Blank line between paragraphs for readability.
            myfile.write(paragraph.text + '\n\n')