1) Python 爬取小说
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Scrape a novel: read the chapter index page, fetch each chapter, append it to a text file."""
from urllib.parse import urljoin

import requests
from pyquery import PyQuery as pq

# NOTE(review): both URLs below look truncated/redacted in this copy of the
# script; they are kept verbatim -- fill in the real site before running.
url = 'http://wwwm/203031'
CHAPTER_BASE_URL = 'http://.com/'

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36'
                  ' (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
}

OUTPUT_PATH = '无限化.txt'
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests may block forever


def _fetch_html(page_url):
    """Return the decoded body of ``page_url``.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx response (so an error page is never saved as chapter text).
    """
    response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def main():
    """Download every chapter linked from the index page into ``OUTPUT_PATH``."""
    # Fetch the index page and parse it.
    index_doc = pq(_fetch_html(url))

    # Collect all chapter links. pyquery takes CSS selectors, so the attribute
    # test is written [class="..."] (the XPath form [@class="..."] is rejected).
    # NOTE(review): the original comment said the "latest chapters" teaser list
    # is skipped so output starts at chapter one -- confirm this selector does
    # that on the target page.
    links = index_doc('div[class="mulu"] li a').items()

    # Open the output once instead of once per chapter; 'a+' keeps the original
    # append semantics, so re-running adds to an existing file.
    with open(OUTPUT_PATH, 'a+', encoding='utf8') as f:
        for link in links:
            download_url = link.attr('href')
            if not download_url:
                # Anchor without an href: nothing to download.
                continue
            print(download_url)

            # urljoin copes with relative, root-relative and absolute hrefs.
            chapter_html = _fetch_html(urljoin(CHAPTER_BASE_URL, download_url))
            chapter_doc = pq(chapter_html)

            # Extract the chapter body text.
            contents = chapter_doc('div[class="mcc"]').text()

            # Chapter title (the link text), then the body, blank-line separated.
            f.write(link.text() + "\n\n")
            f.write(contents + "\n\n")

    print("写入文件完成!请查看")


if __name__ == '__main__':
    main()