mitmdump爬取当当网APP图书目录
因为mitmproxy没办法连接数据库,所以只能先把结果保存为txt文件,再读取到数据库中。
在滑动APP界面时,对代码进行分析
import requests
import re
import urllib


def request(flow):
    """mitmproxy ``request`` hook: overwrite the User-Agent and print the headers.

    mitmproxy dispatches HTTP request events to a module-level function named
    ``request``. The original definition was spelled ``requets`` and was
    therefore never invoked by the proxy.

    Args:
        flow: The intercepted flow; only ``flow.request.headers`` is used.
    """
    flow.request.headers['User-Agent'] = 'MitmProxy'
    print(flow.request.headers)


# Backward-compatible alias for the original (misspelled) name.
requets = request
保存到文本:点击下载
爬取程序
import json

from mitmproxy import ctx

# Markers identifying the Dangdang best-seller list API, taken from a captured
# request of the form:
#   http://mapi.dangdang.com/index.php?...&bang_name=bestsell&...&action=bang_tushu&page=5&...
BOOK_LIST_HOST = 'mapi.dangdang.com'
BOOK_LIST_ACTION = 'action=bang_tushu'

# Text file that collects one JSON object per line for the later database import.
OUTPUT_PATH = 'D:\\books.txt'


def response(flow):
    """mitmproxy ``response`` hook: append book-list entries to ``OUTPUT_PATH``.

    Only responses whose request URL contains both ``BOOK_LIST_HOST`` and
    ``BOOK_LIST_ACTION`` are processed. The original code compared the URL
    against itself, so every response passing through the proxy was parsed as
    JSON, which fails on non-JSON bodies.

    For each entry of the ``products`` list in the JSON body, one line of JSON
    (title, author, price, cover image) is appended to the output file.

    Args:
        flow: The intercepted flow; ``flow.request.url`` and
            ``flow.response.text`` are read.
    """
    url = flow.request.url
    if BOOK_LIST_HOST not in url or BOOK_LIST_ACTION not in url:
        return

    print('获取列表数据中...')

    try:
        data = json.loads(flow.response.text)
    except (TypeError, ValueError) as exc:
        # Body missing or not valid JSON: nothing to extract from this response.
        ctx.log.info('Skipping non-JSON book list response: ' + str(exc))
        return

    books = data.get('products') if isinstance(data, dict) else None
    if not books:
        return

    # Open the file once per response instead of once per book.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
        for book in books:
            # 'price' may be absent or null; fall back to an empty mapping.
            price = book.get('price') or {}
            info = {
                '书名': book.get('product_name'),
                '作者': book.get('author'),
                '价格': price.get('dangdang_price'),
                '封面图片': book.get('img_url'),
            }
            ctx.log.info(str(book))
            f.write(json.dumps(info, ensure_ascii=False) + '\n')
结果
读取到数据库中
import json

import pymongo
from pymongo import MongoClient

# Load the JSON-lines file written by the mitmproxy script into MongoDB,
# one document per line, into collection ``book`` of database ``books``.
# NOTE(review): the credentials are hard-coded in the connection URI.
client = pymongo.MongoClient('mongodb://admin:admin@localhost:27017/')
db = client['books']
collection = db['book']

# The file is only read, so open it read-only rather than 'r+'.
with open('D:\\books.txt', 'r', encoding='utf-8') as f:
    # Iterate the file lazily instead of loading every line with readlines().
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines would make json.loads raise.
            continue
        try:
            new = json.loads(line)
        except ValueError as exc:
            print('skipping invalid JSON line:', exc)
            continue
        try:
            # Collection.insert() is deprecated since PyMongo 3.0 and removed
            # in 4.0; insert_one() is the replacement for a single document.
            result = collection.insert_one(new)
        except pymongo.errors.PyMongoError as exc:
            print('someing wrong with MongDB', exc)
        else:
            if result.acknowledged:
                print("成功保存到MongoDB")