摘要:
taobao.py items.py middlewares.py pipelines.py settings.py 阅读全文
摘要:
```python
# python执行lua脚本
import requests
from urllib.parse import quote

lua = '''
function main(splash) return 'hello'
end
'''

url = 'http://localhost:8050/execute?lua_source=' + quote(lua)
response...
```
阅读全文
摘要:
```python
# 抓取《我不是药神》的豆瓣评论
import csv
import time
import requests
from lxml import etree

fw = open('douban_comments.csv', 'w')
writer = csv.writer(fw)
writer.writerow(['comment_time','comment_content'])
...
```
阅读全文
摘要:
```python
# 抓取今日头条,对比渲染和没有渲染的效果
import requests
from lxml import etree

# url = 'http://localhost:8050/render.html?url=https://www.toutiao.com&timeout=30&wait=0.5'
url = 'https://www.toutiao.com'
response...
```
阅读全文
摘要:
```python
import requests
import json
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.sup...
```
阅读全文
摘要:
```python
# 抓取简书博客总阅读量
# https://www.jianshu.com/u/130f76596b02
import requests
import json
import re
from lxml import etree

header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,...
```
阅读全文