Python 正则表达式爬取浏览目录
"""Scrape book entries from the Douban Books front page with a regular expression.

Run as a script, this prints one line per matched entry in the order:
url, name, author, date.
"""
import re

BOOK_URL = 'https://book.douban.com/'

# Matches one <li>...</li> entry and captures, in order: the first title="..."
# attribute found after the text "cover", the next href="..." after that
# title, the contents of the author span and the contents of the year span
# that follow "more-meta".  re.S lets ".*?" run across line breaks, which is
# required because each entry spans several lines of HTML.
BOOK_PATTERN = re.compile(
    r'<li.*?cover.*?title="(.*?)".*?href="(.*?)"'
    r'.*?more-meta.*?author">(.*?)</span>'
    r'.*?year">(.*?)</span>.*?</li>',
    re.S,
)

# Raw string: '\s' in a normal literal is an invalid escape sequence.
WHITESPACE = re.compile(r'\s')


def fetch_page(url: str = BOOK_URL, timeout: float = 10) -> str:
    """Download *url* and return the response body as text.

    Args:
        url: Page to download.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection problems, on timeout, or
            when the server answers with an HTTP error status.
    """
    # Imported here so that the parsing helpers in this module can be used
    # (and tested) without the third-party HTTP client being installed.
    import requests

    # Without a timeout the call can block forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of silently parsing an error page into no results.
    response.raise_for_status()
    return response.text


def parse_books(html: str) -> list:
    """Extract book entries from *html*.

    Args:
        html: Page markup to search with ``BOOK_PATTERN``.

    Returns:
        A list of ``(url, name, author, date)`` tuples, one per match, in
        document order.  All whitespace is removed from ``author`` and
        ``date``; ``url`` and ``name`` are returned exactly as captured.
    """
    return [
        (url, name, WHITESPACE.sub('', author), WHITESPACE.sub('', date))
        for name, url, author, date in BOOK_PATTERN.findall(html)
    ]


def main() -> None:
    """Fetch the Douban Books front page and print every matched entry."""
    for url, name, author, date in parse_books(fetch_page()):
        print(url, name, author, date)


if __name__ == '__main__':
    main()