爬取豆瓣电影
- 代码
"""Fetch movie titles from Douban's JSON search endpoint.

The script queries ``/j/search_subjects`` for movies tagged with the
URL-encoded tag ``%E7%A7%91%E5%B9%BB`` (sci-fi), sorted by rank, prints the
raw response body and then the list of movie titles found in it.

Titles are extracted by parsing the response as JSON rather than with a
regular expression, so JSON escapes (``\\uXXXX``, ``\\"``, ``\\/``) are decoded
correctly and the result does not depend on the order of keys in each entry.
"""
import json
import re  # noqa: F401 -- no longer used here; kept for code outside this snippet
import urllib.parse
import urllib.request

# Endpoint plus the fixed part of the query string; paging parameters are
# appended by build_url().
BASE_URL = 'https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%A7%91%E5%B9%BB&sort=rank&'

# Number of entries assumed per page when converting a page number to an offset.
PAGE_SIZE = 30

# Defaults reproduce the request the original script sent: page 2 (offset 30)
# with a very large page_limit.
DEFAULT_PAGE = 2
DEFAULT_PAGE_LIMIT = 30000

# A browser-like User-Agent is sent with every request.
HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
}


def build_url(page=DEFAULT_PAGE, page_limit=DEFAULT_PAGE_LIMIT):
    """Return the full request URL for the given 1-based page number.

    Args:
        page: 1-based page number; converted to ``page_start`` as
            ``(page - 1) * PAGE_SIZE``.
        page_limit: value sent as the ``page_limit`` query parameter.

    Raises:
        ValueError: if ``page`` is smaller than 1.
    """
    if page < 1:
        raise ValueError(f"page must be >= 1, got {page!r}")
    query = urllib.parse.urlencode({
        'page_limit': page_limit,
        'page_start': (page - 1) * PAGE_SIZE,
    })
    return BASE_URL + query


def fetch(url, timeout=10):
    """Download ``url`` with the module's headers and return the body as text.

    The response is closed deterministically and the request is bounded by
    ``timeout`` seconds so a stalled connection cannot hang the script.
    The body is decoded as UTF-8.
    """
    request = urllib.request.Request(url=url, headers=HEADERS)
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode('utf-8')


def extract_titles(response_text):
    """Return the ``title`` of every entry under ``subjects`` in the JSON text.

    Entries without a ``title`` key are skipped; a payload without a
    ``subjects`` list yields an empty list.

    Raises:
        json.JSONDecodeError: if ``response_text`` is not valid JSON.
    """
    payload = json.loads(response_text)
    subjects = payload.get('subjects') if isinstance(payload, dict) else None
    return [
        entry['title']
        for entry in subjects or []
        if isinstance(entry, dict) and 'title' in entry
    ]


def main():
    """Fetch the default page, print the raw response, then print the titles."""
    response = fetch(build_url())
    print(response)
    film = {'name': extract_titles(response)}
    print(film['name'])


if __name__ == '__main__':
    main()
- 补充
posted on 2019-09-24 22:13 ybl20000418 阅读(99) 评论(0) 编辑 收藏 举报