# 爬取微博 (Crawl Weibo search results)

from urllib import parse
import lxml
import requests

def parse_url(key_word=None):
    """Build the Weibo search URL for a keyword.

    Args:
        key_word: Search keyword. When ``None`` (the default) the user is
            prompted for it on stdin, which is what callers that pass no
            argument have always gotten.

    Returns:
        The ``https://s.weibo.com/weibo/`` URL followed by the keyword
        percent-encoded twice as UTF-8.
    """
    if key_word is None:
        key_word = input('请输入关键字>>:')
    # Per the original author's note, Weibo URL-encodes the keyword twice,
    # so the '%' signs produced by the first pass are escaped by the second.
    encoded_once = parse.quote(key_word, encoding='utf8')
    encoded_twice = parse.quote(encoded_once, encoding='utf8')
    return 'https://s.weibo.com/weibo/' + encoded_twice


def get_main_html():
    """Download the Weibo search page for a prompted keyword into ``1.html``.

    The URL comes from ``parse_url()``, which prompts for the keyword.
    Redirects are not followed. Only a direct 200 response is saved; any
    other status leaves the file untouched and prints nothing.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    url = parse_url()
    response = requests.get(
        url=url,
        headers={
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
        },
        allow_redirects=False,
        # Without a timeout requests can block forever on a stalled server.
        timeout=10,
    )
    if response.status_code == 200:
        print(response.url)
        with open('1.html', 'wb') as f:
            # Write the raw body in one go: iter_lines() strips the line
            # terminators, which glued every line of the page together.
            f.write(response.content)


# Run the crawler only when this file is executed as a script, not on import.
if __name__ == '__main__':
    get_main_html()
# posted @ 2020-01-09 16:12  import*  阅读(196)  评论(0)  编辑  收藏  举报