# urllib爬取实例 (urllib crawling example)
# Percent-encode non-ASCII (e.g. Chinese) search text, join several query
# parameters into one URL, fetch the Baidu results page and save it as HTML.
from urllib import parse
from urllib import request

BASE_URL = "http://www.baidu.com/s?"

# Browser-like User-Agent sent with the request instead of urllib's default.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36"
)

# Characters that are path separators or are rejected in Windows file names;
# each one is replaced by "_" when deriving the output file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})


def build_query(content: str) -> str:
    """Return the URL-encoded query string for searching *content*.

    Non-ASCII text is percent-encoded as UTF-8, e.g. ``"兄弟连"`` becomes
    ``wd=%E5%85%84%E5%BC%9F%E8%BF%9E&rsv_sp=1``.
    """
    return parse.urlencode({"wd": content, "rsv_sp": 1})


def build_request(url: str) -> request.Request:
    """Return a ``Request`` for *url* carrying the browser User-Agent header.

    The header name must be spelled ``User-Agent`` (with a hyphen); a key
    such as ``user_agent`` is sent as an unrelated header and urllib then
    adds its own default User-Agent.
    """
    return request.Request(url, headers={"User-Agent": USER_AGENT})


def fetch_html(req: request.Request, timeout: float = 10.0) -> str:
    """Open *req* and return the response body decoded as UTF-8.

    The response is closed as soon as the body has been read. *timeout* is
    the number of seconds passed to ``urlopen`` so a stalled connection
    cannot block forever.
    """
    with request.urlopen(req, timeout=timeout) as response:
        raw = response.read()
    return raw.decode("utf-8")


def output_filename(content: str) -> str:
    """Return ``<content>.html`` with unsafe file-name characters replaced.

    Path separators and characters Windows rejects in file names become
    ``_`` so that a search term such as ``a/b`` still yields a file in the
    current directory.
    """
    return content.translate(_UNSAFE_FILENAME_CHARS) + ".html"


def main() -> None:
    """Prompt for a search term, fetch the results page and save it."""
    content = input("请输入你要搜索的内容:")

    qs = build_query(content)
    print(qs)  # e.g. wd=%E5%85%84%E5%BC%9F%E8%BF%9E&rsv_sp=1

    # e.g. http://www.baidu.com/s?wd=%E5%85%84%E5%BC%9F%E8%BF%9E&rsv_sp=1
    html = fetch_html(build_request(BASE_URL + qs))

    with open(output_filename(content), "w", encoding="utf-8") as f:
        f.write(html)


if __name__ == "__main__":
    main()