urllib库爬取实例
from urllib import request import random def spider(url): user_agent_list = [ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36" ] user_agent = random.choice(user_agent_list) print(user_agent,url) headers = { "User-Agent":user_agent } req = request.Request(url,headers=headers) response = request.urlopen(req) html = response.read() html = html.decode("utf-8") # print(html) l = url.split("/") fileName = "05_"+l[-1] with open(fileName,"w",encoding="utf-8") as f: f.write(html) if __name__ == "__main__": url_list = ["http://www.langlang2017.com/index.html","http://www.langlang2017.com/route.html","http://www.langlang2017.com/FAQ.html"] for url in url_list: spider(url)