百度贴吧爬虫小程序源码
# -*- coding: utf-8 -*-
"""Download listing pages of a Baidu Tieba forum and save them as HTML files.

Run as a script: it prompts for a start page, an end page (inclusive) and the
forum name, then writes one ``<name>_<page>.html`` file per page into
``<BASE_DIR>/<name>``.
"""
import os
import urllib.parse
import urllib.request

BASE_URL = 'http://tieba.baidu.com/f?'
# Default parent directory for downloads (kept from the original script).
BASE_DIR = r'E:\python'
# The original script computed (page - 1) * 50 offsets (but never used them),
# i.e. the `pn` query parameter is a post offset with 50 posts per page.
POSTS_PER_PAGE = 50
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
}


def build_page_url(name, page):
    """Return the listing URL for 1-based *page* of the forum called *name*.

    Args:
        name: Forum name as typed by the user (unencoded text).
        page: 1-based page number.

    Returns:
        The full request URL, with the query string percent-encoded once.

    Raises:
        ValueError: If *page* is smaller than 1.
    """
    if page < 1:
        raise ValueError("page must be >= 1, got {0}".format(page))
    # urlencode() already percent-encodes values; pre-quoting the name (as the
    # original did) would double-encode it ('%' -> '%25').
    query = urllib.parse.urlencode({
        'kw': name,
        'ie': 'utf-8',
        'pn': (page - 1) * POSTS_PER_PAGE,
    })
    # Build a fresh URL every time instead of appending to a shared string,
    # so earlier pages' query strings never leak into later requests.
    return BASE_URL + query


def download_pages(name, start, end, base_dir=BASE_DIR, timeout=30):
    """Download pages *start* through *end* (inclusive) of forum *name*.

    Each page is saved as ``<name>_<page>.html`` inside ``<base_dir>/<name>``;
    the directory is created if it does not exist yet.

    Args:
        name: Forum name.
        start: First page to download (1-based).
        end: Last page to download (inclusive).
        base_dir: Parent directory under which the forum folder is created.
        timeout: Per-request timeout in seconds passed to ``urlopen``.
    """
    target_dir = os.path.join(base_dir, name)
    # exist_ok avoids a crash when the script is run a second time.
    os.makedirs(target_dir, exist_ok=True)

    for page in range(start, end + 1):
        request = urllib.request.Request(
            url=build_page_url(name, page), headers=HEADERS
        )
        filepath = os.path.join(target_dir, name + '_' + str(page) + ".html")
        # Context managers close both the HTTP response and the output file.
        with urllib.request.urlopen(request, timeout=timeout) as response:
            content = response.read()
        with open(filepath, 'wb') as fp:
            fp.write(content)
        print("下载完成第{n}页".format(n=page))


def main():
    """Prompt for the page range and forum name, then download the pages."""
    start = int(input("请输入开始页码:"))
    end = int(input("请输入结束页码:"))
    name = input("请输入搜索贴吧的名字:")
    download_pages(name, start, end)


if __name__ == '__main__':
    main()
posted on 2019-09-29 19:27 ybl20000418 阅读(166) 评论(0) 编辑 收藏 举报