Python crawler demo 01
import requests
import time
from bs4 import BeautifulSoup
from contextlib import closing

# Sample single-image URL kept from the original demo; not used in the main flow below.
url = 'https://image.xiaozhustatic1.com/12/9,0,27,3473,1800,1200,d064ccfb.jpg'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}

url2 = 'http://bj.xiaozhu.com/'


def get_img_src_list(url):
    """
    Collect the image URLs from a listing page.
    :param url: listing page URL
    :return: list of image URLs
    """
    res = requests.get(url, headers=headers)
    res_data = BeautifulSoup(res.text, 'lxml')
    imgs = res_data.select('#page_list > ul > li > a > img')
    # titles = res_data.select('#page_list > ul > li > div.result_btm_con.lodgeunitname > div > a > span')
    img_srcs = []
    for img in imgs:
        # The page lazy-loads images, so the real URL lives in the custom 'lazy_src' attribute.
        img_srcs.append(img.get('lazy_src'))
    return img_srcs


def downloadPic(url, pic_name):
    """
    Download one image and save it as <pic_name>.jpg.
    :param url: image URL
    :param pic_name: numeric name for the output file
    :return: None
    """
    res = requests.get(url, headers=headers, stream=True)
    with closing(res) as r:
        # 'wb' instead of 'ab+': re-running the script overwrites files rather than appending to them.
        with open('%d.jpg' % pic_name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    f.flush()
    print('Downloaded {}.jpg'.format(pic_name))


if __name__ == '__main__':
    img_srcs = get_img_src_list(url2)
    for i, src in enumerate(img_srcs):
        print(src)
        # time.sleep(1)  # optional throttle between requests
        downloadPic(src, i)
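The selector logic in get_img_src_list can be checked without hitting the live site. The snippet below is a minimal sketch that runs the same BeautifulSoup selector against a hand-written HTML fragment; the markup and the example.com image URLs are made up for illustration and are not taken from xiaozhu.com.

from bs4 import BeautifulSoup

# Hypothetical fragment mimicking the listing structure the crawler expects.
sample_html = """
<div id="page_list">
  <ul>
    <li><a href="#"><img lazy_src="https://example.com/pic_a.jpg"></a></li>
    <li><a href="#"><img lazy_src="https://example.com/pic_b.jpg"></a></li>
  </ul>
</div>
"""

soup = BeautifulSoup(sample_html, 'lxml')
imgs = soup.select('#page_list > ul > li > a > img')
# Each <img> carries its real URL in the 'lazy_src' attribute,
# which is why the crawler reads that attribute instead of 'src'.
srcs = [img.get('lazy_src') for img in imgs]
print(srcs)
# ['https://example.com/pic_a.jpg', 'https://example.com/pic_b.jpg']

If the site changes its markup or switches the lazy-load attribute (for example to data-src), this is the one place that needs updating.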