python3网络小爬虫下载图片实战

# -*- coding: utf-8 -*-
''' Created on Tue Jan 23 20:50:42 2018 @author: zhuxueming'''
import urllib.request
import re

def get_url(url):
  """Fetch ``url`` and return the response body decoded as UTF-8 text.

  The request carries a desktop Chrome ``User-Agent`` header instead of
  urllib's default one (presumably so the server treats the request like
  one coming from a regular browser -- TODO confirm it is still required).

  Args:
    url: Address of the page to download.

  Returns:
    The complete response body as a ``str``.

  Raises:
    urllib.error.URLError: If the request cannot be completed.
    UnicodeDecodeError: If the body is not valid UTF-8.
  """
  res = urllib.request.Request(url)
  res.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36')
  # The response object is a context manager; closing it here releases the
  # underlying connection even when read() or decode() raises.
  with urllib.request.urlopen(res) as page:
    return page.read().decode('utf-8')

def get_img(page_url=None):
  """Download every ``.jpg`` referenced by an ``<img src="...">`` tag on a page.

  The page is fetched with ``get_url``. For each matched image address,
  every ``/t/`` in it is replaced by ``/pre/`` (presumably switching from a
  thumbnail to a larger preview on the target site -- TODO confirm), the
  address is resolved against the page URL, and the image is saved in the
  current working directory as ``1.jpg``, ``2.jpg``, ... in page order.
  A failed download is reported and skipped; its number is not reused.

  Args:
    page_url: Address of the gallery page. When omitted, the module-level
      ``url`` variable is used, so existing ``get_img()`` callers that set
      that global keep working.

  Returns:
    None. Progress is printed to standard output.
  """
  # Imported locally so this function does not rely on the file's import
  # block being changed.
  from urllib.parse import urljoin

  if page_url is None:
    # Legacy behaviour: fall back to the global assigned by the script entry.
    page_url = url
  html = get_url(page_url)
  # [^"]+ keeps each match inside a single src attribute; a greedy ".+"
  # would merge several <img> tags that share a line into one bogus match.
  reg = r'<img src="([^"]+\.jpg)"'
  pat = re.findall(reg, html)
  for i, each in enumerate(pat, start=1):
    each = each.replace('/t/', '/pre/')
    # Resolve relative and protocol-relative ("//host/...") addresses;
    # absolute URLs pass through urljoin unchanged.
    each = urljoin(page_url, each)
    print(each)
    try:
      urllib.request.urlretrieve(each, str(i) + '.jpg')
    except Exception:
      # Best effort: report the failure and move on to the next image.
      # Exception (not BaseException) lets Ctrl-C / SystemExit stop the run.
      print(str(i) + '张下载失败')
    else:
      print(str(i) + '张下载成功')
     
    
if __name__ == "__main__":
  # Script entry point. get_img() reads the page address from the
  # module-level name `url`, so it has to be bound before the call.
  url = "http://www.ivsky.com/tupian/yuzhou_qiguan_v21241/"
  get_img()

 

posted @ 2018-01-23 21:56  pyming  阅读(101)  评论(0)  编辑  收藏  举报