爬虫2_python2

# -*- coding: UTF-8 -*-
# 正则表达式模块
import re
# URL fetching module (provides urlopen / urlretrieve)
import urllib
#时间模块
import time
def getHtml(url):
    """Fetch *url* and return the raw response body.

    The status code and the body are echoed to stdout, as before.

    Args:
        url: Address of the page to download. Non-ASCII characters are
            percent-encoded (as UTF-8) before the request is made.

    Returns:
        The response body exactly as read from the connection (a byte
        string; ``bytes`` on Python 3).

    Raises:
        IOError: If the URL cannot be opened (``URLError`` on Python 3
            is a subclass of it).
    """
    # Local fallback import so the function works on Python 2 and 3.
    try:
        from urllib.request import urlopen  # Python 3
        from urllib.parse import quote
    except ImportError:
        from urllib import urlopen, quote  # Python 2

    # Python 2 ``unicode`` -> UTF-8 bytes so that quote() can handle it.
    if not isinstance(url, str) and hasattr(url, 'encode'):
        url = url.encode('utf-8')
    # Same safe-set Python 2's urllib.urlopen applied internally; '%' is
    # kept safe so already-encoded URLs are not encoded twice.
    url = quote(url, safe="%/:=&?~#+!$,;'@()*[]|")

    page = urlopen(url)
    try:
        print(page.getcode())
        html = page.read()
    finally:
        # Always release the connection, even if reading fails.
        page.close()
    print(html)
    return html
def getImag(html, save_dir='D:\\', delay=1):
    """Download every .jpg/.png image referenced by a ``src="..."`` attribute.

    Images are saved as ``<index>.<ext>`` inside *save_dir*, where *index*
    is the position of the match in the page (starting at 0) and *ext* is
    the extension found in the URL. A download that fails is reported on
    stdout and skipped; the remaining images are still fetched.

    Args:
        html: Page source to scan. A byte string on Python 3 is decoded
            as UTF-8 (undecodable bytes are replaced) before matching.
        save_dir: Directory that receives the files. Defaults to the
            root of drive ``D:``, the location used originally.
        delay: Seconds to pause after each successful download.

    Returns:
        None.
    """
    import os
    # Local fallback import so the function works on Python 2 and 3.
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    # On Python 3 the body comes back as bytes; the pattern below is a
    # text pattern. On Python 2 ``bytes is str`` so this is skipped.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    # [^"]*? keeps the match inside one attribute value; the previous
    # ``.*?`` could run past a closing quote into a later attribute.
    imglist = re.findall(r'src="([^"]*?\.(jpg|png))"', html)
    print(imglist)

    for index, (img_url, ext) in enumerate(imglist):
        print('正在下载 %s' % img_url)
        # Keep the real extension so PNG data is not stored as ".jpg".
        target = os.path.join(save_dir, '%d.%s' % (index, ext))
        try:
            urlretrieve(img_url, target)
        except (IOError, ValueError) as err:
            # IOError: network/file failure; ValueError: Python 3 rejects
            # URLs without a scheme (e.g. relative paths).
            print('download failed for %s: %s' % (img_url, err))
            continue
        if delay:
            time.sleep(delay)
# Script driver: fetch the search-result page, then download the images
# it references.
url = "https://www.so.com/s?q=图&ie=utf-8&src=se7_newtab_new"
html = getHtml(url)
getImag(html)

  

posted @ 2018-03-08 13:55  耐烦不急  阅读(211)  评论(0)  编辑  收藏  举报