2017-11-23 初学 demo:爬取网页资料

一、工具vs2015 +python3.5

import urllib.request 
import urllib.error
import re


def getcontent(url, page):
    """Fetch *url* and print the inner HTML of each ``<div class="content">``.

    The request is sent with a desktop-browser User-Agent header. Every
    match of the content pattern is printed on its own ``print`` call.

    Args:
        url: Address of the page to download.
        page: Page number supplied by the caller. It is not used by this
            function and is kept only so existing calls keep working.

    Returns:
        None. Matches are written to standard output. If the request fails
        with ``urllib.error.URLError``, the error's ``reason`` is printed
        instead and nothing else is output.
    """
    user_agent = (
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
    )
    opener = urllib.request.build_opener()
    opener.addheaders = [user_agent]
    # Still installed globally, as before, so plain urlopen() calls made
    # elsewhere in the process keep sending the same header.
    urllib.request.install_opener(opener)

    try:
        # The with-block closes the response instead of leaking the socket.
        with opener.open(url) as response:
            raw = response.read()
    except urllib.error.URLError as err:
        print(err.reason)
        return

    # Undecodable bytes become U+FFFD rather than raising UnicodeDecodeError
    # and aborting the caller's crawl over one malformed page.
    html = raw.decode("utf-8", errors="replace")

    # re.S lets "." match newlines, so a match may span several lines.
    content_pattern = re.compile(r'<div class="content">(.*?)</div>', re.S)
    for content in content_pattern.findall(html):
        print(content)


# Request listing pages 1 through 28 in order; getcontent prints what it finds.
for page_no in range(1, 29):
    getcontent("https://www.qiushibaike.com/8hr/page/{}".format(page_no), page_no)

 模仿浏览器访问,用正则表达式匹配内容,并打印结果。

posted @ 2017-11-23 14:58  soar.pang  阅读(185)  评论(0)  编辑  收藏  举报