煎蛋 无聊图解析

import requests
from html.parser import HTMLParser

class MyHtmlParser(HTMLParser):
    """Collect the off-site image URLs found in an HTML document.

    After ``feed()``, ``srclist`` maps the zero-based index of an ``<img>``
    tag (counted over every ``<img>`` seen by this parser) to the value of
    its ``src`` / ``org_src`` attribute. Values containing ``'jandan'`` are
    skipped, so an ``<img>`` with no other URL leaves a gap in the keys.
    ``count`` is the number of ``<img>`` tags seen so far.
    """

    def __init__(self, *args, **kwargs):
        """Create a parser with its own empty result state.

        Positional and keyword arguments are forwarded to ``HTMLParser``.
        """
        super().__init__(*args, **kwargs)
        # State lives on the instance: a class-level dict would be shared by
        # every parser and leak URLs from one document into the next.
        self.srclist = {}
        self.count = 0

    def handle_starttag(self, tag, attrs):
        """Record the image URL of an ``<img>`` start tag.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``. When both ``src`` and ``org_src`` qualify, the one
        that appears later in the tag wins because both use the same key.
        """
        if tag != 'img':
            return
        for name, value in attrs:
            if name not in ('src', 'org_src'):
                continue
            # A bare attribute (``<img src>``) is reported with value None.
            if value is None or 'jandan' in value:
                continue
            self.srclist[self.count] = value
        # Counted once per <img>, whether or not a URL was stored.
        self.count += 1

    def handle_endtag(self, tag):
        """Ignore end tags; nothing needs to be recorded for them."""
        pass

    def handle_data(self, data):
        """Ignore text content; only tag attributes are of interest."""
        pass
# Walk listing pages 4000..6001 and print every image URL the parser collects.
for x in range(4000, 6002):
    iurl = 'http://jandan.net/pic/page-' + str(x)
    # Without a timeout a stalled connection would block the run forever.
    r = requests.get(iurl, timeout=30)
    # A fresh parser per page, so results are collected page by page.
    parser = MyHtmlParser()
    # r.text is the decoded body; str(r.content) would feed the repr of a
    # bytes object (b'...' with escaped quotes and \x.. sequences) instead.
    parser.feed(r.text)
    for v in parser.srclist.values():
        print(v)

 

posted @ 2015-02-13 15:10  yumuxu  阅读(997)  评论(0)  编辑  收藏  举报