# 煎蛋 无聊图解析 (Jandan "boring pictures" page parser)
import sys
from html.parser import HTMLParser

# Listing pages are addressed as PAGE_URL_PREFIX followed by the page number.
PAGE_URL_PREFIX = 'http://jandan.net/pic/page-'
FIRST_PAGE = 4000
LAST_PAGE = 6001  # inclusive
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the crawl


class MyHtmlParser(HTMLParser):
    """Collect image URLs from ``<img>`` tags while parsing HTML.

    Every ``src`` / ``org_src`` attribute value of an ``<img>`` tag that does
    not contain the substring ``'jandan'`` is recorded (presumably to drop the
    site's own assets -- confirm against the live markup).

    Attributes:
        srclist: dict mapping a running index (0, 1, 2, ...) to each URL, in
            the order the URLs were encountered.
        count: number of URLs recorded so far; also the next index to use.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance state. As class attributes the dict was shared by all
        # parsers, so URLs from earlier pages leaked into later ones.
        self.srclist = {}
        self.count = 0

    def handle_starttag(self, tag, attrs):
        """Record qualifying image URLs from the attributes of an ``<img>`` tag."""
        if tag != 'img':
            return
        for name, value in attrs:
            if name not in ('src', 'org_src'):
                continue
            # html.parser reports a valueless attribute (``<img src>``) as None.
            if value is None or 'jandan' in value:
                continue
            self.srclist[self.count] = value
            self.count += 1


def extract_image_urls(html):
    """Return the image URLs found in *html*, in document order."""
    parser = MyHtmlParser()
    parser.feed(html)
    parser.close()  # flush anything still buffered by the parser
    return list(parser.srclist.values())


def fetch_page(url):
    """Return the decoded body of *url*, or ``None`` if the request failed.

    Failures are reported on stderr so that stdout carries only image URLs.
    """
    # Imported here so the parsing code above stays importable on machines
    # that do not have the third-party ``requests`` package installed.
    import requests

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print('skipping {}: {}'.format(url, exc), file=sys.stderr)
        return None
    # ``response.text`` is the decoded document; ``str(response.content)``
    # would be the repr of a bytes object ("b'...'" with escaped characters).
    return response.text


def main():
    """Crawl pages FIRST_PAGE..LAST_PAGE and print one image URL per line."""
    for page_number in range(FIRST_PAGE, LAST_PAGE + 1):
        html = fetch_page(PAGE_URL_PREFIX + str(page_number))
        if html is None:
            continue
        for image_url in extract_image_urls(html):
            print(image_url)


if __name__ == '__main__':
    main()