漫画爬取
#!/usr/bin/env python
# _*_ coding: utf-8 _*_
# __author__ ='kong'
"""Save every page of an online comic chapter as a PNG screenshot.

The script drives a headless PhantomJS browser through Selenium: it opens the
chapter URL, reads the page count from the page, and screenshots each page in
turn, clicking the "next" button between screenshots.
"""
import os
import time

from selenium import webdriver


class GetCartoon(object):
    """Download one comic chapter as numbered PNG screenshots.

    Instantiating the class does all the work: it opens ``startUrl`` in a
    PhantomJS browser and immediately calls :meth:`saveCartoon`.

    NOTE(review): ``webdriver.PhantomJS`` and the ``find_element_by_*``
    helpers used below are Selenium 3-era APIs -- confirm the installed
    Selenium version still provides them.
    """

    def __init__(self):
        # Chapter to download; the page count is read from the page itself.
        self.startUrl = "http://www.1kkk.com/ch1-406302/"
        self.browser = self.getBrowser()
        self.saveCartoon()

    def getBrowser(self):
        """Start PhantomJS and try to load ``self.startUrl``.

        Loading is best-effort: a failure is reported on stdout and the
        browser object is returned anyway, exactly as before.

        Returns:
            The PhantomJS WebDriver instance.
        """
        browser = webdriver.PhantomJS()
        try:
            browser.get(self.startUrl)
            # Let later find_element_* calls wait up to 5 s for elements.
            browser.implicitly_wait(5)
            print("url get ok")
        except Exception:
            # Narrower than a bare ``except:`` so Ctrl-C / SystemExit are not
            # swallowed, while ordinary load failures remain non-fatal.
            print("url get failed")
        return browser

    def createDir(self, fileName):
        """Create directory ``fileName`` unless that path already exists.

        Args:
            fileName: Path of the directory to create (intermediate
                directories are created as needed).
        """
        if os.path.exists(fileName):
            print("dir is exists")
        else:
            os.makedirs(fileName)
            print("dir create ok")

    def saveCartoon(self):
        """Screenshot every page of the chapter into a per-chapter directory.

        The directory name is the part of the page title before the first
        ``_``. The process working directory is changed into it, and the
        pages are saved there as ``1.png``, ``2.png``, ...  The browser is
        always shut down when this method finishes, even on error.
        """
        try:
            fileName = self.browser.title.split("_")[0]
            self.createDir(fileName)
            os.chdir(fileName)

            # Total number of pages, read from the page (the original author
            # noted 26 pages for this chapter).
            sumPage = int(self.browser.find_element_by_xpath(
                "//font[@class='zf40']/span[2]").text)

            for i in range(1, sumPage + 1):
                imgName = str(i) + '.png'
                # Save the currently rendered HTML page as an image.
                self.browser.get_screenshot_as_file(imgName)

                # Only advance while there is another page to capture; there
                # is nothing to click (or wait for) after the last page.
                if i < sumPage:
                    # Click the "next page" button.
                    NextTag = self.browser.find_element_by_id("next")
                    NextTag.click()
                    # Give the next page 3 seconds to load before saving it.
                    time.sleep(3)
        finally:
            # quit() ends the whole driver session (close() only closes the
            # window), and doing it in ``finally`` avoids leaking the
            # PhantomJS process when a step above raises.
            self.browser.quit()


if __name__ == '__main__':
    GC = GetCartoon()