Python+Selenium爬虫实战一《将QQ今日话题发布到个人博客》
前提条件:
1、使用Wamp Server部署WordPress个人博客,网上资料较多,这里不过多介绍
思路:
1、首先从qq.com首页获取今日话题的链接;
2、通过今日话题链接访问到今日话题,并获取今日话题的标题以及话题内容;
3、登录到本地的博客,发表今日话题。
难点:
今日话题的内容就是对应文章元素(id为articleContent)的‘innerHTML’属性的内容,需要原样(带HTML标签)写入博客编辑器
脚本实现如下:
# coding: utf-8
"""Copy the qq.com "daily hot topic" article into a local WordPress blog.

The script opens qq.com in Chrome, follows the daily-topic link, reads the
article title and HTML body, then logs in to a WordPress instance served at
http://localhost/ and publishes the article as a new post.
"""
import time

from selenium import webdriver
from selenium.webdriver.common.by import By


class QQDailyHot(object):
    """Scrape the qq.com daily topic and publish it to a local WordPress.

    Creating an instance starts a Chrome session and immediately fetches the
    topic; the results are stored on ``self.title`` and ``self.content``.
    """

    def __init__(self):
        self.driver = webdriver.Chrome()
        try:
            self.driver.maximize_window()
            self.title, self.content = (
                self.get_title_and_content_from_daily_hot())
        except Exception:
            # The caller never receives the instance when __init__ raises,
            # so it cannot call quit(); close the browser here instead.
            self.driver.quit()
            raise

    def get_daily_hot_url(self):
        """Return the href of the daily-topic link on the current page."""
        return self.by_css('#todaytop a').get_attribute('href')

    def get_title_and_content_from_daily_hot(self):
        """Open the daily topic and return ``(title, content)``.

        ``title`` is the visible text of the ``sharetitle`` element and
        ``content`` is the raw ``innerHTML`` of the ``articleContent``
        element, so the markup of the article is preserved.
        """
        self.driver.get('http://www.qq.com/')
        url = self.get_daily_hot_url()
        self.driver.get(url)
        title = self.by_id('sharetitle').text
        content = self.by_id('articleContent').get_attribute('innerHTML')
        return title, content

    def quit(self):
        """Close the browser and end the WebDriver session."""
        self.driver.quit()

    def create_post_from_daily_hot(self):
        """Log in to the local WordPress and publish the scraped article."""
        self.driver.get('http://localhost/wp-login.php')
        self.login_as_test()
        self.driver.get('http://localhost/wp-admin/post-new.php')
        self.by_id('title').send_keys(self.title)
        self.set_content(self.content)
        self.by_id('publish').click()

    def set_content(self, text):
        """Write ``text`` as the HTML body of the ``content_ifr`` frame.

        The HTML is handed to the browser as a script argument rather than
        being spliced into the JavaScript source. Scraped markup routinely
        contains quotes, backslashes and newlines, any of which would
        terminate a hand-built string literal and break (or hijack) the
        script.
        """
        text = text.strip()
        js = ('document.getElementById("content_ifr")'
              '.contentWindow.document.body.innerHTML = arguments[0];')
        self.driver.execute_script(js, text)

    def login(self, user_name, password):
        """Fill in the login form on the current page and submit it."""
        self.by_id('user_login').send_keys(user_name)
        self.by_id('user_pass').send_keys(password)
        self.by_id('wp-submit').click()

    def login_as_test(self):
        """Log in with the user name and password both set to 'test'."""
        user_name = password = 'test'
        self.login(user_name, password)

    # The find_element_by_* shortcuts were removed in Selenium 4.3;
    # find_element(By.<strategy>, value) is available in old and new releases.

    def by_id(self, the_id):
        """Return the first element whose id is ``the_id``."""
        return self.driver.find_element(By.ID, the_id)

    def by_css(self, css):
        """Return the first element matching the CSS selector ``css``."""
        return self.driver.find_element(By.CSS_SELECTOR, css)

    def by_name(self, name):
        """Return the first element whose name attribute is ``name``."""
        return self.driver.find_element(By.NAME, name)


if __name__ == '__main__':
    daily_hot = QQDailyHot()
    try:
        daily_hot.create_post_from_daily_hot()
    finally:
        # Always close the browser, even when posting fails part-way.
        daily_hot.quit()
登录到WordPress,查看文章内容(部分截图):