在Linux上用Selenium登录新浪微博,并获取指定用户所关注的用户id
环境:ubuntu、hadoop2.7.3
1.安装selenium
命令:sudo pip install selenium 更新:sudo pip install -U selenium
2.查看firefox版本
命令:firefox -v
3.没有查出版本的话请安装firefox
命令:sudo apt-get install firefox
注意:这步可能会报错,一般是apt的软件源需要更新(修改/etc/apt/sources.list,修改后先执行sudo apt-get update,然后再安装firefox)
4.用selenium驱动firefox需要额外的驱动程序geckodriver,可从 https://github.com/mozilla/geckodriver/releases 下载与系统对应的版本
5.需要将下载到的geckodriver解压放到/usr/local/bin目录下
命令:
sudo cp geckodriver-v0.14.0-linux64.tar.gz /usr/local/bin/geckodriver-v0.14.0-linux64.tar.gz
cd /usr/local/bin
sudo tar xzf geckodriver-v0.14.0-linux64.tar.gz
sudo rm geckodriver-v0.14.0-linux64.tar.gz
6.在无界面的linux系统上需要安装虚拟的显示器
命令:sudo pip install pyvirtualdisplay selenium
7.pyvirtualdisplay依赖xvfb来提供虚拟显示器;如果系统没有安装xvfb,需要先安装,程序才能正常运行
命令:sudo apt-get install xvfb
8.以下是登录微博手机版(weibo.cn)并获取用户5307898815所关注的用户id的代码
# -*- coding: utf-8 -*-
"""Log in to the mobile Weibo site and print the ids a user follows.

The script starts a virtual display, opens Firefox through Selenium,
submits the login form on passport.weibo.cn and then scrapes the first
"follow" page of one user on weibo.cn.

The syntax used here is valid on both Python 2.7 and Python 3, and the
``find_element(By...)`` locator form is used instead of the
``find_element_by_*`` helpers, which newer Selenium releases no longer
provide.
"""
import codecs
import os
import re
import shutil
import sys
import time
import traceback
import urllib

from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import selenium.webdriver.support.ui as ui

# Python 2 only: make utf-8 the implicit str<->unicode codec so that mixing
# byte strings and unicode in the prints below does not raise.  Python 3 has
# neither the ``reload`` builtin nor ``sys.setdefaultencoding``.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821
    sys.setdefaultencoding("utf-8")

# Firefox needs an X display; on a headless machine provide a virtual one.
display = Display(visible=0, size=(1800, 1600))
display.start()
try:
    driver = webdriver.Firefox()
except Exception:
    # Do not leave the virtual display running if the browser cannot start.
    display.stop()
    raise
wait = ui.WebDriverWait(driver, 10)

# Matches the numeric id in a "...uid=<digits>..." link, whether or not
# another query parameter follows it.
_FOLLOW_UID_RE = re.compile(r'uid=(\d+)')


def LoginWeibo_cn(username, password):
    """Fill in and submit the login form of the mobile Weibo site.

    Args:
        username: account name typed into the ``loginName`` field.
        password: password typed into the ``loginPassword`` field.

    Returns:
        True when the form was filled in and the login button was clicked
        without an exception, False otherwise (the error is printed).
    """
    try:
        print(u'准备登陆weibop.cn网站..')
        driver.get("https://passport.weibo.cn/signin/login?entry=mweibo&r=")
        # Fixed pauses give the page time to render between steps.
        time.sleep(10)
        elem_user = driver.find_element(By.ID, "loginName")
        elem_user.send_keys(username)
        time.sleep(5)
        elem_pwd = driver.find_element(By.ID, "loginPassword")
        elem_pwd.send_keys(password)
        time.sleep(5)
        elem_sub = driver.find_element(By.ID, "loginAction")
        elem_sub.click()
        # NOTE: this only confirms that the form was submitted; the page is
        # not inspected to check that the credentials were accepted.
        print(u'登陆成功...')
        return True
    except Exception as e:
        print("Error: %s" % e)
        return False


def _extract_follow_uid(link):
    """Return the digits after ``uid=`` in *link*, or None if absent."""
    if not link:
        return None
    match = _FOLLOW_UID_RE.search(link)
    return match.group(1) if match else None


def VisitPersonFollowPage(uid):
    """Open page 1 of *uid*'s follow list and print each followed user id.

    Every ``<table>`` on the page is treated as one followed user; the id is
    read from the ``href`` of the second link inside the table's cell.
    Failures are printed with a traceback and do not propagate.

    Args:
        uid: id of the user whose follow list is visited.
    """
    try:
        url = 'https://weibo.cn/{}/follow?page=1'.format(uid)
        driver.get(url)
        time.sleep(1)
        users = driver.find_elements(By.XPATH, '//table')
        print('len(users): %d' % len(users))
        for user in users:
            # One malformed entry must not stop the remaining ones.
            try:
                anchor = user.find_element(By.XPATH, './/td/a[2]')
                follow_uid = _extract_follow_uid(anchor.get_attribute("href"))
                if follow_uid:
                    print(u'关注用户id: %s' % follow_uid)
            except Exception:
                traceback.print_exc()
    except Exception:
        traceback.print_exc()


if __name__ == '__main__':
    try:
        # Placeholder credentials: replace with a real account before running.
        username = '用户名'
        password = '密码'
        login_status = LoginWeibo_cn(username, password)
        if login_status:
            user_id = '5307898815'
            VisitPersonFollowPage(user_id)
    finally:
        # Always close the browser and the virtual display, even on errors.
        try:
            driver.quit()
        finally:
            display.stop()
9.以下是登录新浪微博电脑网页版(weibo.com)的代码
# -*- coding: utf-8 -*-
"""Log in to the desktop Weibo site (weibo.com) with Selenium.

The script starts a virtual display, opens Firefox through Selenium and
submits the login form found on the weibo.com login page.

The syntax used here is valid on both Python 2.7 and Python 3, and the
``find_element(By...)`` locator form is used instead of the
``find_element_by_*`` helpers, which newer Selenium releases no longer
provide.
"""
import codecs
import os
import re
import shutil
import sys
import time
import traceback
import urllib

from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import selenium.webdriver.support.ui as ui

# Python 2 only: make utf-8 the implicit str<->unicode codec.  Python 3 has
# neither the ``reload`` builtin nor ``sys.setdefaultencoding``.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821
    sys.setdefaultencoding("utf-8")

# Firefox needs an X display; on a headless machine provide a virtual one.
display = Display(visible=0, size=(1800, 1600))
display.start()
try:
    driver = webdriver.Firefox()
except Exception:
    # Do not leave the virtual display running if the browser cannot start.
    display.stop()
    raise
wait = ui.WebDriverWait(driver, 10)


def LoginWeibo_com(username, password):
    """Fill in and submit the login form of the desktop Weibo site.

    Any exception is printed with a traceback instead of being raised, and a
    closing message is printed in every case.

    Args:
        username: account name typed into the ``loginname`` field.
        password: password typed into the field named ``password``.
    """
    try:
        print(u'准备登陆weibop.com网站..')
        driver.get("http://weibo.com/login?url=")
        # Fixed pauses give the page time to render between steps.
        time.sleep(5)
        elem_user = driver.find_element(By.ID, 'loginname')
        elem_user.clear()
        elem_user.send_keys(username)
        time.sleep(3)
        elem_pwd = driver.find_element(By.NAME, "password")
        elem_pwd.send_keys(password)
        time.sleep(2)
        # The submit control is the link labelled "登录" inside the login form.
        login_form = driver.find_element(By.CLASS_NAME, 'W_login_form')
        elem_sub = login_form.find_element(By.LINK_TEXT, u'登录')
        elem_sub.click()
        # NOTE: this only confirms that the form was submitted; the page is
        # not inspected to check that the credentials were accepted.
        print(u'登陆成功...')
    except Exception:
        traceback.print_exc()
    finally:
        print(u'End loginWeibo!\n\n')


if __name__ == '__main__':
    try:
        # Placeholder credentials: replace with a real account before running.
        username = '用户名'
        password = '密码'
        LoginWeibo_com(username, password)
    finally:
        # Always close the browser and the virtual display, even on errors.
        try:
            driver.quit()
        finally:
            display.stop()