爬虫学习 06：用 selenium 爬取 QQ 空间
# ---------------------------------------------------------------------------
# Section 1: crawl Qzone with selenium
# ---------------------------------------------------------------------------
# NOTE: every script in this file is written against the Selenium 3 style API
# (executable_path=, chrome_options=, find_element_by_id, webdriver.PhantomJS).
# Each section keeps its own imports so it can be copied out and run alone.
import getpass
import os
import time

from lxml import etree
from selenium import webdriver

# Never keep the account password in source control: read it from the
# environment, and fall back to an interactive prompt when it is not set.
qq_username = os.environ.get('QQ_USERNAME', '1355144989')
qq_password = os.environ.get('QQ_PASSWORD') or getpass.getpass('QQ password: ')

pro = webdriver.Chrome(executable_path=r'C:\Users\古月蜀黍\Desktop\chromedriver_win32\chromedriver.exe')
try:
    pro.get(url='https://i.qq.com/?s_url=http%3A%2F%2Fuser.qzone.qq.com%2F1355144989%2Finfocenter')

    # The login form lives inside an iframe, so switch into it first.
    pro.switch_to.frame('login_frame')
    # Switch from the default login view to the account/password form.
    my_button = pro.find_element_by_id('switcher_plogin')
    my_button.click()

    # Enter the account and the password, then submit.
    username = pro.find_element_by_id('u')
    username.send_keys(qq_username)
    password = pro.find_element_by_id('p')
    password.send_keys(qq_password)
    login = pro.find_element_by_id('login_button')
    login.click()
    time.sleep(2)

    # Scroll to the bottom of the page five times, pausing two seconds after
    # each scroll so the page has time to append more content.
    js = 'window.scrollTo(0, document.body.scrollHeight)'
    for _ in range(5):
        pro.execute_script(js)
        time.sleep(2)

    # Grab the source of the page as currently rendered and extract the text
    # of every node under <div class="f-info">.
    page_text = pro.page_source
    tree = etree.HTML(page_text)
    text = tree.xpath('//div[@class="f-info"]//text()')
    print(text)
finally:
    # Always shut the browser down, even when a step above raised.
    pro.quit()


# ---------------------------------------------------------------------------
# Section 2: headless browser PhantomJS
# ---------------------------------------------------------------------------
from selenium import webdriver
import time

pro = webdriver.PhantomJS(executable_path=r'C:\Users\古月蜀黍\Desktop\文件汇总\爬虫\phantomjs\bin\phantomjs.exe')
try:
    pro.get(url='https://www.baidu.com')

    # Locate the search box with the find_* family of functions.
    my_input = pro.find_element_by_id('kw')
    # Type the query into the located element.
    time.sleep(2)
    my_input.send_keys('胡涛')
    pro.save_screenshot('./1.jpg')

    my_button = pro.find_element_by_id('su')
    # Click the search button.
    time.sleep(2)
    my_button.click()

    # Capture a screenshot and the source of the page currently displayed.
    time.sleep(2)
    pro.save_screenshot('./2.jpg')
    page_text = pro.page_source
    print(page_text)
finally:
    # Quit the browser, even when a step above raised.
    pro.quit()


# ---------------------------------------------------------------------------
# Section 3: configuring headless Chrome
# ---------------------------------------------------------------------------
# Headless browser configuration.
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

from selenium import webdriver
import time

pro = webdriver.Chrome(
    executable_path=r'C:\Users\古月蜀黍\Desktop\chromedriver_win32\chromedriver.exe',
    chrome_options=chrome_options,
)
try:
    pro.get('https://www.baidu.com')

    # Locate the search box with the find_* family of functions.
    my_input = pro.find_element_by_id('kw')
    # Type the query into the located element.
    time.sleep(2)
    my_input.send_keys('胡涛')
    pro.save_screenshot('./111.png')

    my_button = pro.find_element_by_id('su')
    # Click the search button.
    time.sleep(2)
    my_button.click()

    # Capture a screenshot and the source of the page currently displayed.
    time.sleep(2)
    pro.save_screenshot('./222.png')
    page_text = pro.page_source
    print(page_text)
finally:
    # Quit the browser, even when a step above raised.
    pro.quit()