selenium处理动态加载数据
selenium
概念:用来完成浏览器自动化相关的操作。可以通过代码的形式制定一些基于浏览器自动化的相关操作(行为动作),当代码执行后,浏览器就会自动触发相关的事件。
环境安装:
- pip install selenium
- 下载对应浏览器的驱动程序
编码流程:
- 导包:from selenium import webdriver
- 实例化某一款浏览器对象
- 制定相关的行为动作
访问百度
# Open Baidu in a visible Chrome window, type a query into the search box
# and click the search button.
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

# Start Chrome through the driver binary that sits next to this script.
# NOTE(review): `executable_path` is deprecated in Selenium 4; newer releases
# expect a Service object instead - confirm against the installed version.
bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    bro.get('https://www.baidu.com')
    sleep(2)

    # Locate the Baidu search box by its element id and type the query.
    tag_input = bro.find_element(By.ID, 'kw')
    tag_input.send_keys('人民币')
    sleep(2)

    # Locate the search button by its element id and click it.
    btn = bro.find_element(By.ID, 'su')
    btn.click()
    sleep(2)
finally:
    # Always close the browser, even when a lookup or navigation step fails,
    # so no browser/driver process is left behind.
    bro.quit()
滑动
# Load the Xueqiu home page, scroll to the bottom several times so that more
# content is loaded, click the "load more" link and print the rendered HTML.
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

# NOTE(review): `executable_path` is deprecated in Selenium 4; newer releases
# expect a Service object instead - confirm against the installed version.
bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    bro.get('https://xueqiu.com/')
    sleep(2)

    # JavaScript that scrolls the window down to the bottom of the page:
    # document.body.scrollHeight is the full height of the page body.
    js = 'window.scrollTo(0,document.body.scrollHeight)'
    for _ in range(4):
        bro.execute_script(js)
        # Pause after each scroll so the page has time to load more content.
        sleep(2)

    # Locate the "load more" link and click it.
    a_tag = bro.find_element(
        By.XPATH, '//*[@id="app"]/div[3]/div/div[1]/div[2]/div[2]/a'
    )
    a_tag.click()
    sleep(5)

    # Page source as currently rendered by the browser (includes the
    # dynamically loaded data).
    print(bro.page_source)
finally:
    # Always close the browser, even when a step above raises.
    bro.quit()
PhantomJs
# PhantomJS is a browser without a visible window (no installation needed).
# This script loads Xueqiu in PhantomJS, takes a screenshot before and after
# scrolling to the bottom several times, and prints the rendered HTML.
from time import sleep

from selenium import webdriver

# NOTE(review): PhantomJS support was dropped from recent Selenium releases;
# this only works with a Selenium version that still ships
# `webdriver.PhantomJS` - confirm against the installed version.
bro = webdriver.PhantomJS(executable_path=r'C:\Users\Administrator\Desktop\爬虫+数据\爬虫day03\phantomjs-2.1.1-windows\bin\phantomjs.exe')
try:
    bro.get('https://xueqiu.com/')
    sleep(2)
    bro.save_screenshot('./1.png')

    # JavaScript that scrolls the window down to the bottom of the page.
    js = 'window.scrollTo(0,document.body.scrollHeight)'
    for _ in range(4):
        bro.execute_script(js)
        # Pause after each scroll so the page has time to load more content.
        sleep(2)

    bro.save_screenshot('./2.png')
    # a_tag = bro.find_element_by_xpath('//*[@id="app"]/div[3]/div/div[1]/div[2]/div[2]/a')
    # bro.save_screenshot('./2.png')
    # a_tag.click()
    sleep(2)

    # Page source as currently rendered by the browser (includes the
    # dynamically loaded data).
    print(bro.page_source)
finally:
    # Always shut the browser down, even when a step above raises.
    bro.quit()
谷歌无头浏览器
# Run the Baidu search in headless Chrome (no visible window), save a
# screenshot of the start page and print the HTML of the result page.
from time import sleep

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Options object that makes Chrome start without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# NOTE(review): `executable_path` is deprecated in Selenium 4; newer releases
# expect a Service object instead - confirm against the installed version.
bro = webdriver.Chrome(executable_path='./chromedriver.exe', options=chrome_options)
try:
    bro.get('https://www.baidu.com')
    sleep(2)
    bro.save_screenshot('1.png')

    # Locate the search box by its element id and type the query.
    tag_input = bro.find_element(By.ID, 'kw')
    tag_input.send_keys('人民币')
    sleep(2)

    # Locate the search button by its element id and click it.
    btn = bro.find_element(By.ID, 'su')
    btn.click()
    sleep(2)

    print(bro.page_source)
finally:
    # Always close the headless browser; a leaked headless process is
    # invisible and easy to miss.
    bro.quit()
前进和后退
# Browser history navigation: visit three pages in a row, go one step back,
# then one step forward again, and print the HTML of the final page.
from time import sleep

from selenium import webdriver

# NOTE(review): `executable_path` is deprecated in Selenium 4; newer releases
# expect a Service object instead - confirm against the installed version.
bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    # Build up a history of three pages, pausing briefly after each load.
    for url in (
        'https://www.baidu.com',
        'http://www.goubanjia.com/',
        'https://www.taobao.com',
    ):
        bro.get(url)
        sleep(1)

    bro.back()
    sleep(1)
    bro.forward()
    sleep(1)

    print(bro.page_source)
finally:
    # Always close the browser, even when a navigation step raises.
    bro.quit()
动作链一
# Action chain demo 1: press and hold the draggable element on the runoob
# jQuery UI demo page and move it to the right in small steps.
from time import sleep

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

# NOTE(review): `executable_path` is deprecated in Selenium 4; newer releases
# expect a Service object instead - confirm against the installed version.
bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
    bro.get(url=url)

    # The target element lives inside an iframe, so the driver must switch
    # into that frame before the element can be located.
    bro.switch_to.frame('iframeResult')
    source_tag = bro.find_element(By.ID, 'draggable')

    # Create the action chain and queue a press-and-hold on the element.
    action = ActionChains(bro)
    action.click_and_hold(source_tag)

    for _ in range(4):
        # perform() executes the queued actions.
        # NOTE(review): whether perform() clears the queue afterwards differs
        # between Selenium versions - confirm the resulting drag distance.
        action.move_by_offset(20, 0).perform()
        sleep(1)
finally:
    # Always close the browser, even when a step above raises.
    bro.quit()
动作链二
# Action chain demo 2: drag the draggable element onto the drop target on the
# runoob jQuery UI demo page, with Chrome's automation switch excluded.
from time import sleep

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By

# Make Selenium harder to detect: exclude the 'enable-automation' switch.
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])

# NOTE(review): `executable_path` is deprecated in Selenium 4; newer releases
# expect a Service object instead - confirm against the installed version.
bro = webdriver.Chrome(executable_path='./chromedriver.exe', options=option)

url = 'https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
bro.get(url=url)

# The elements live inside an iframe, so the driver must switch into that
# frame before they can be located.
bro.switch_to.frame('iframeResult')
source_tag = bro.find_element(By.ID, 'draggable')
target_tag = bro.find_element(By.ID, 'droppable')

# Create the action chain, queue the drag-and-drop and execute it.
action = ActionChains(bro)
action.drag_and_drop(source_tag, target_tag)
action.perform()
sleep(3)

# The original leaves the quit call disabled, so the browser stays open after
# the drag (presumably to inspect the result); re-enable it to clean up.
# bro.quit()