selenium代码实例
# 环境安装:pip install selenium
# 编码流程:
1. 导包:from selenium import webdriver
2. 实例化某一款浏览器对象
3. 自行制定自动化操作代码
# 使用下面的方法,查找指定的元素进行操作
find_element_by_id            根据 id 找节点
find_elements_by_name         根据 name 找
find_elements_by_xpath        根据 xpath 查找
find_elements_by_tag_name     根据标签名找
find_elements_by_class_name   根据 class 名字查找
# 截屏保存
browser.save_screenshot(r'phantomjs\baidu.png')
# 退出驱动程序
driver.quit()
# 自动打开百度搜索人民币
# Open Baidu automatically and search for "人民币".
# NOTE(review): `executable_path` and `find_element_by_id` follow the
# Selenium 3 style used throughout these notes; confirm they are still
# available in the installed Selenium version before running.
from selenium import webdriver
from time import sleep

bro = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Desktop\chromedriver_win32\chromedriver.exe')
try:
    bro.get(url='https://www.baidu.com/')
    sleep(2)

    text_input = bro.find_element_by_id('kw')
    # send_keys types text into the <input> element.
    text_input.send_keys('人民币')
    sleep(2)

    bro.find_element_by_id('su').click()
    sleep(3)

    # Source of the current page (the data after rendering).
    print(bro.page_source)
finally:
    # Quit even when a step above raises, so the browser and driver
    # processes are not left running.
    bro.quit()
#获取豆瓣电影中更多电影详情数据
# Fetch more movie detail data from Douban Movies using headless Chrome.
from selenium import webdriver
from time import sleep
# Headless Chrome options.
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

url = 'https://movie.douban.com/typerank?type_name=%E6%83%8A%E6%82%9A&type=19&interval_id=100:90&action='
bro = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Desktop\chromedriver_win32\chromedriver.exe',chrome_options=chrome_options)
try:
    bro.get(url)
    sleep(3)

    # Run JS to scroll to the bottom of the page three times, pausing
    # 3, 3 and 2 seconds after each scroll (same timings as before).
    for pause in (3, 3, 2):
        bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        sleep(pause)

    page_text = bro.page_source
    with open('./douban.html', 'w', encoding='utf-8') as fp:
        fp.write(page_text)
    sleep(1)
finally:
    # Quit even when a step above raises, so the headless browser and
    # driver processes are not left running.
    bro.quit()
#登录qq空间爬取主页
from selenium import webdriver
from time import sleep

# Log in to QQ Zone and save the resulting page source to qq.html.
# NOTE(review): the account and password below are hard-coded in the
# script; presumably placeholders — do not commit real credentials.
bro = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Desktop\chromedriver_win32\chromedriver.exe')
try:
    url = 'https://qzone.qq.com/'
    bro.get(url=url)
    sleep(2)

    # Switch into the specific iframe that holds the login form.
    bro.switch_to.frame('login_frame')
    bro.find_element_by_id('switcher_plogin').click()
    sleep(2)

    bro.find_element_by_id('u').send_keys('332424')
    bro.find_element_by_id('p').send_keys('dsaafa020@')
    bro.find_element_by_id('login_button').click()
    sleep(5)

    page_text = bro.page_source
    with open('qq.html', 'w', encoding='utf-8') as fp:
        fp.write(page_text)
finally:
    # Quit even when a step above raises (e.g. an element is not found),
    # so the browser and driver processes are not left running.
    bro.quit()
PhantomJS使用(做无头浏览器)(被弃用)
PhantomJS的作者ariya在PhantomJS的GitHub页面的issue #15344中写道:由于缺乏积极的贡献,我将会存档该项目。如果将来我们又重新开发这个项目的话,这个项目还会被取出来。因此,所有的之前的关于PhantomJS 2.5(由 @Vitallium 提起)和PhantomJS 2.1.x(由 @pixiuPL 提起)的计划也会废弃。接下来,为了防止混淆,上述被废弃的版本的源码和二进制包也会被删除。在未来的通知之前,PhantomJS 2.1.1将会是已知最后的稳定版本。
# Fetch more movie detail data from Douban Movies using PhantomJS.
# NOTE: the surrounding notes mark PhantomJS as deprecated; this example is
# kept for reference.
from selenium import webdriver
from time import sleep

url = 'https://movie.douban.com/typerank?type_name=%E6%83%8A%E6%82%9A&type=19&interval_id=100:90&action='
bro = webdriver.PhantomJS(executable_path=r'C:\Users\Administrator\Desktop\爬虫+数据\day_03_爬虫\phantomjs-2.1.1-windows\bin\phantomjs.exe')
try:
    bro.get(url)
    sleep(3)

    # Scroll to the bottom of the page three times, pausing 3, 3 and 2
    # seconds after each scroll (same timings as before).
    for pause in (3, 3, 2):
        bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        sleep(pause)

    page_text = bro.page_source
    with open('./douban.html', 'w', encoding='utf-8') as fp:
        fp.write(page_text)
    sleep(1)
finally:
    # Quit even when a step above raises, so the PhantomJS process is not
    # left running.
    bro.quit()
# 爬取微信公众号文章
# Crawl the articles of a WeChat official account: load the history page,
# collect every article link, then save each article as an HTML file.
import re
from time import sleep  # sleep() is used below; this import was missing

from selenium import webdriver
from lxml import etree
# Headless Chrome options.
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# History-article URL of the official account, captured with Fiddler.
url = 'https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5NzU0MzU0Nw==&scene=124&uin=MzQxNDc2MTIxOQ%3D%3D&key=5fa67e91c99877c92cab8f76d9eba741f20e126dcf62c0a8a42af6c159ae91cc6d9b27dd799b89357259a82e1375e1f275a1960f43e003ac9b5baba11703172d08c866f9bd6aa20534932779237f7fe8&devicetype=Windows+7&version=62080085&lang=zh_CN&a8scene=7&pass_ticket=bB%2BcRIlVVqJKLAN%2FLxVVoWiJXecI7JA3Ttwfs%2FWX0zIjxaW1KxSt6Z2wvmXr8tv0&winzoom=1'

# One driver is reused for the history page and for every article, instead
# of starting a new Chrome process per article.
bro = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Desktop\chromedriver_win32\chromedriver.exe',chrome_options=chrome_options)
try:
    bro.get(url)
    sleep(3)

    # Scroll to the bottom of the page three times, pausing 3, 3 and 2
    # seconds after each scroll (same timings as before).
    for pause in (3, 3, 2):
        bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        sleep(pause)

    page_text = bro.page_source
    with open('./douban.html', 'w', encoding='utf-8') as fp:
        fp.write(page_text)
    sleep(1)

    with open('./douban.html', 'r', encoding="utf-8") as f:
        text_html = f.read()
    etree_page = etree.HTML(text_html)

    # Collect the links of all articles.
    div_list = etree_page.xpath("//div[@class='weui_media_box appmsg js_appmsg']/@hrefs")

    # Download every article of the official account.
    for article_url in div_list:
        # Best effort per article: report the error and continue with the
        # next link rather than aborting the whole crawl.
        try:
            bro.get(article_url)
            page_text = bro.page_source
            t = etree.HTML(page_text)
            text = t.xpath("//h2[@id='activity-name']/text()")[0].strip()
            # Replace characters that are not allowed in Windows file
            # names so such titles can still be saved.
            text = re.sub(r'[\\/:*?"<>|]', '_', text)
            filename = r"C:\Users\Administrator\Desktop\html\%s.html" % text
            with open(filename, 'w', encoding='utf-8') as fp:
                fp.write(page_text)
            print(page_text)
        except Exception as e:
            print(e)
finally:
    # Quit even when a step above raises, so the headless browser and
    # driver processes are not left running.
    bro.quit()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· AI技术革命,工作效率10个最佳AI工具