无敌爬虫之无头浏览器

 

 

 

 

驱动下载

https://sites.google.com/a/chromium.org/chromedriver/downloads

 

 

import bs4

import requests
from selenium import webdriver
import time


# Scrape a JavaScript-rendered page with headless Chrome, print its rendered
# HTML and title, and append the text of every <p> element to a text file.

# Launch Chrome in headless mode (no visible browser window).
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
# Point Selenium at the locally installed chromedriver binary.
# NOTE(review): newer Selenium 4 releases reportedly replace the
# `executable_path` keyword with a Service object - confirm against the
# installed Selenium version before changing this call.
driver = webdriver.Chrome(executable_path=r'C:\Program Files\Google\Chrome\Application\chromedriver.exe',options=options)

try:
    # Load the target page.
    driver.get("https://12345678912345789")

    # Fixed pause before reading the DOM so client-side rendering can finish.
    time.sleep(10)
    re_date = driver.page_source
    title = driver.title
finally:
    # Always shut the browser down, even if loading the page failed;
    # otherwise the Chrome/chromedriver processes are left running.
    driver.quit()

# Print the HTML as it looks after rendering.
print(re_date)
soup = bs4.BeautifulSoup(re_date, "html.parser")
# Print the page title.
print(title)

# Collect the text of every <p> element and append it to the output file.
paragraphs = soup.find_all('p')
if paragraphs:
    # Open the file once for the whole run instead of once per paragraph.
    with open('123456789.txt','a+', encoding='utf-8') as f:
        for p in paragraphs:
            f.write(p.text)
            print("正在写入------------------->", p.text)

 

posted @ 2023-03-28 11:50  trysocket  阅读(100)  评论(0)  编辑  收藏  举报