from selenium import webdriver
import time,random
# Script: open the local demo page and click a randomly chosen button ten
# times, pausing one second between clicks.
#
# To run without a visible browser window, build the driver with options:
#   option = webdriver.ChromeOptions()
#   option.add_argument("--headless")
#   browser = webdriver.Chrome(options=option)
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    browser.get('http://localhost/demo1.html')
    # find_elements(By.…) replaces the find_elements_by_* helpers that were
    # removed in Selenium 4.
    buttons = browser.find_elements(By.CLASS_NAME, 'mybutton')
    if not buttons:
        raise RuntimeError("no element with class 'mybutton' found on the page")
    # The script picks among the first six buttons (indices 0-5); clamp the
    # upper bound so a page with fewer buttons cannot cause an IndexError.
    last_index = min(5, len(buttons) - 1)
    for _ in range(10):
        idx = random.randint(0, last_index)
        buttons[idx].click()
        print('当前正在点击按钮', idx + 1)
        time.sleep(1)
except Exception as e:
    # Best-effort demo script: report the problem instead of a traceback.
    print(e)
finally:
    # quit() ends the whole driver session, even when an error occurred.
    browser.quit()
# 爬取网页Java图书信息
import requests
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from time import sleep
import os

from selenium.webdriver.common.by import By

# Script: search ptpress.com.cn for "Java", then download the cover image of
# every result item into OUTPUT_DIR, naming each file after the item's text.

OUTPUT_DIR = "./实训二/"
# Characters that cannot appear in a file name (path separators, Windows
# reserved characters, line breaks) are mapped to "_" before saving.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: "_" for ch in '\\/:*?"<>|\r\n\t'})

driver = webdriver.Chrome()
# NOTE(review): `wait` is created but never used; the script relies on the
# fixed sleep below. Switching to an explicit wait needs a condition that
# distinguishes the post-search results — confirm against the live page.
wait = WebDriverWait(driver, 4)
try:
    driver.get('http://www.ptpress.com.cn/search/books')
    # find_element(By.…) replaces the find_element_by_* helpers that were
    # removed in Selenium 4; the local name no longer shadows builtin input().
    search_box = driver.find_element(By.ID, 'searchVal')
    search_box.send_keys('Java')
    button = driver.find_element(
        By.XPATH, '//div[@class="search_main down_search"]/button')
    button.click()
    sleep(5)  # give the result list time to render

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    divs = driver.find_elements(By.CLASS_NAME, 'book_item')
    for div in divs:
        # Each div is itself a WebElement, so it can be searched further.
        images = div.find_elements(By.TAG_NAME, 'img')
        img = images[0].get_attribute('src') if images else None
        if not img:
            # Nothing to download for an item without an image URL.
            continue
        title = div.text
        print('当前下载:', title, " ", img)
        safe_title = title.strip().translate(_UNSAFE_FILENAME_CHARS)
        fileurl = OUTPUT_DIR + safe_title + ".jpg"
        try:
            r = requests.get(img, timeout=10)
            r.raise_for_status()
        except requests.RequestException as exc:
            # Skip this cover but keep going with the remaining items.
            print('下载失败:', title, exc)
            continue
        with open(fileurl, 'wb') as f:
            f.write(r.content)
finally:
    # quit() ends the whole driver session, even when an error occurred.
    driver.quit()
# 将数据存储到MongoDB数据库中
import pymongo,requests,json
# Script: fetch the ptpress book list as JSON and store each book's name,
# author and price in the `webspider` collection of the local `test` database.
ua = {'User-Agent' : 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) Chrome/65.0.3325.181'}
url = 'https://www.ptpress.com.cn/bookinfo/getBookListForWS'

# A timeout keeps the script from hanging on an unresponsive server, and
# raise_for_status() stops an HTTP error page from being parsed as JSON.
response = requests.get(url, headers=ua, timeout=10)
response.raise_for_status()
res = response.text
data = json.loads(res)
news = data['data']

# The context manager closes the MongoDB connection when the block exits,
# including when an insert fails part-way through.
with pymongo.MongoClient('localhost:27017') as client:
    db = client['test']
    col = db['webspider']
    for book in news:
        a = {
            'name': book['bookName'],
            'author': book['author'],
            'price': book['price'],
        }
        col.insert_one(a)
        # insert_one adds the generated `_id` to the dict, so it is printed too.
        print(a)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· winform 绘制太阳,地球,月球 运作规律
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)