0 Logging in to cnblogs with Selenium
from selenium import webdriver
import time
import json

bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    bro.get('https://www.cnblogs.com/')
    bro.implicitly_wait(10)
    time.sleep(2)
    # load the cookies saved from an earlier login and inject them one by one
    with open('cnblogs.json', 'r', encoding='utf-8') as f:
        cookies = json.load(f)
    for cookie in cookies:
        bro.add_cookie(cookie)
    # refresh so the page picks up the injected cookies; we should now be logged in
    bro.refresh()
    time.sleep(5)
except Exception as e:
    print(e)
finally:
    bro.close()
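For this to work, cnblogs.json must already exist. A minimal sketch of how it might be produced, assuming you log in by hand while the script waits (get_cookies() is the standard Selenium call):

from selenium import webdriver
import json

bro = webdriver.Chrome(executable_path='./chromedriver.exe')
bro.get('https://www.cnblogs.com/')
input('Log in manually in the opened browser, then press Enter here...')
# get_cookies() returns a list of cookie dicts that add_cookie() accepts back
with open('cnblogs.json', 'w', encoding='utf-8') as f:
    json.dump(bro.get_cookies(), f)
bro.close()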
1 Semi-automatic upvoting on chouti
import json
import requests

try:
    # load cookies saved from a (manual) Selenium login and convert them
    # into the {name: value} dict format that requests expects
    with open('chouti.json', 'r', encoding='utf-8') as f:
        cookies = json.load(f)
    request_cookies = {}
    for cookie in cookies:
        request_cookies[cookie['name']] = cookie['value']
    print(request_cookies)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
    }
    # fetch the 24-hour hot list, then upvote every item on it
    res = requests.get('https://dig.chouti.com/top/24hr?_=1679277434856', headers=headers)
    for item in res.json().get('data'):
        id_link = item.get('id')
        data = {
            'linkId': id_link
        }
        res2 = requests.post('https://dig.chouti.com/link/vote', headers=headers, data=data, cookies=request_cookies)
        print(res2.text)
except Exception as e:
    print(e)
finally:
    pass
2 Using XPath
-bs4: find and find_all
-selenium: find_element and find_elements
-lxml is also a parser, and it supports both xpath and css
-css we already know
-xpath is what still needs to be learned
XPath (XML Path Language) is a language for locating parts of an XML document; a short lxml sketch follows the selector list below
- /     search from the current path
- /div  find div under the current path
- //    recursive search, through all descendants
- //div recursively find div
- @     take an attribute
- .     the current node
- ..    the parent node
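A minimal runnable sketch of these selectors with lxml; the HTML snippet and variable names are made up for illustration:

from lxml import etree

doc = '''
<html>
  <body>
    <div id="main"><a class="link" href="https://example.com">hello</a></div>
    <div><a href="https://example.org">world</a></div>
  </body>
</html>
'''
html = etree.HTML(doc)
print(html.xpath('//div'))                       # //: recursively find every div
print(html.xpath('//div/a/@href'))               # @: take the href attribute of each link
print(html.xpath('//a[@class="link"]/text()'))   # filter on an attribute, take the text
print(html.xpath('//a/..'))                      # ..: step up to each link's parent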
3 Selenium action chains
-used for slider captchas
-form one: one-shot drag and drop
    actions = ActionChains(bro)
    actions.drag_and_drop(source, target)
    actions.perform()
-form two: hold the slider down and move it a little at a time
    ActionChains(bro).click_and_hold(source).perform()
    distance = target.location['x'] - source.location['x']
    track = 0
    while track < distance:
        ActionChains(bro).move_by_offset(xoffset=2, yoffset=0).perform()
        track += 2
3.1 Action chain example
import time
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

browser = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    browser.get('http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    # the drag-and-drop demo lives inside an iframe, so switch into it first
    browser.switch_to.frame('iframeResult')
    target = browser.find_element(By.ID, 'droppable')
    source = browser.find_element(By.ID, 'draggable')
    # form one: drag the source box onto the target in a single action
    ActionChains(browser).drag_and_drop(source, target).perform()
    time.sleep(2)
finally:
    browser.close()
4 Automated login to 12306
import time
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

# hide the automation fingerprint, otherwise 12306 detects Selenium
# and the slider verification keeps failing
options = Options()
options.add_argument("--disable-blink-features=AutomationControlled")
browser = webdriver.Chrome(executable_path='./chromedriver.exe', options=options)
try:
    browser.get('https://kyfw.12306.cn/otn/resources/login.html')
    browser.maximize_window()
    username = browser.find_element(By.ID, 'J-userName')
    password = browser.find_element(By.ID, 'J-password')
    username.send_keys('')  # fill in your account
    password.send_keys('')  # fill in your password
    login_btn = browser.find_element(By.ID, 'J-login')
    time.sleep(2)
    login_btn.click()
    time.sleep(5)
    # form two: hold the slider and drag it all the way to the right
    span = browser.find_element(By.ID, 'nc_1_n1z')
    ActionChains(browser).click_and_hold(span).perform()
    ActionChains(browser).move_by_offset(xoffset=300, yoffset=0).perform()
    time.sleep(3)
finally:
    browser.close()
5 Using a captcha-solving platform
http://www.chaojiying.com/price.html
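The flow with such a platform: register, buy points, download their Python SDK (the chaojiying.py imported below), then send the captcha image bytes plus a code-type id and read the recognized text back. A minimal sketch, assuming the official SDK file sits next to the script; the account values are placeholders:

from chaojiying import ChaojiyingClient  # SDK file downloaded from chaojiying.com

client = ChaojiyingClient('username', 'password', 'soft_id')  # placeholders: your account, password, software id
with open('code.png', 'rb') as f:
    im = f.read()
result = client.PostPic(im, 1902)  # 1902 is the code type used in the login example below
print(result['pic_str'])           # the recognized captcha text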
6 Automated login using the captcha-solving platform
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from chaojiying import ChaojiyingClient
from PIL import Image

bro = webdriver.Chrome(executable_path='./chromedriver.exe')
bro.get('http://www.chaojiying.com/apiuser/login/')
bro.implicitly_wait(10)
bro.maximize_window()
try:
    username = bro.find_element(by=By.XPATH, value='/html/body/div[3]/div/div[3]/div[1]/form/p[1]/input')
    password = bro.find_element(by=By.XPATH, value='/html/body/div[3]/div/div[3]/div[1]/form/p[2]/input')
    code = bro.find_element(by=By.XPATH, value='/html/body/div[3]/div/div[3]/div[1]/form/p[3]/input')
    btn = bro.find_element(by=By.XPATH, value='/html/body/div[3]/div/div[3]/div[1]/form/p[4]/input')
    username.send_keys('306334678')
    password.send_keys('lqz123')
    # screenshot the whole page, then crop out just the captcha image
    bro.save_screenshot('main.png')
    img = bro.find_element(By.XPATH, '/html/body/div[3]/div/div[3]/div[1]/form/div/img')
    location = img.location
    size = img.size
    print(location)
    print(size)
    img_tu = (int(location['x']), int(location['y']), int(location['x'] + size['width']), int(location['y'] + size['height']))
    img = Image.open('./main.png')
    fram = img.crop(img_tu)
    fram.save('code.png')
    # send the cropped captcha to the platform; call PostPic only once,
    # since every call costs points
    chaojiying = ChaojiyingClient('306334678', 'lqz123', '937234')
    with open('code.png', 'rb') as f:
        im = f.read()
    res = chaojiying.PostPic(im, 1902)
    print(res)
    res_code = res['pic_str']
    code.send_keys(res_code)
    time.sleep(5)
    btn.click()
    time.sleep(10)
except Exception as e:
    print(e)
finally:
    bro.close()
7 Scraping JD product information with Selenium
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

def get_goods(bro):
    # every product on the result page is an li.gl-item
    li_list = bro.find_elements(By.CLASS_NAME, 'gl-item')
    for li in li_list:
        try:
            # images below the fold are lazy-loaded: src stays empty until you
            # scroll, so fall back to the raw data-lazy-img attribute
            img_url = li.find_element(By.CSS_SELECTOR, '.p-img img').get_attribute('src')
            if not img_url:
                img_url = 'https:' + li.find_element(By.CSS_SELECTOR, '.p-img img').get_attribute('data-lazy-img')
            price = li.find_element(By.CSS_SELECTOR, '.p-price i').text
            name = li.find_element(By.CSS_SELECTOR, '.p-name a').text
            # get_attribute('href') already returns an absolute URL, no prefix needed
            url = li.find_element(By.CSS_SELECTOR, '.p-img a').get_attribute('href')
            commit = li.find_element(By.CSS_SELECTOR, '.p-commit a').text
            print('''
            image url: %s
            product url: %s
            name: %s
            price: %s
            comment count: %s
            ''' % (img_url, url, name, price, commit))
        except Exception as e:
            print(e)
            continue
    # click "next page" (下一页) and recurse; when there is no next-page link
    # left, find_element raises and the outer except ends the crawl
    next_btn = bro.find_element(By.PARTIAL_LINK_TEXT, '下一页')
    time.sleep(1)
    next_btn.click()
    get_goods(bro)

bro = webdriver.Chrome(executable_path='./chromedriver.exe')
try:
    bro.get('http://www.jd.com')
    bro.implicitly_wait(10)
    input_key = bro.find_element(By.ID, 'key')
    input_key.send_keys('茅台')
    input_key.send_keys(Keys.ENTER)
    # scroll down so the lazy-loaded images get a chance to load
    bro.execute_script('scrollTo(0,5000)')
    get_goods(bro)
except Exception as e:
    print(e)
finally:
    bro.close()
8 Introduction to Scrapy
-pip3.8 install scrapy
-if the install fails, it is almost always twisted that will not build; install the pieces separately (Windows):
    1. pip3 install wheel
    2. pip3 install lxml
    3. pip3 install pyopenssl
    4. download and install pywin32: https://sourceforge.net/projects/pywin32/files/pywin32/
    5. download the twisted wheel file: http://www.lfd.uci.edu/~gohlke/pythonlibs/
    6. pip3 install <download dir>\Twisted-17.9.0-cp36-cp36m-win_amd64.whl
    7. pip3 install scrapy
-spiders: the crawlers you write yourself (there can be many); they define the start URLs and the parsing rules
-engine: controls how data flows through the whole framework, the "general manager"
-scheduler: the queue where Request objects wait their turn to be crawled
-DownloaderMiddleware: hooks that process outgoing requests and incoming responses
-Downloader: does the actual downloading; very efficient, built on twisted's high-concurrency model
-SpiderMiddleware: sits between the engine and the spiders (rarely used)
-pipelines: responsible for persisting the scraped data
Create a project and generate a spider (a minimal spider sketch follows):
scrapy startproject firstscrapy
scrapy genspider <name> <url>
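A minimal sketch of what a generated spider might look like once a parse rule is filled in; the spider name and the CSS selectors are illustrative, not taken from a real cnblogs page:

import scrapy

class CnblogsSpider(scrapy.Spider):
    name = 'cnblogs'                          # run with: scrapy crawl cnblogs
    allowed_domains = ['www.cnblogs.com']
    start_urls = ['https://www.cnblogs.com/']

    def parse(self, response):
        # parse() is called with every downloaded response;
        # yield dict items for the pipelines, or new Requests to keep crawling
        for article in response.css('article.post-item'):
            yield {
                'title': article.css('a.post-item-title::text').get(),
            }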
