python智能识别验证码自动登录
代码实现
import base64
import json
import os
import ssl
import sys
import time
from typing import Sized
from urllib.error import URLError
from urllib.parse import quote_plus
from urllib.parse import urlencode
from urllib.request import Request
from urllib.request import urlopen

from docx import Document
from docx.shared import Mm
from docxtpl import DocxTemplate, InlineImage, RichText
from PIL import Image
from selenium import webdriver

# Initialise the browser driver. The window is sized and then maximised,
# exactly as in the original script.
driver = webdriver.Chrome()
driver.set_window_size(1280, 800, driver.window_handles[0])
driver.maximize_window()

# NOTE(review): this turns off TLS certificate verification for every urllib
# HTTPS request made by this process. Kept because the original script relies
# on it, but it should be removed once the local CA store is set up correctly.
ssl._create_default_https_context = ssl._create_unverified_context

# Baidu OCR credentials. The environment variables take precedence so the
# keys do not have to live in the source; the literals are the original
# defaults and keep the script working unchanged.
API_KEY = os.environ.get('BAIDU_OCR_API_KEY', 'fqe83vwceOl3A87umYHATbaB')
SECRET_KEY = os.environ.get('BAIDU_OCR_SECRET_KEY', 'UFjtlGbBvhLAh1VSDok1apCuDx6AceRG')

OCR_URL = "https://aip.baidubce.com/rest/2.0/ocr/v1/accurate_basic"
TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'

# Kept for backward compatibility with code that may read it; the original
# Python 2 branch imported nothing, so only Python 3 ever worked.
IS_PY3 = sys.version_info.major == 3

# File the captcha screenshot is written to and read back from.
VCODE_IMAGE_PATH = 'vcode.png'


def getimage():
    """Refresh the captcha image and save a screenshot of it to disk.

    Clicks the captcha element, waits two seconds, then writes the element
    screenshot to ``vcode.png``.
    """
    # find_element("xpath", ...) is used instead of find_element_by_xpath,
    # which no longer exists in Selenium 4.3+.
    ele_vcode = driver.find_element("xpath", "//*[@id='captchaImgU']")
    ele_vcode.click()
    time.sleep(2)
    ele_vcode.screenshot(VCODE_IMAGE_PATH)


def fetch_token():
    """Request an access token from the Baidu token endpoint.

    Returns:
        The ``access_token`` string from the JSON response.

    Raises:
        SystemExit: if the request fails, the response lacks
            ``access_token``/``scope``, or the scope does not contain
            ``brain_all_scope``.
    """
    params = {
        'grant_type': 'client_credentials',
        'client_id': API_KEY,
        'client_secret': SECRET_KEY,
    }
    req = Request(TOKEN_URL, urlencode(params).encode('utf-8'))
    try:
        with urlopen(req, timeout=5) as resp:
            result_str = resp.read().decode()
    except OSError as err:
        # URLError subclasses OSError; this also covers socket timeouts.
        # The original printed the error and then crashed on an unbound
        # variable, so stop here explicitly instead.
        print(err)
        sys.exit(1)

    result = json.loads(result_str)
    if 'access_token' in result and 'scope' in result:
        if 'brain_all_scope' not in result['scope'].split(' '):
            print('please ensure has check the ability')
            sys.exit(1)
        return result['access_token']

    print('please overwrite the correct API_KEY and SECRET_KEY')
    sys.exit(1)


def read_file(image_path):
    """Return the raw bytes of ``image_path``, or ``None`` if it cannot be read."""
    try:
        with open(image_path, 'rb') as f:
            return f.read()
    except OSError:
        print('read image file fail')
        return None


def request(url, data):
    """POST ``data`` (a str) to ``url`` and return the decoded response body.

    Returns ``None`` after printing the error when the request fails with a
    ``URLError``.
    """
    req = Request(url, data.encode('utf-8'))
    try:
        with urlopen(req) as resp:
            return resp.read().decode()
    except URLError as err:
        print(err)
        return None


def get_code():
    """Run OCR on the saved captcha screenshot and return the recognised text.

    Returns an empty string when the screenshot cannot be read, the OCR
    request fails, or the response carries no ``words_result`` entries.
    """
    # Read the image first so a missing file does not cost a token request.
    file_content = read_file(VCODE_IMAGE_PATH)
    if not file_content:
        return ""

    token = fetch_token()
    image_url = OCR_URL + "?access_token=" + token

    result = request(image_url, urlencode({'image': base64.b64encode(file_content)}))
    if not result:
        return ""

    result_json = json.loads(result)
    # An error payload has no "words_result" key; treat it as "no text".
    return "".join(item["words"] for item in result_json.get("words_result", []))


def _fill_field(xpath, value):
    """Clear the input located by ``xpath`` and type ``value`` into it."""
    field = driver.find_element("xpath", xpath)
    field.clear()
    field.send_keys(value)


def phsc_login(max_attempts=10):
    """Log in to the trade site, retrying with a fresh captcha on failure.

    Args:
        max_attempts: maximum number of captcha/login attempts. The original
            loop was unbounded and could call the OCR service forever.

    Returns:
        ``True`` once the page title is no longer the login title, ``False``
        if every attempt was used up while still on the login page.
    """
    driver.get("https://www.shgt.com/trade-web/login")
    time.sleep(5)

    for _ in range(max_attempts):
        # The page title stays '登录' while the login form is still shown.
        if driver.title != '登录':
            return True

        getimage()
        vcode = get_code()
        if not vcode:
            # Nothing recognised; the next pass clicks the captcha again to
            # get a new image instead of submitting an empty code.
            continue

        _fill_field("//*[@name='user']", "username")
        _fill_field("//*[@name='pass']", "password")
        _fill_field("//*[@name='validateCode']", vcode)
        driver.find_element(
            "xpath", "//*[@class='el-button btn_login el-button--button']"
        ).click()
        time.sleep(5)

    return driver.title != '登录'


# Always close the browser, even when the login flow raises or exits.
try:
    if not phsc_login():
        print('login failed: captcha attempts exhausted')
finally:
    driver.quit()
参考文章:
百度OCR接口入门:https://ai.baidu.com/ai-doc/OCR/dk3iqnq51
如何用代码调用百度OCR服务:https://cloud.baidu.com/doc/OCR/s/Pkrwx9ye4
【Python+selenium】带图片验证码的登录自动化实战:https://www.jianshu.com/p/6755a40d961f
5行Python实现验证码识别(识别率一般):https://jishuin.proginn.com/p/763bfbd60bb1
分类:
python_selenium
, python
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人