Python——使用打码平台识别验证码
打码平台介绍
一般使用超级鹰或打码兔的打码平台。
超级鹰介绍
打开http://www.chaojiying.com/contact.html注册用户,生成软件ID
查看打码类型
使用方法
from chaojiying import Chaojiying

# Credentials and per-application settings for the Chaojiying service.
CHAOJIYING_USERNAME = 'xxxxxx'  # account name
CHAOJIYING_PASSWORD = '123456'  # account password
CHAOJIYING_SOFT_ID = 894611  # unique software ID generated for the account
CHAOJIYING_KIND = 9004  # captcha type code

# Create the client instance.
cjy = Chaojiying(
    CHAOJIYING_USERNAME,
    CHAOJIYING_PASSWORD,
    CHAOJIYING_SOFT_ID,
)

# Upload the image bytes for recognition.
# NOTE(review): `bytes_array` is not defined in this snippet; presumably a
# BytesIO holding the captcha image - confirm against the caller.
result = cjy.post_pic(bytes_array.getvalue(), CHAOJIYING_KIND)
# Capture the whole browser window first.
self.browser.save_screenshot('aa.png')

# Locate the div that holds the captcha image.
element = self.browser.find_element_by_xpath(
    '/html/body/div[2]/div/div[2]/div[2]/div[3]/div/div[2]/div[3]/div/div'
)

# Build the crop box from the element's position and size. The top edge is
# moved 100 px up (presumably to include the prompt text above the image -
# TODO confirm).
left = element.location['x']
top = element.location['y'] - 100
right = element.location['x'] + element.size['width']
bottom = element.location['y'] + element.size['height']
box = (left, top, right, bottom)

# Cut the captcha region out of the full screenshot and store it.
im = Image.open('aa.png')
captcha = im.crop(box)
captcha.save('captcha.png')
把需要识别的图片和提示文字一起上传,打码平台会返回需要点击的坐标。
# Submit the image to the recognition platform.
result = self.chaojiying.post_pic(bytes_array.getvalue(), CHAOJIYING_KIND)

# Parse the answer: 'pic_str' is split on '|' into groups, and every group is
# split on ',' into integers (the x and y coordinates).
groups = result.get('pic_str').split('|')
locations = []
for group in groups:
    locations.append([int(number) for number in group.split(',')])
# Click every returned coordinate, expressed as an offset relative to the
# captcha element, pausing one second between clicks.
for location in locations:
    chain = ActionChains(self.browser)
    chain.move_to_element_with_offset(
        self.get_touclick_element(), location[0], location[1]
    )
    chain.click()
    chain.perform()
    time.sleep(1)
识别案例
conf 目录
[DEFAULT]
CODE_USERNAME = xxxxxxxxx
CODE_PASSWORD = 1111111
CODE_SOFT_ID = 894611
CODE_KIND = 9004
TRACK_TICKET_USERNAME = uuuuuu
TRACK_TICKET_PASSWORD = ya1111

[OTHER]
image=1
"""Logging setup: a console handler plus a log-file handler per named logger."""
import logging
import os
from logging import handlers  # currently unused in this module

# Log formats for the console (c_format) and the log file (f_format).
c_format = '[%(asctime)s] [%(levelname)s] [%(filename)s:%(lineno)d] [%(message)s]'
f_format = '[%(asctime)s]-[%(levelname)s]-[%(filename)s:%(lineno)d]-[%(message)s]'

# Log directory: "<parent of this file's directory>/log". Built with
# os.path.join so the separator is correct on every platform.
logfile_dir = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'log')
log_name = 'scrapy.log'

# Absolute path of the log file.
logfile_path = os.path.join(logfile_dir, log_name)


def get_mylogger(name):
    """Return the logger called *name*, configured on first use.

    The logger itself is set to DEBUG. On first use it gets two handlers:
    a console handler (INFO and above, ``c_format``) and a file handler
    writing to ``logfile_path`` (WARNING and above, ``f_format``).

    Calling this function again with the same name returns the same logger
    without adding handlers again, so records are not emitted twice.

    :param name: logger name passed to ``logging.getLogger``.
    :return: the configured ``logging.Logger``.
    :raises OSError: if the log directory or file cannot be created.
    """
    logger = logging.getLogger(name)
    logger.setLevel('DEBUG')

    # logging.getLogger returns the same object for the same name; attaching
    # handlers on every call would duplicate each record.
    if logger.handlers:
        return logger

    console_handler = logging.StreamHandler()
    console_handler.setLevel('INFO')
    console_handler.setFormatter(
        logging.Formatter(fmt=c_format, datefmt='%Y-%m-%d %H:%M:%S '))

    # FileHandler opens the file immediately, so its directory must exist.
    log_dir = os.path.dirname(logfile_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    # Explicit UTF-8 so non-ASCII messages do not depend on the locale.
    file_handler = logging.FileHandler(logfile_path, encoding='utf-8')
    file_handler.setLevel('WARNING')
    file_handler.setFormatter(logging.Formatter(fmt=f_format))

    logger.addHandler(console_handler)
    logger.addHandler(file_handler)
    return logger


if __name__ == '__main__':
    log = get_mylogger('test')
"""Filesystem locations used by the project."""
import os

# Directory that contains this settings file.
_CONF_DIR = os.path.dirname(os.path.abspath(__file__))

# check_config.ini sits next to this file.
config_path = os.path.join(_CONF_DIR, 'check_config.ini')

# The "image" directory sits one level above this file's directory.
# os.path.join is used instead of a hard-coded backslash so both paths are
# valid on every platform.
image_path = os.path.join(os.path.dirname(_CONF_DIR), 'image')
lib目录
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
import configparser
import os


class Read_Ini(object):
    """Small wrapper around ``configparser.ConfigParser`` for one INI file."""

    def __init__(self, config_file):
        """Parse *config_file*.

        :param config_file: path (or list of paths) handed to
            ``ConfigParser.read``.
        """
        self.config = configparser.ConfigParser()
        self.config.read(filenames=config_file)

    def get_value(self, name, section='DEFAULT'):
        """Return the value of option *name* in *section*.

        :param name: option name.
        :param section: section name, ``'DEFAULT'`` when omitted.
        :return: the option value as returned by ``ConfigParser.get``.
        :raises configparser.NoSectionError: if *section* does not exist.
        :raises configparser.NoOptionError: if *name* is not in *section*.
        """
        return self.config.get(section, name)

    def get_section_dict(self, section='DEFAULT'):
        """Return the option names available in *section*.

        Despite the method name, the result is a list of option names, not
        a dict.

        :param section: section name, ``'DEFAULT'`` when omitted.
        :return: list of option names.
        :raises configparser.NoSectionError: if a non-default *section* does
            not exist.
        """
        # ConfigParser.options() raises NoSectionError for the default
        # section, so the default argument of this method could never work;
        # read the defaults mapping directly in that case.
        if section == self.config.default_section:
            return list(self.config.defaults())
        return self.config.options(section)


if __name__ == '__main__':
    # "<parent of this file's directory>/conf/check_config.ini", built with
    # os.path.join so the separators are right on every platform.
    path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
        'conf', 'check_config.ini')
    print(path)
    a = Read_Ini(path)
    print(a.get_value('CODE_USERNAME'))
import requests
from hashlib import md5


class Chaojiying(object):
    """HTTP client for the Chaojiying captcha-recognition service."""

    UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
    REPORT_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

    def __init__(self, username, password, soft_id, timeout=60):
        """Store the credentials that are sent with every request.

        :param username: account name.
        :param password: plain-text password (str); only its MD5 hex digest
            is kept and sent.
        :param soft_id: software ID of the account.
        :param timeout: seconds passed to ``requests.post`` as ``timeout``.
            Without one, a stalled server would block the caller forever.
        """
        self.username = username
        self.password = md5(password.encode('utf-8')).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def post_pic(self, im, codetype):
        """Upload an image for recognition.

        :param im: image content as bytes.
        :param codetype: captcha type code, see
            http://www.chaojiying.com/price.html
        :return: the decoded JSON body of the response.
        :raises requests.RequestException: on network failure or timeout.
        :raises ValueError: if the response body is not valid JSON.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(self.UPLOAD_URL, data=params, files=files,
                          headers=self.headers, timeout=self.timeout)
        return r.json()

    def report_error(self, im_id):
        """Report a wrongly recognised image.

        :param im_id: ID of the image whose answer was wrong.
        :return: the decoded JSON body of the response.
        :raises requests.RequestException: on network failure or timeout.
        :raises ValueError: if the response body is not valid JSON.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(self.REPORT_URL, data=params,
                          headers=self.headers, timeout=self.timeout)
        return r.json()
core目录
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
# @Time    : 2018/4/22 20:16
# @Author  : hyang
# @File    : demo.py
# @Software:

import time
from io import BytesIO
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait  # wait for elements to load
from selenium.webdriver.common.action_chains import ActionChains  # mouse actions
from selenium.webdriver.support import expected_conditions as EC
# from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.by import By
from PIL import Image
from lib import chaojiying
from lib import read_ini
from conf import settings
from conf import my_logset as mylog
from collections import namedtuple
import os


class Check_Code_Click_Demo(object):
    """Log in to the 12306 page by solving its click-captcha via Chaojiying.

    The flow is: open the login page and type the credentials
    (``open_url``), then screenshot the captcha, send it to the recognition
    platform, click the returned coordinates and press the login button
    (``main``).
    """

    # CSS selector of the element that holds the captcha image.
    CAPTCHA_SELECTOR = '.touclick-img-par.touclick-bgimg'

    def __init__(self):
        self.url = 'https://kyfw.12306.cn/otn/login/init'
        self.driver = webdriver.Chrome()
        self.wait = WebDriverWait(self.driver, 10)
        self.config = read_ini.Read_Ini(settings.config_path)
        self.logger = mylog.get_mylogger('demo')
        # Screenshots are written here; save_screenshot needs the directory.
        os.makedirs(settings.image_path, exist_ok=True)
        # Chaojiying client: user name, password, software ID.
        self.chaojiying = chaojiying.Chaojiying(
            self.config.get_value('CODE_USERNAME'),
            self.config.get_value('CODE_PASSWORD'),
            self.config.get_value('CODE_SOFT_ID'))

    def __del__(self):
        """Shut the browser down when the object is destroyed."""
        # __init__ may have failed before self.driver was assigned.
        driver = getattr(self, 'driver', None)
        if driver is None:
            return
        try:
            # quit() ends the whole session including the driver process;
            # close() would only close the current window.
            driver.quit()
        except Exception:
            # Nothing useful can be done during object finalisation.
            pass

    def open_url(self):
        """Open the login page, maximise the window and type the credentials.

        :return: None
        """
        self.driver.get(self.url)
        time.sleep(1)
        self.driver.maximize_window()
        # self.driver.set_window_size(1920, 1080)  # resolution 1920*1080
        self.logger.info('打开url,输入用户名,密码')
        # find_element(By.ID, ...) replaces find_element_by_id, which was
        # removed in Selenium 4.
        self.driver.find_element(By.ID, 'username').send_keys(
            self.config.get_value('TRACK_TICKET_USERNAME'))
        time.sleep(0.5)
        self.driver.find_element(By.ID, 'password').send_keys(
            self.config.get_value('TRACK_TICKET_PASSWORD'))
        time.sleep(0.5)
        # js = 'var q=document.documentElement.scrollTop=280'  # scroll the page
        # self.driver.execute_script(js)

    def element_screenshot(self, element):
        """Screenshot the page and crop it to *element*.

        Writes ``full.png`` (whole page) and ``fullcrop.png`` (cropped
        region) into ``settings.image_path``.

        :param element: WebElement whose location and size define the box.
        :return: the cropped ``PIL.Image.Image``.
        """
        full_path = os.path.join(settings.image_path, 'full.png')
        self.driver.save_screenshot(full_path)

        # Top-left corner of the element.
        left = element.location['x']
        top = element.location['y']
        # Bottom-right corner: origin plus the element's width and height.
        right = left + element.size['width']
        bottom = top + element.size['height']
        box = (left, top, right, bottom)
        self.logger.info('得到验证码位置%s' % str(box))

        # Image.crop takes a single 4-tuple (left, upper, right, lower) and
        # returns a new image holding that rectangle.
        picture = Image.open(full_path).crop(box)
        picture.save(os.path.join(settings.image_path, 'fullcrop.png'))
        return picture

    def get_touclick_image(self):
        """Wait for the captcha image element and return it.

        :return: the WebElement matching ``CAPTCHA_SELECTOR``.
        """
        return self.wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, self.CAPTCHA_SELECTOR)))

    def get_image(self, name='captcha.png', num=100):
        """Return a cropped screenshot of the captcha image.

        :param name: unused; kept so existing callers keep working.
        :param num: unused; kept so existing callers keep working.
        :return: the cropped ``PIL.Image.Image``.
        """
        captcha = self.element_screenshot(self.get_touclick_image())
        self.logger.info('得到要识别的图片')
        return captcha

    def get_click_words(self, location):
        """Click each coordinate inside the captcha image.

        :param location: iterable of ``[x, y]`` offsets relative to the
            captcha element.
        :return: None
        """
        for loc in location:
            im_element = self.get_touclick_image()
            # Move the mouse to the given offset from the element and click.
            ActionChains(self.driver).move_to_element_with_offset(
                im_element, loc[0], loc[1]).click().perform()
            self.driver.save_screenshot(
                os.path.join(settings.image_path, 'check.png'))
            time.sleep(0.5)

    def get_points(self, res):
        """Parse the coordinates out of the platform response.

        ``res['pic_str']`` is split on ``'|'`` into groups and each group on
        ``','`` into integers.

        :param res: decoded response of ``Chaojiying.post_pic``.
        :return: list of integer lists; empty when ``pic_str`` is missing or
            empty (instead of failing on ``int('')``).
        """
        pic_str = res.get('pic_str') if res else None
        if not pic_str:
            return []
        return [[int(num) for num in group.split(',')]
                for group in pic_str.split('|')]

    def login(self):
        """Click the login button."""
        time.sleep(1)
        login_element = self.driver.find_element(By.ID, 'loginSub')
        self.logger.info('开始点击登录')
        login_element.click()

    def main(self, max_retries=3):
        """Solve the captcha and log in, retrying a rejected captcha.

        :param max_retries: maximum number of captcha attempts. The retry
            used to be an unbounded recursive call; every attempt uploads
            an image to the recognition platform, so it is capped.
        :return: True when the login succeeded, False otherwise.
        """
        for attempt in range(1, max_retries + 1):
            image = self.get_image()
            # time.sleep(10)
            # Keep the image as PNG bytes in memory.
            byte_array = BytesIO()
            image.save(byte_array, format='png')
            try:
                # Submit the image bytes to the recognition platform.
                res = self.chaojiying.post_pic(
                    byte_array.getvalue(), self.config.get_value('CODE_KIND'))
                self.logger.info('得到打码平台%s' % res)
                location = self.get_points(res)
                if not location:
                    self.logger.warning('打码平台未返回坐标')
                    return False
                self.logger.info('得到识别的坐标%s' % location)
                self.get_click_words(location)
                self.login()
                time.sleep(6)
                self.driver.switch_to.window(self.driver.window_handles[-1])
                self.logger.info('url:%s' % self.driver.current_url)
                # e.g. url:https://kyfw.12306.cn/otn/index/initMy12306
                if 'initMy' in self.driver.current_url:
                    self.logger.info('用户登录成功')
                    self.driver.save_screenshot(
                        os.path.join(settings.image_path, 'full_login.png'))
                    return True
                check_error = self.driver.find_element(
                    By.ID, 'error_msgmypasscode1')
                check_style = check_error.get_attribute('style')
                print('style=', check_style)
                if 'none' in check_style:
                    # Captcha error message has 'none' in its style:
                    # treated as captcha accepted; do not retry.
                    self.logger.info('验证成功')
                    # self.driver.save_screenshot('True.png')
                    return False
                self.logger.error('验证失败!!!')
            except Exception as e:
                # logger.exception keeps the traceback in the log.
                self.logger.exception('返回异常!!!%s', e)
                return False
        self.logger.error('验证码重试%d次后仍然失败' % max_retries)
        return False


if __name__ == '__main__':
    c = Check_Code_Click_Demo()
    c.open_url()
    c.main()
bin目录
import sys,os

# Project root: the parent of the directory that contains this script.
BASE_DIR = os.path.dirname(
    os.path.dirname(
        os.path.abspath(__file__)
    )
)
# Append it to the module search path so `core` and `conf` can be imported.
sys.path.append(BASE_DIR)

from core import demo
# NOTE(review): core/demo.py imports its logging helper as conf.my_logset;
# confirm that a conf.my_log_settings module really exists.
from conf import my_log_settings

if __name__ == '__main__':
    # my_log_settings.load_my_logging_cfg(__name__)
    de = demo.Check_Code_Click_Demo()
    de.open_url()
    de.main()