1.准备验证码图片
import os
from urllib.request import urlretrieve

# Captcha endpoint. The script requests this same URL repeatedly; presumably
# the server returns a different image each time -- TODO confirm.
urlPath = 'http://www.189.cn/portal/captcha/simple.do?date=1503125232339'
# Raw string: the backslashes are taken literally instead of being parsed as
# (invalid) escape sequences such as "\p" and "\c".
localPath = r'G:\python_work\chapter11\captcha'


def createFileByFileName(localPath, fileName):
    """Make sure ``fileName`` exists inside ``localPath`` and return its path.

    The directory (including missing parents) is created when necessary, and
    an empty file is created when nothing exists at the target path yet.
    An existing file is left untouched.

    Args:
        localPath: Directory that should contain the file.
        fileName: Name of the file inside ``localPath``.

    Returns:
        The joined path of ``localPath`` and ``fileName``.
    """
    os.makedirs(localPath, exist_ok=True)
    # os.path.join uses the separator of the current platform rather than a
    # hard-coded backslash.
    totalPath = os.path.join(localPath, fileName)
    if not os.path.exists(totalPath):
        # Append mode creates the file without truncating anything.
        with open(totalPath, 'a+'):
            pass
    return totalPath


def getAndSaveImg(imgUrl, filename):
    """Download the image at ``imgUrl`` to ``<localPath>/<filename>.jpg``.

    Nothing happens when ``imgUrl`` is empty or ``None``.

    Args:
        imgUrl: URL of the image to download.
        filename: Target file name without the ``.jpg`` extension.
    """
    if not imgUrl:
        return
    urlretrieve(imgUrl, createFileByFileName(localPath, filename + '.jpg'))


if __name__ == '__main__':
    # Download 100 captcha images, saved as 0.jpg ... 99.jpg.
    for i in range(100):
        getAndSaveImg(urlPath, str(i))
2. 将验证码图片转为灰度图, 并将扩展名改为 .tif
import os


def getFileList(path):
    """Return the paths of all JPG images directly inside ``path``.

    The extension is matched case-insensitively (``.jpg`` and ``.JPG``) and
    the result is sorted so that the processing order is deterministic.

    Args:
        path: Directory to scan (not recursive).

    Returns:
        A sorted list of ``path`` joined with each matching file name.
    """
    return sorted(
        os.path.join(path, f)
        for f in os.listdir(path)
        if f.lower().endswith('.jpg')
    )


def getFileNameAndExt(filename):
    """Split a path into its base name without extension and its extension.

    Args:
        filename: A file name or path.

    Returns:
        A ``(shotname, extension)`` tuple; the extension includes the leading
        dot and is an empty string when there is none.
    """
    shotname, extension = os.path.splitext(os.path.basename(filename))
    return shotname, extension


def patchConvert(srcPath, destPath):
    """Convert every JPG image in ``srcPath`` to a grayscale ``.tif`` file.

    Each output file keeps the base name of its source image and is written
    to ``destPath``, which is created when it does not exist yet.

    Args:
        srcPath: Directory containing the source ``.jpg`` images.
        destPath: Directory that receives the ``.tif`` images.
    """
    # Imported here so the path helpers in this file stay usable even when
    # Pillow is not installed.
    from PIL import Image

    if destPath:
        # Saving fails when the output directory is missing.
        os.makedirs(destPath, exist_ok=True)
    for file in getFileList(srcPath):
        shotname, _ = getFileNameAndExt(file)
        destFile = os.path.join(destPath, shotname + '.tif')
        # The context manager closes the source file once it is converted.
        with Image.open(file) as img:
            # Mode 'L' is 8-bit grayscale.
            img.convert('L').save(destFile)


if __name__ == '__main__':
    patchConvert('captcha', 'captcha_gray')
3. 后续步骤参考原先的博客:
http://www.cnblogs.com/CoolJayson/p/7395824.html
问题: 生成 box 文件后, 用 jTessBoxEditor 修改 box 文件时, 自动生成的矩形框误差较大, 识别出的字符个数也不准确, 应该如何修正这些矩形框?