Java识别静态验证码和动态验证码
我写了一个简单的 Java 工具类,用于验证码的点阵打印和自动识别。为了提升识别精度和程序性能,该工具类只针对特定类型的验证码;若要用于识别其他类型的验证码,需要做相应调整。
文章分两部分演示了此java工具类如何识别静态验证码图片和动态验证码gif。
一、静态验证码图片识别
输入验证码:
程序运行结果:
======= print and recognize captchapic =======
"................................................................................",
"................................................................................",
"................................................................................",
"................##.##........#####..............................................",
"................##.##.......##...##.............................................",
"................##.##.............##............................................",
"............###.##.##.###........##....#####....................................",
"...........##..###.###..##.....###....##...##...................................",
"..........##....##.##....##......##........##...................................",
"..........##....##.##....##.......##..#######...................................",
"..........##....##.##....##.......##.##....##...................................",
"...........##..###.##....##.##...##..##...###...................................",
"............###.##.##....##..#####....####.##...................................",
"................................................................................",
"................................................................................",
"................................................................................",
"................................................................................",
"................................................................................",
"................................................................................",
"................................................................................"
recognize: dh3a
相应代码如下:
package com.demo.check;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.io.IOUtils;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.stream.FileImageInputStream;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class CaptchaRecognizer {
/**
 * Demo entry point: requests the sample captcha five times and, for every successful
 * download, prints its dot matrix followed by the recognized text.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    HttpClient httpClient = new HttpClient();
    GetMethod getMethod = new GetMethod("https://img2020.cnblogs.com/blog/1039974/202011/1039974-20201119224011928-1654538410.png"); // captcha URL
    for (int i = 0; i < 5; i++) {
        try {
            // Execute the GET request.
            int statusCode = httpClient.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("Method failed: " + getMethod.getStatusLine());
            } else {
                BufferedImage image = readResponseImage(getMethod);
                if (image == null) {
                    // Report the problem explicitly instead of failing later with an NPE.
                    System.err.println("Response body is missing or is not a decodable image");
                } else {
                    System.out.println("======= print and recognize captchapic =======");
                    printImage(image);
                    System.out.printf("recognize: %s\n", recognizeCaptcha(image));
                }
            }
        } catch (Exception e) {
            // Top-level boundary of the demo: report the failure and try the next round.
            e.printStackTrace();
        } finally {
            // Release the connection (this also closes the response stream).
            getMethod.releaseConnection();
        }
    }
}

/**
 * Spools the response body of an already executed request into a temporary file and
 * decodes that file as an image. The temporary file is removed before returning.
 *
 * @param getMethod executed request whose body should be decoded
 * @return the decoded image, or {@code null} when there is no response body or no
 *         registered image reader understands the content
 * @throws IOException if the body cannot be copied or the file cannot be read
 */
private static BufferedImage readResponseImage(GetMethod getMethod) throws IOException {
    InputStream inputStream = getMethod.getResponseBodyAsStream();
    if (inputStream == null) {
        return null;
    }
    File captcha = File.createTempFile("ybt", ".png");
    try {
        OutputStream outStream = new FileOutputStream(captcha);
        try {
            IOUtils.copy(inputStream, outStream);
        } finally {
            // Close even when the copy fails so the file handle is not leaked.
            outStream.close();
        }
        return ImageIO.read(captcha);
    } finally {
        // The file is only a staging area; fall back to delete-on-exit if it is still locked.
        if (!captcha.delete()) {
            captcha.deleteOnExit();
        }
    }
}
/**
 * Decides whether a pixel counts as "black", i.e. as part of a glyph.
 *
 * @param colorInt packed RGB value of the pixel (red in bits 16-23, green in bits 8-15,
 *                 blue in bits 0-7); higher bits are ignored
 * @return {@code true} when red + green + blue is at most 10
 */
private static boolean isBlack(int colorInt) {
    int red = (colorInt >> 16) & 0xFF;
    int green = (colorInt >> 8) & 0xFF;
    int blue = colorInt & 0xFF;
    return red + green + blue <= 10;
}
/**
 * Dumps an image to standard output as a dot matrix, one quoted string per pixel row:
 * '#' marks a black pixel, '.' anything else. Every row except the last is followed by
 * a comma.
 *
 * @param image image to print
 */
private static void printImage(BufferedImage image) {
    int height = image.getHeight();
    int width = image.getWidth();
    for (int row = 0; row < height; row++) {
        // Assemble the whole row first, then emit it with a single println.
        StringBuilder line = new StringBuilder(width + 3);
        line.append('"');
        for (int col = 0; col < width; col++) {
            line.append(isBlack(image.getRGB(col, row)) ? '#' : '.');
        }
        line.append(row == height - 1 ? "\"" : "\",");
        System.out.println(line);
    }
}
/**
 * Recognizes a single glyph by template matching. The sub-image is compared pixel by
 * pixel against every digit template and then every letter template; the symbol whose
 * template differs in the fewest pixels wins. On a tie the earliest template wins
 * (digits before letters), and an exact match returns immediately.
 *
 * @param image sub-image holding one symbol; it must not be wider or taller than the
 *              templates, otherwise the template lookup runs out of bounds
 * @return the best-matching character, '0'-'9' or 'a'-'z'
 */
private static char recognizeSymbol(BufferedImage image) {
    // Binarize once instead of re-evaluating every pixel for each of the templates.
    boolean[][] black = toBlackMask(image);
    String[][][] templateGroups = {digitals, alphas};
    char[] firstSymbols = {'0', 'a'};
    int minDiff = Integer.MAX_VALUE;
    char symAns = 0;
    for (int g = 0; g < templateGroups.length; g++) {
        String[][] templates = templateGroups[g];
        for (int i = 0; i < templates.length; i++) {
            int curDiff = countMismatches(black, templates[i]);
            if (curDiff < minDiff) {
                minDiff = curDiff;
                symAns = (char) (firstSymbols[g] + i);
                if (minDiff == 0) {
                    // Perfect match: no later template can do better.
                    return symAns;
                }
            }
        }
    }
    return symAns;
}

/**
 * Converts an image into a row-major mask where {@code true} marks a black pixel.
 */
private static boolean[][] toBlackMask(BufferedImage image) {
    int h = image.getHeight();
    int w = image.getWidth();
    boolean[][] black = new boolean[h][w];
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < w; x++) {
            black[y][x] = isBlack(image.getRGB(x, y));
        }
    }
    return black;
}

/**
 * Counts the positions at which the mask and the template disagree; a template cell is
 * "black" when its character is '#'.
 */
private static int countMismatches(boolean[][] black, String[] template) {
    int diff = 0;
    for (int y = 0; y < black.length; y++) {
        for (int x = 0; x < black[y].length; x++) {
            if ((template[y].charAt(x) == '#') != black[y][x]) {
                ++diff;
            }
        }
    }
    return diff;
}
/**
 * Cuts the captcha into its four symbol cells. Each cell is 8 pixels wide and 12 pixels
 * high; the cells start at x = 10, 19, 28 and 37 and share the top edge y = 3.
 *
 * @param image captcha image to split
 * @return the four cells in left-to-right order
 */
private static List<BufferedImage> splitImage(BufferedImage image) {
    final int firstLeft = 10;
    final int lastLeft = 37;
    final int pitch = 9; // distance between the left edges of neighbouring cells
    List<BufferedImage> cells = new ArrayList<BufferedImage>(4);
    for (int left = firstLeft; left <= lastLeft; left += pitch) {
        cells.add(image.getSubimage(left, 3, 8, 12));
    }
    return cells;
}
/**
 * Recognizes a whole captcha: the image is split into symbol cells and each cell is
 * recognized on its own, left to right.
 *
 * @param image captcha image to recognize
 * @return the recognized characters, one per cell, in cell order
 */
public static String recognizeCaptcha(BufferedImage image) {
    List<BufferedImage> cells = splitImage(image);
    char[] symbols = new char[cells.size()];
    for (int idx = 0; idx < symbols.length; idx++) {
        // Recognize the cells one after another.
        symbols[idx] = recognizeSymbol(cells.get(idx));
    }
    return new String(symbols);
}
// Reference glyphs for the digits: digitals[i] is the template that recognizeSymbol
// compares against for the character '0' + i. Each template is 12 rows of 8 characters;
// '#' marks a pixel expected to be black, any other character a non-black pixel.
// NOTE(review): the entries for '0' and '1' are completely blank - presumably these
// digits never occur in the target captcha; confirm before reusing the table.
private static String[][] digitals = new String[][]{
// '0' (blank template)
{
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........"
},
// '1' (blank template)
{
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........"
},
// '2'
{
"..####..",
".##..##.",
"##....##",
"......##",
".....##.",
"....##..",
"...##...",
"..##....",
".##.....",
"########",
"........",
"........"
},
// '3'
{
".#####..",
"##...##.",
"......##",
".....##.",
"...###..",
".....##.",
"......##",
"......##",
"##...##.",
".#####..",
"........",
"........"
},
// '4'
{
".....##.",
"....###.",
"...####.",
"..##.##.",
".##..##.",
"##...##.",
"########",
".....##.",
".....##.",
".....##.",
"........",
"........"
},
// '5'
{
"#######.",
"##......",
"##......",
"##.###..",
"###..##.",
"......##",
"......##",
"##....##",
".##..##.",
"..####..",
"........",
"........"
},
// '6'
{
"..####..",
".##..##.",
"##....#.",
"##......",
"##.###..",
"###..##.",
"##....##",
"##....##",
".##..##.",
"..####..",
"........",
"........"
},
// '7'
{
"########",
"......##",
"......##",
".....##.",
"....##..",
"...##...",
"..##....",
".##.....",
"##......",
"##......",
"........",
"........"
},
// '8'
{
"..####..",
".##..##.",
"##....##",
".##..##.",
"..####..",
".##..##.",
"##....##",
"##....##",
".##..##.",
"..####..",
"........",
"........"
},
// '9'
{
"..####..",
".##..##.",
"##....##",
"##....##",
".##..###",
"..###.##",
"......##",
".#....##",
".##..##.",
"..####..",
"........",
"........"
}
};
// Reference glyphs for the lowercase letters: alphas[i] is the template that
// recognizeSymbol compares against for the character 'a' + i. Each template is 12 rows
// of 8 characters; '#' marks a pixel expected to be black, any other character a
// non-black pixel.
// NOTE(review): the entries for 'l' and 'o' are completely blank - presumably these
// letters never occur in the target captcha; confirm before reusing the table.
private static String[][] alphas = new String[][]{
// 'a'
{
"........",
"........",
"........",
"..#####.",
".##...##",
"......##",
".#######",
"##....##",
"##...###",
".####.##",
"........",
"........"
},
// 'b'
{
"##......",
"##......",
"##......",
"##.###..",
"###..##.",
"##....##",
"##....##",
"##....##",
"###..##.",
"##.###..",
"........",
"........"
},
// 'c'
{
"........",
"........",
"........",
"..#####.",
".##...##",
"##......",
"##......",
"##......",
".##...##",
"..#####.",
"........",
"........"
},
// 'd'
{
"......##",
"......##",
"......##",
"..###.##",
".##..###",
"##....##",
"##....##",
"##....##",
".##..###",
"..###.##",
"........",
"........"
},
// 'e'
{
"........",
"........",
"........",
"..####..",
".##..##.",
"##....##",
"########",
"##......",
".##...##",
"..#####.",
"........",
"........"
},
// 'f'
{
"...####.",
"..##..##",
"..##..##",
"..##....",
"..##....",
"######..",
"..##....",
"..##....",
"..##....",
"..##....",
"........",
"........"
},
// 'g'
{
"........",
"........",
"........",
".#####.#",
"##...###",
"##...##.",
"##...##.",
".#####..",
"##......",
".######.",
"##....##",
".######."
},
// 'h'
{
"##......",
"##......",
"##......",
"##.###..",
"###..##.",
"##....##",
"##....##",
"##....##",
"##....##",
"##....##",
"........",
"........"
},
// 'i'
{
"...##...",
"...##...",
"........",
"..###...",
"...##...",
"...##...",
"...##...",
"...##...",
"...##...",
".######.",
"........",
"........"
},
// 'j'
{
".....##.",
".....##.",
"........",
"....###.",
".....##.",
".....##.",
".....##.",
".....##.",
".....##.",
"##...##.",
"##...##.",
".#####.."
},
// 'k'
{
".##.....",
".##.....",
".##.....",
".##..##.",
".##.##..",
".####...",
".####...",
".##.##..",
".##..##.",
".##...##",
"........",
"........"
},
// 'l' (blank template)
{
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........"
},
// 'm'
{
"........",
"........",
"........",
"#.##.##.",
"##.##.##",
"##.##.##",
"##.##.##",
"##.##.##",
"##.##.##",
"##.##.##",
"........",
"........"
},
// 'n'
{
"........",
"........",
"........",
"##.###..",
"###..##.",
"##....##",
"##....##",
"##....##",
"##....##",
"##....##",
"........",
"........"
},
// 'o' (blank template)
{
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........"
},
// 'p'
{
"........",
"........",
"........",
"##.###..",
"###..##.",
"##....##",
"##....##",
"##....##",
"###..##.",
"##.###..",
"##......",
"##......"
},
// 'q'
{
"........",
"........",
"........",
"..###.##",
".##..###",
"##....##",
"##....##",
"##....##",
".##..###",
"..###.##",
"......##",
"......##"
},
// 'r'
{
"........",
"........",
"........",
"##.####.",
".###..##",
".##.....",
".##.....",
".##.....",
".##.....",
".##.....",
"........",
"........"
},
// 's'
{
"........",
"........",
"........",
".######.",
"##....##",
"##......",
".######.",
"......##",
"##....##",
".######.",
"........",
"........"
},
// 't'
{
"........",
"..##....",
"..##....",
"######..",
"..##....",
"..##....",
"..##....",
"..##....",
"..##..##",
"...####.",
"........",
"........"
},
// 'u'
{
"........",
"........",
"........",
"##....##",
"##....##",
"##....##",
"##....##",
"##....##",
".##..###",
"..###.##",
"........",
"........"
},
// 'v'
{
"........",
"........",
"........",
"##....##",
"##....##",
".##..##.",
".##..##.",
"..####..",
"..####..",
"...##...",
"........",
"........"
},
// 'w'
{
"........",
"........",
"........",
"##....##",
"##....##",
"##.##.##",
"##.##.##",
"##.##.##",
"########",
".##..##.",
"........",
"........"
},
// 'x'
{
"........",
"........",
"........",
"##....##",
".##..##.",
"..####..",
"...##...",
"..####..",
".##..##.",
"##....##",
"........",
"........"
},
// 'y'
{
"........",
"........",
"........",
"##....##",
"##....##",
"##....##",
"##....##",
"##....##",
".##..###",
"..###.##",
"#.....##",
".######."
},
// 'z'
{
"........",
"........",
"........",
".######.",
".....##.",
"....##..",
"...##...",
"..##....",
".##.....",
".######.",
"........",
"........"
}
};
}
二、动态验证码gif识别
动态验证码gif的识别和静态验证码图片的识别非常相似,两者之间唯一的区别在于gif是由多帧静态图片所构成的。所以我们处理gif的思路很简单:从特定帧的静态图片中,识别出需要的验证符号。
输入验证码:
程序运行结果:
======= print and recognize captchapic =======
"..................................................................................................................................................",
"..................................................................................................................................................",
"....................#####.........................................................................................................................",
".................###.....###......................................................................................................................",
".................#.........#......................................................................................................................",
".................#..........#.....................................................................................................................",
"................#...........#.....................................................................................................................",
"................#.#######....#..........................######....................................................................................",
"................###########.#.........................##########..................................................................................",
"................#############........................###########..................................................................................",
"................##.....#####.........................##.....#####.................................................................................",
".................##.....####................................#####.................................................................................",
"...................###.####..................................####........................#######..................................................",
"......................#####.................................#######....................##########.................................................",
".....................#####..................................####....#..................###########................................................",
".................########..................................####......#.................#.....#####................................................",
".................#######..................................####.......#........................####................................................",
".................#########...............................####.........#.......................####................................................",
".....................######.............................###.#........#...................########.................................................",
"....................#######...........................####...#.......#.................##########.................................................",
"..................##...######........................####.....##..###.................####...####.................................................",
".................#.....####..#......................####.........#...................####....####.................................................",
".................#....#####..#.....................####..............................####....####.................................................",
"..............#...##.#####.##.....................####...............................####...#####.................................................",
".............##############......................#############.......................#############................................................",
".............###########.........................#############.......................######..#####................................................",
"...............#######...........................#############........................####...#####................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
".................................................................................................................................................."
<<< frame >>>
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
".................................................................................................................................#####............",
"...............................................................................................................................########...........",
"........................................................######................................................................########............",
"......................................................##########.........................................##...................####...#............",
".....................................................###########........................................#..#.................####.................",
".....................................................##.....#####.......................................#..#.................####.................",
"............................................................#####......................................#....#................####.................",
".............................................................####........................#######.......#....#..............##########.............",
"............................................................####.......................##########......#....#.............##########..............",
"............................................................####.......................###########.....#....#.............##########..............",
"...........................................................####........................#.....#####.....#....#...............####..................",
"..........................................................####................................####......#..#................####..................",
".........................................................####.................................####......#..#................####..................",
"........................................................###..............................########........##................####...................",
"......................................................####.............................##########..........................####...................",
".....................................................####.............................####...####..........................####...................",
"....................................................####......................####...####....####..........................####...................",
"...................................................####......................#....#..####....####.......####...............####...................",
"..................................................####.......................#....#..####...#####.....##....###............####...................",
".................................................#############...............#.....#.#############...#.........#..........####....................",
".................................................#############...............#....#..######..#####...#.........#..........####....................",
".................................................#############...............#....#...####...#####..#...........#.........####....................",
"..............................................................................##.#..................#...........#.................................",
"................................................................................#...................#............#................................",
"....................................................................................................#...........#.................................",
".....................................................................................................#..........#.................................",
".....................................................................................................#.........#..................................",
".....................................................................................................##.......##.................................."
recognize: 32af
以下贴出gif的分割函数,其它过程和第一部分基本相同,之后逐个解析静态子图片即可。
/**
 * Splits an animated GIF captcha into one sub-image per symbol. Frames are visited from
 * last to first: loop step {@code i} decodes frame {@code num - i - 1} and cuts out the
 * 30 x 27 cell whose top-left corner is at (7 + i * 36, 5). The frames handled in steps
 * 1 and 2 are additionally printed as dot matrices.
 *
 * <p>NOTE(review): the cell geometry is hard-coded, so the frame count and frame size
 * must fit it (presumably a four-frame, four-symbol captcha) - otherwise
 * {@code getSubimage} rejects the region; confirm against the target captcha.
 *
 * @param file GIF file to split
 * @return the symbol sub-images, in loop order
 * @throws IOException if the file cannot be opened or a frame cannot be decoded
 */
private static List<BufferedImage> splitGif(File file) throws IOException {
    // Look the reader up through the public ImageIO registry rather than instantiating
    // the JDK-internal com.sun.imageio.plugins.gif classes directly.
    Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName("gif");
    if (!readers.hasNext()) {
        throw new IOException("No GIF image reader is registered");
    }
    ImageReader gifReader = readers.next();
    FileImageInputStream in = new FileImageInputStream(file);
    List<BufferedImage> subImgs = new ArrayList<BufferedImage>();
    try {
        gifReader.setInput(in);
        int num = gifReader.getNumImages(true);
        for (int i = 0; i < num; i++) {
            // Use the decoded frame directly; no temporary file round trip is needed.
            BufferedImage image = gifReader.read(num - i - 1);
            if (i == 1 || i == 2) {
                printImage(image);
                System.out.println("<<< frame >>>");
            }
            // Cut the symbol cell out of the frame for later recognition.
            subImgs.add(image.getSubimage(7 + i * 36, 5, 30, 27));
        }
    } finally {
        // Release the reader and the file handle even when decoding fails.
        try {
            gifReader.dispose();
        } finally {
            in.close();
        }
    }
    return subImgs;
}
参考链接
[1] https://blog.csdn.net/problc/article/details/5794460#commentBox
[2] https://blog.csdn.net/lmj623565791/article/details/23960391/
[3] https://blog.csdn.net/chwshuang/article/details/64923354