Java识别静态验证码和动态验证码

写了一个简单的 Java 工具类,用于验证码的点阵打印和自动识别。为了提升识别精度和程序性能,此工具类只针对特定类型的验证码(字符位置固定、字体点阵固定);若要用于其他类型的验证码识别,需要相应调整字符的切割坐标和点阵模板。

文章分两部分,分别演示此 Java 工具类如何识别静态验证码图片和动态验证码 GIF。

一、静态验证码图片识别

输入验证码:

程序运行结果:

======= print and recognize captchapic  =======
"................................................................................",
"................................................................................",
"................................................................................",
"................##.##........#####..............................................",
"................##.##.......##...##.............................................",
"................##.##.............##............................................",
"............###.##.##.###........##....#####....................................",
"...........##..###.###..##.....###....##...##...................................",
"..........##....##.##....##......##........##...................................",
"..........##....##.##....##.......##..#######...................................",
"..........##....##.##....##.......##.##....##...................................",
"...........##..###.##....##.##...##..##...###...................................",
"............###.##.##....##..#####....####.##...................................",
"................................................................................",
"................................................................................",
"................................................................................",
"................................................................................",
"................................................................................",
"................................................................................",
"................................................................................"
recognize: dh3a

相应代码如下:

package com.demo.check;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.io.IOUtils;

import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Prints a captcha image as a dot matrix and recognizes its symbols by template matching.
 *
 * <p>The recognizer is tailored to one captcha layout: four symbols, each occupying a fixed
 * 8x12 pixel cell, drawn with near-black pixels in one fixed font. Every cell is compared
 * pixel by pixel against the templates in {@link #digitals} and {@link #alphas}; the template
 * with the fewest differing pixels wins.
 */
public class CaptchaRecognizer {

    /** Number of symbols in one captcha. */
    private static final int SYMBOL_COUNT = 4;
    /** Width of one symbol cell (and of every template row), in pixels. */
    private static final int SYMBOL_WIDTH = 8;
    /** Height of one symbol cell (and of every template), in pixels. */
    private static final int SYMBOL_HEIGHT = 12;
    /** X coordinate of the left edge of the first symbol cell. */
    private static final int FIRST_SYMBOL_X = 10;
    /** Y coordinate of the top edge of every symbol cell. */
    private static final int SYMBOL_TOP_Y = 3;
    /** Horizontal distance between the left edges of two neighbouring cells (8 px glyph + 1 px gap). */
    private static final int SYMBOL_PITCH = 9;
    /** A pixel counts as black when red + green + blue does not exceed this value. */
    private static final int BLACK_THRESHOLD = 10;

    /**
     * Downloads the captcha five times and, for each successful download, prints the image as
     * a dot matrix followed by the recognized text.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        HttpClient httpClient = new HttpClient();
        GetMethod getMethod = new GetMethod("https://img2020.cnblogs.com/blog/1039974/202011/1039974-20201119224011928-1654538410.png"); // captcha URL
        for (int i = 0; i < 5; i++) {
            File captcha = null;
            try {
                // Execute the GET request.
                int statusCode = httpClient.executeMethod(getMethod);
                if (statusCode != HttpStatus.SC_OK) {
                    System.err.println("Method failed: " + getMethod.getStatusLine());
                    continue;
                }

                captcha = File.createTempFile("ybt", ".png");
                saveResponseBody(getMethod, captcha);

                // ImageIO.read returns null (it does not throw) when no registered reader
                // can decode the data, e.g. when the server answered with an HTML page.
                BufferedImage image = ImageIO.read(captcha);
                if (image == null) {
                    System.err.println("Response body is not a decodable image");
                    continue;
                }

                System.out.println("======= print and recognize captchapic  =======");
                printImage(image);
                System.out.printf("recognize: %s\n", recognizeCaptcha(image));
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // Release the connection.
                getMethod.releaseConnection();
                // The temp file is only a decoding buffer; do not leave it behind.
                if (captcha != null && !captcha.delete()) {
                    captcha.deleteOnExit();
                }
            }
        }
    }

    /**
     * Copies the response body of an executed request into a file, closing both streams even
     * when the copy fails.
     *
     * @param method the executed request whose body is saved
     * @param target the file to write the body to
     * @throws IOException if the response has no body or the copy fails
     */
    private static void saveResponseBody(GetMethod method, File target) throws IOException {
        InputStream body = method.getResponseBodyAsStream();
        if (body == null) {
            throw new IOException("Response has no body");
        }
        try {
            OutputStream out = new FileOutputStream(target);
            try {
                IOUtils.copy(body, out);
            } finally {
                out.close();
            }
        } finally {
            body.close();
        }
    }

    /**
     * Tells whether a pixel is (nearly) black. The alpha bits are ignored.
     *
     * @param colorInt the pixel value with red in bits 16-23, green in bits 8-15 and blue in
     *                 bits 0-7, as returned by {@link BufferedImage#getRGB(int, int)}
     * @return {@code true} if the sum of the three channels is at most {@value #BLACK_THRESHOLD}
     */
    private static boolean isBlack(int colorInt) {
        int red = (colorInt >> 16) & 0xFF;
        int green = (colorInt >> 8) & 0xFF;
        int blue = colorInt & 0xFF;
        return red + green + blue <= BLACK_THRESHOLD;
    }

    /**
     * Prints the image to standard output as a dot matrix, one quoted line per pixel row:
     * {@code #} for a black pixel and {@code .} for any other. Every line except the last
     * ends with a comma, so the output can be pasted into a Java {@code String[]} literal.
     *
     * @param image the image to print
     */
    private static void printImage(BufferedImage image) {
        int h = image.getHeight();
        int w = image.getWidth();

        for (int y = 0; y < h; y++) {
            StringBuilder row = new StringBuilder(w + 3);
            row.append('"');
            for (int x = 0; x < w; x++) {
                row.append(isBlack(image.getRGB(x, y)) ? '#' : '.');
            }
            row.append(y == h - 1 ? "\"" : "\",");
            System.out.println(row);
        }
    }

    /**
     * Recognizes the single symbol shown in one symbol cell.
     *
     * <p>Digit templates are tried first ('0'..'9'), then letter templates ('a'..'z'). The
     * template with the fewest differing pixels wins; on a tie the earlier template is kept.
     * The search stops as soon as a template matches exactly.
     *
     * @param image the symbol cell; must be exactly 8x12 pixels
     * @return the recognized character
     * @throws IllegalArgumentException if the image does not have the template size
     */
    private static char recognizeSymbol(BufferedImage image) {
        if (image.getWidth() != SYMBOL_WIDTH || image.getHeight() != SYMBOL_HEIGHT) {
            throw new IllegalArgumentException("Symbol image must be " + SYMBOL_WIDTH + "x" + SYMBOL_HEIGHT
                    + " but is " + image.getWidth() + "x" + image.getHeight());
        }

        // Classify every pixel once instead of once per template.
        boolean[][] ink = toInkGrid(image);

        int minDiff = Integer.MAX_VALUE;
        char symAns = 0;

        // Try every digit template.
        for (int i = 0; i < digitals.length; i++) {
            int curDiff = countDiff(digitals[i], ink);
            if (curDiff < minDiff) {
                minDiff = curDiff;
                symAns = (char) ('0' + i);
                if (minDiff == 0) {
                    return symAns;
                }
            }
        }

        // Try every letter template.
        for (int i = 0; i < alphas.length; i++) {
            int curDiff = countDiff(alphas[i], ink);
            if (curDiff < minDiff) {
                minDiff = curDiff;
                symAns = (char) ('a' + i);
                if (minDiff == 0) {
                    return symAns;
                }
            }
        }

        return symAns;
    }

    /**
     * Converts an image into a grid of black / not-black flags.
     *
     * @param image the image to classify
     * @return a grid indexed as {@code [y][x]}; {@code true} marks a black pixel
     */
    private static boolean[][] toInkGrid(BufferedImage image) {
        int h = image.getHeight();
        int w = image.getWidth();
        boolean[][] ink = new boolean[h][w];
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x++) {
                ink[y][x] = isBlack(image.getRGB(x, y));
            }
        }
        return ink;
    }

    /**
     * Counts the pixels at which a template and an ink grid disagree.
     *
     * @param template the template rows; {@code #} marks a black pixel
     * @param ink      the ink grid, indexed as {@code [y][x]}, with the same size as the template
     * @return the number of differing pixels
     */
    private static int countDiff(String[] template, boolean[][] ink) {
        int diff = 0;
        for (int y = 0; y < ink.length; y++) {
            String templateRow = template[y];
            boolean[] inkRow = ink[y];
            for (int x = 0; x < inkRow.length; x++) {
                if ((templateRow.charAt(x) == '#') != inkRow[x]) {
                    ++diff;
                }
            }
        }
        return diff;
    }

    /**
     * Cuts the captcha into its four symbol cells, left to right. The cells are 8x12 pixels,
     * start at x = 10, 19, 28 and 37, and all have their top edge at y = 3.
     *
     * @param image the captcha to split
     * @return the four symbol cells; they share pixel data with {@code image}
     */
    private static List<BufferedImage> splitImage(BufferedImage image) {
        List<BufferedImage> subImgs = new ArrayList<BufferedImage>(SYMBOL_COUNT);
        for (int i = 0; i < SYMBOL_COUNT; i++) {
            int left = FIRST_SYMBOL_X + i * SYMBOL_PITCH;
            subImgs.add(image.getSubimage(left, SYMBOL_TOP_Y, SYMBOL_WIDTH, SYMBOL_HEIGHT));
        }
        return subImgs;
    }

    /**
     * Recognizes the four-symbol text of a captcha image.
     *
     * @param image the captcha to recognize; must be large enough to contain all four symbol
     *              cells (at least 45 pixels wide and 15 pixels high)
     * @return the recognized text, one character per symbol cell
     * @throws java.awt.image.RasterFormatException if a symbol cell lies outside the image
     */
    public static String recognizeCaptcha(BufferedImage image) {
        StringBuilder ans = new StringBuilder();

        // Recognize the symbol cells one after another.
        for (BufferedImage subImg : splitImage(image)) {
            ans.append(recognizeSymbol(subImg));
        }
        return ans.toString();
    }

    /**
     * Templates for the digits '0'..'9', indexed by digit value. Each template is 12 rows of
     * 8 characters; {@code #} marks a black pixel.
     *
     * <p>The templates for '0' and '1' are all-blank placeholders (presumably because the
     * target captcha never shows these easily confused characters). As a consequence a
     * completely blank cell matches the '0' template exactly.
     */
    private static final String[][] digitals = new String[][]{
            { // '0' (blank placeholder)
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........"
            },
            { // '1' (blank placeholder)
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........"
            },
            { // '2'
                    "..####..",
                    ".##..##.",
                    "##....##",
                    "......##",
                    ".....##.",
                    "....##..",
                    "...##...",
                    "..##....",
                    ".##.....",
                    "########",
                    "........",
                    "........"
            },
            { // '3'
                    ".#####..",
                    "##...##.",
                    "......##",
                    ".....##.",
                    "...###..",
                    ".....##.",
                    "......##",
                    "......##",
                    "##...##.",
                    ".#####..",
                    "........",
                    "........"
            },
            { // '4'
                    ".....##.",
                    "....###.",
                    "...####.",
                    "..##.##.",
                    ".##..##.",
                    "##...##.",
                    "########",
                    ".....##.",
                    ".....##.",
                    ".....##.",
                    "........",
                    "........"
            },
            { // '5'
                    "#######.",
                    "##......",
                    "##......",
                    "##.###..",
                    "###..##.",
                    "......##",
                    "......##",
                    "##....##",
                    ".##..##.",
                    "..####..",
                    "........",
                    "........"
            },
            { // '6'
                    "..####..",
                    ".##..##.",
                    "##....#.",
                    "##......",
                    "##.###..",
                    "###..##.",
                    "##....##",
                    "##....##",
                    ".##..##.",
                    "..####..",
                    "........",
                    "........"
            },
            { // '7'
                    "########",
                    "......##",
                    "......##",
                    ".....##.",
                    "....##..",
                    "...##...",
                    "..##....",
                    ".##.....",
                    "##......",
                    "##......",
                    "........",
                    "........"
            },
            { // '8'
                    "..####..",
                    ".##..##.",
                    "##....##",
                    ".##..##.",
                    "..####..",
                    ".##..##.",
                    "##....##",
                    "##....##",
                    ".##..##.",
                    "..####..",
                    "........",
                    "........"
            },
            { // '9'
                    "..####..",
                    ".##..##.",
                    "##....##",
                    "##....##",
                    ".##..###",
                    "..###.##",
                    "......##",
                    ".#....##",
                    ".##..##.",
                    "..####..",
                    "........",
                    "........"
            }
    };

    /**
     * Templates for the lower-case letters 'a'..'z', indexed by offset from 'a'. Each template
     * is 12 rows of 8 characters; {@code #} marks a black pixel.
     *
     * <p>The templates for 'l' and 'o' are all-blank placeholders (presumably because the
     * target captcha never shows these easily confused characters).
     */
    private static final String[][] alphas = new String[][]{
            { // 'a'
                    "........",
                    "........",
                    "........",
                    "..#####.",
                    ".##...##",
                    "......##",
                    ".#######",
                    "##....##",
                    "##...###",
                    ".####.##",
                    "........",
                    "........"
            },
            { // 'b'
                    "##......",
                    "##......",
                    "##......",
                    "##.###..",
                    "###..##.",
                    "##....##",
                    "##....##",
                    "##....##",
                    "###..##.",
                    "##.###..",
                    "........",
                    "........"
            },
            { // 'c'
                    "........",
                    "........",
                    "........",
                    "..#####.",
                    ".##...##",
                    "##......",
                    "##......",
                    "##......",
                    ".##...##",
                    "..#####.",
                    "........",
                    "........"
            },
            { // 'd'
                    "......##",
                    "......##",
                    "......##",
                    "..###.##",
                    ".##..###",
                    "##....##",
                    "##....##",
                    "##....##",
                    ".##..###",
                    "..###.##",
                    "........",
                    "........"
            },
            { // 'e'
                    "........",
                    "........",
                    "........",
                    "..####..",
                    ".##..##.",
                    "##....##",
                    "########",
                    "##......",
                    ".##...##",
                    "..#####.",
                    "........",
                    "........"
            },
            { // 'f'
                    "...####.",
                    "..##..##",
                    "..##..##",
                    "..##....",
                    "..##....",
                    "######..",
                    "..##....",
                    "..##....",
                    "..##....",
                    "..##....",
                    "........",
                    "........"
            },
            { // 'g'
                    "........",
                    "........",
                    "........",
                    ".#####.#",
                    "##...###",
                    "##...##.",
                    "##...##.",
                    ".#####..",
                    "##......",
                    ".######.",
                    "##....##",
                    ".######."
            },
            { // 'h'
                    "##......",
                    "##......",
                    "##......",
                    "##.###..",
                    "###..##.",
                    "##....##",
                    "##....##",
                    "##....##",
                    "##....##",
                    "##....##",
                    "........",
                    "........"
            },
            { // 'i'
                    "...##...",
                    "...##...",
                    "........",
                    "..###...",
                    "...##...",
                    "...##...",
                    "...##...",
                    "...##...",
                    "...##...",
                    ".######.",
                    "........",
                    "........"
            },
            { // 'j'
                    ".....##.",
                    ".....##.",
                    "........",
                    "....###.",
                    ".....##.",
                    ".....##.",
                    ".....##.",
                    ".....##.",
                    ".....##.",
                    "##...##.",
                    "##...##.",
                    ".#####.."
            },
            { // 'k'
                    ".##.....",
                    ".##.....",
                    ".##.....",
                    ".##..##.",
                    ".##.##..",
                    ".####...",
                    ".####...",
                    ".##.##..",
                    ".##..##.",
                    ".##...##",
                    "........",
                    "........"
            },
            { // 'l' (blank placeholder)
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........"
            },
            { // 'm'
                    "........",
                    "........",
                    "........",
                    "#.##.##.",
                    "##.##.##",
                    "##.##.##",
                    "##.##.##",
                    "##.##.##",
                    "##.##.##",
                    "##.##.##",
                    "........",
                    "........"
            },
            { // 'n'
                    "........",
                    "........",
                    "........",
                    "##.###..",
                    "###..##.",
                    "##....##",
                    "##....##",
                    "##....##",
                    "##....##",
                    "##....##",
                    "........",
                    "........"
            },
            { // 'o' (blank placeholder)
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........",
                    "........"
            },
            { // 'p'
                    "........",
                    "........",
                    "........",
                    "##.###..",
                    "###..##.",
                    "##....##",
                    "##....##",
                    "##....##",
                    "###..##.",
                    "##.###..",
                    "##......",
                    "##......"
            },
            { // 'q'
                    "........",
                    "........",
                    "........",
                    "..###.##",
                    ".##..###",
                    "##....##",
                    "##....##",
                    "##....##",
                    ".##..###",
                    "..###.##",
                    "......##",
                    "......##"
            },
            { // 'r'
                    "........",
                    "........",
                    "........",
                    "##.####.",
                    ".###..##",
                    ".##.....",
                    ".##.....",
                    ".##.....",
                    ".##.....",
                    ".##.....",
                    "........",
                    "........"
            },
            { // 's'
                    "........",
                    "........",
                    "........",
                    ".######.",
                    "##....##",
                    "##......",
                    ".######.",
                    "......##",
                    "##....##",
                    ".######.",
                    "........",
                    "........"
            },
            { // 't'
                    "........",
                    "..##....",
                    "..##....",
                    "######..",
                    "..##....",
                    "..##....",
                    "..##....",
                    "..##....",
                    "..##..##",
                    "...####.",
                    "........",
                    "........"
            },
            { // 'u'
                    "........",
                    "........",
                    "........",
                    "##....##",
                    "##....##",
                    "##....##",
                    "##....##",
                    "##....##",
                    ".##..###",
                    "..###.##",
                    "........",
                    "........"
            },
            { // 'v'
                    "........",
                    "........",
                    "........",
                    "##....##",
                    "##....##",
                    ".##..##.",
                    ".##..##.",
                    "..####..",
                    "..####..",
                    "...##...",
                    "........",
                    "........"
            },
            { // 'w'
                    "........",
                    "........",
                    "........",
                    "##....##",
                    "##....##",
                    "##.##.##",
                    "##.##.##",
                    "##.##.##",
                    "########",
                    ".##..##.",
                    "........",
                    "........"
            },
            { // 'x'
                    "........",
                    "........",
                    "........",
                    "##....##",
                    ".##..##.",
                    "..####..",
                    "...##...",
                    "..####..",
                    ".##..##.",
                    "##....##",
                    "........",
                    "........"
            },
            { // 'y'
                    "........",
                    "........",
                    "........",
                    "##....##",
                    "##....##",
                    "##....##",
                    "##....##",
                    "##....##",
                    ".##..###",
                    "..###.##",
                    "#.....##",
                    ".######."
            },
            { // 'z'
                    "........",
                    "........",
                    "........",
                    ".######.",
                    ".....##.",
                    "....##..",
                    "...##...",
                    "..##....",
                    ".##.....",
                    ".######.",
                    "........",
                    "........"
            }
    };
}

二、动态验证码gif识别

动态验证码 GIF 的识别和静态验证码图片的识别非常相似,两者之间唯一的区别在于 GIF 是由多帧静态图片构成的。所以处理 GIF 的思路很简单:从特定帧的静态图片中,识别出所需的验证符号。

输入验证码:

程序运行结果:

======= print and recognize captchapic  =======
"..................................................................................................................................................",
"..................................................................................................................................................",
"....................#####.........................................................................................................................",
".................###.....###......................................................................................................................",
".................#.........#......................................................................................................................",
".................#..........#.....................................................................................................................",
"................#...........#.....................................................................................................................",
"................#.#######....#..........................######....................................................................................",
"................###########.#.........................##########..................................................................................",
"................#############........................###########..................................................................................",
"................##.....#####.........................##.....#####.................................................................................",
".................##.....####................................#####.................................................................................",
"...................###.####..................................####........................#######..................................................",
"......................#####.................................#######....................##########.................................................",
".....................#####..................................####....#..................###########................................................",
".................########..................................####......#.................#.....#####................................................",
".................#######..................................####.......#........................####................................................",
".................#########...............................####.........#.......................####................................................",
".....................######.............................###.#........#...................########.................................................",
"....................#######...........................####...#.......#.................##########.................................................",
"..................##...######........................####.....##..###.................####...####.................................................",
".................#.....####..#......................####.........#...................####....####.................................................",
".................#....#####..#.....................####..............................####....####.................................................",
"..............#...##.#####.##.....................####...............................####...#####.................................................",
".............##############......................#############.......................#############................................................",
".............###########.........................#############.......................######..#####................................................",
"...............#######...........................#############........................####...#####................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
".................................................................................................................................................."
<<< frame >>>
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
".................................................................................................................................#####............",
"...............................................................................................................................########...........",
"........................................................######................................................................########............",
"......................................................##########.........................................##...................####...#............",
".....................................................###########........................................#..#.................####.................",
".....................................................##.....#####.......................................#..#.................####.................",
"............................................................#####......................................#....#................####.................",
".............................................................####........................#######.......#....#..............##########.............",
"............................................................####.......................##########......#....#.............##########..............",
"............................................................####.......................###########.....#....#.............##########..............",
"...........................................................####........................#.....#####.....#....#...............####..................",
"..........................................................####................................####......#..#................####..................",
".........................................................####.................................####......#..#................####..................",
"........................................................###..............................########........##................####...................",
"......................................................####.............................##########..........................####...................",
".....................................................####.............................####...####..........................####...................",
"....................................................####......................####...####....####..........................####...................",
"...................................................####......................#....#..####....####.......####...............####...................",
"..................................................####.......................#....#..####...#####.....##....###............####...................",
".................................................#############...............#.....#.#############...#.........#..........####....................",
".................................................#############...............#....#..######..#####...#.........#..........####....................",
".................................................#############...............#....#...####...#####..#...........#.........####....................",
"..............................................................................##.#..................#...........#.................................",
"................................................................................#...................#............#................................",
"....................................................................................................#...........#.................................",
".....................................................................................................#..........#.................................",
".....................................................................................................#.........#..................................",
".....................................................................................................##.......##.................................."
recognize: 32af

以下贴出gif的分割函数,其它过程和第一部分基本相同,之后逐个解析静态子图片即可。

    /**
     * Splits an animated GIF captcha into one symbol image per frame.
     *
     * <p>Frames are visited from the last to the first: iteration {@code i} decodes frame
     * {@code num - 1 - i} and cuts a 30x27 region whose left edge is at {@code x = 7 + 36 * i}
     * and whose top edge is at {@code y = 5}. (Presumably each frame shows one symbol of the
     * captcha clearly; verify against the target GIF.) The frames of iterations 1 and 2 are
     * also printed as dot matrices, each followed by a {@code <<< frame >>>} marker.
     *
     * @param file the GIF file to split
     * @return the cut-out symbol images, in iteration order
     * @throws IOException if no GIF reader is available or the file cannot be read or decoded
     */
    private static List<BufferedImage> splitGif(File file) throws IOException {
        // Geometry of one symbol cell; specific to the captcha this tool targets.
        final int firstSymbolX = 7;
        final int symbolPitch = 36;
        final int symbolTopY = 5;
        final int symbolWidth = 30;
        final int symbolHeight = 27;

        List<BufferedImage> subImgs = new ArrayList<BufferedImage>();
        FileImageInputStream in = new FileImageInputStream(file);
        // Fully-qualified so that this method needs no additional import lines.
        javax.imageio.ImageReader gifReader = null;
        try {
            // Obtain the decoder from the public ImageIO registry rather than instantiating
            // com.sun.imageio.plugins.gif.* directly: those classes are JDK-internal and are
            // not accessible to application code on Java 9 and later.
            java.util.Iterator<javax.imageio.ImageReader> readers = ImageIO.getImageReadersByFormatName("gif");
            if (!readers.hasNext()) {
                throw new IOException("No GIF image reader is registered with ImageIO");
            }
            gifReader = readers.next();
            gifReader.setInput(in);
            int num = gifReader.getNumImages(true);

            for (int i = 0; i < num; i++) {
                // The decoded frame is used directly; re-encoding it into a temporary file
                // and reading it back would only leave stray files behind.
                BufferedImage image = gifReader.read(num - i - 1);
                if (i == 1 || i == 2) {
                    printImage(image);
                    System.out.println("<<< frame >>>");
                }
                // Cut the symbol cell out of the frame for later recognition.
                subImgs.add(image.getSubimage(firstSymbolX + i * symbolPitch, symbolTopY, symbolWidth, symbolHeight));
            }
        } finally {
            if (gifReader != null) {
                gifReader.dispose();
            }
            in.close();
        }
        return subImgs;
    }

参考链接

[1] https://blog.csdn.net/problc/article/details/5794460#commentBox
[2] https://blog.csdn.net/lmj623565791/article/details/23960391/
[3] https://blog.csdn.net/chwshuang/article/details/64923354

posted @ 2020-11-19 23:15  小z同学  阅读(1192)  评论(0编辑  收藏  举报