Java OCR tesseract 图像智能字符识别技术 Java实现
Java OCR tesseract 图像智能字符识别技术 Java代码实现
接着上一篇 OCR 所说的:上一篇给大家介绍了 tesseract 在命令行下的简单用法。当然了,要集成到我们的程序中,还是需要代码实现的,下面给大家分享一个 Java 实现的例子。
拿代码扫描上面的图片,然后输出结果。主要思想就是利用Java调用系统任务。
下面是核心代码:
/** * */ package cn.jorcen.dropins.tesseract; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.UnsupportedEncodingException; import java.util.LinkedList; import java.util.List; import org.apache.commons.io.IOUtils; import org.apache.log4j.Logger; /** * * * @author mjorcen * @email mjorcen@gmail.com * @dateTime Jun 19, 2014 3:42:16 PM * @version 1 */ public class TesseractOCRUtil { static Logger logger = Logger.getLogger(TesseractOCRUtil.class); static String path = "E:/data/Users/Administrator/Desktop/ocr/spelling"; public static void main(String[] args) throws Exception { File file = new File(path); String[] strs = file.list(); for (String string : strs) { File iFile = new File(path, string); if (iFile.isFile()) { parseImage(new File(file.getAbsolutePath(), string), new File( path + "/tmp", iFile.getName())); } } System.exit(0); } public static String parseImage(File file, File targetFile) throws Exception { ClearImageUtil.cleanImage(file, targetFile); return parseImageOnNoClear(targetFile); } public static String parseImageOnNoClear(File file) throws Exception { try { logger.debug("image is " + file.getAbsolutePath()); // ClearImageHelper.cleanImage(file, filename); // 构造命令 // List<String> cmd = new LinkedList<String>(); // cmd.add("tesseract"); // cmd.add(file.getAbsolutePath()); // cmd.add(file.getAbsolutePath()); // cmd.add(" "); // cmd.add("-l"); // cmd.add(" "); // cmd.add("normal"); // logger.debug(cmd); // System.out.println(cmd); // ProcessBuilder pb = new ProcessBuilder(cmd); // pb.redirectErrorStream(true); // pb.directory(new File(path)); // Process p = pb.start(); Runtime run = Runtime.getRuntime(); Process p = run.exec("cmd.exe /c tesseract " + file.getAbsolutePath() + " " + file.getAbsolutePath() + " -l normal"); getConsole(p); String sb = getResult(new 
File(file.getAbsolutePath() + ".txt")); return sb.toString(); } catch (Exception e) { logger.error(e); return null; } finally { } } private static String getResult(File file) throws FileNotFoundException, UnsupportedEncodingException, IOException { StringBuilder sb = new StringBuilder(); // 取得结果的输出流 InputStream resultIs = new FileInputStream(file); // 用一个读输出流类去读 InputStreamReader resultIsr = new InputStreamReader(resultIs, "utf-8"); // 用缓冲器读行 BufferedReader resultBr = new BufferedReader(resultIsr); String line; // 直到读完为止 while ((line = resultBr.readLine()) != null) { logger.debug(line); sb.append(line); } return sb.toString(); } private static void getConsole(Process p) throws UnsupportedEncodingException, IOException { // 取得命令结果的输出流 InputStream fis = p.getInputStream(); // 用一个读输出流类去读 InputStreamReader isr = new InputStreamReader(fis, "utf-8"); // 用缓冲器读行 BufferedReader br = new BufferedReader(isr); String line = null; // 直到读完为止 while ((line = br.readLine()) != null) { // System.out.println(line); } } public static void test() { try { List<String> cmd = new LinkedList<String>(); cmd.add("javac"); cmd.add("PB.java"); ProcessBuilder pb = new ProcessBuilder(cmd); pb.redirectErrorStream(true); pb.directory(new File("E:/test")); Process p = pb.start(); // 取得命令结果的输出流 InputStream fis = p.getInputStream(); // 用一个读输出流类去读 InputStreamReader isr = new InputStreamReader(fis, "utf-8"); // 用缓冲器读行 BufferedReader br = new BufferedReader(isr); String line = null; // 直到读完为止 while ((line = br.readLine()) != null) { logger.debug(line); } } catch (Exception e) { logger.error(e); } } }
结果如下:
uHx7,IXQO,\1ZYP,ZVBO,3237,5SYQ~,,87YF,\8KDN,CGPC,\c\IG\N,F\Z TA,J 9pc,Lpza,NBGC,N QW8,onwz,ox XJ,\P9FM,P PR‘U,QRG\I\,,RAZ v\,504i,VGPH,VPCI,\\I\M I,‘MJ1,Y6H9\,Y OGP,
对比第一张图片,结果不是很完美~哈哈。当然了,如果你只需要实现验证码的读写,那么上面就足够了。下面继续普及图像处理的知识。
-------------------------------------------------------------------我的分割线--------------------------------------------------------------------
当然了,有时候图片被扭曲或者模糊的很厉害,很不容易识别,所以下面我给大家介绍一个去噪的辅助类, 能稍做优化,先看下效果图。
package cn.c.test3; import java.awt.Color; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import javax.imageio.ImageIO; public class ClearImageHelper { public static void main(String[] args) throws IOException { File testDataDir = new File("E:\\test\\code"); final String destDir = testDataDir.getAbsolutePath() + "/tmp"; for (File file : testDataDir.listFiles()) { cleanImage(file, destDir); } } /** * * @param sfile * 需要去噪的图像 * @param destDir * 去噪后的图像保存地址 * @throws IOException */ public static void cleanImage(File sfile, String destDir) throws IOException { File destF = new File(destDir); if (!destF.exists()) { destF.mkdirs(); } BufferedImage bufferedImage = ImageIO.read(sfile); int h = bufferedImage.getHeight(); int w = bufferedImage.getWidth(); // 灰度化 int[][] gray = new int[w][h]; for (int x = 0; x < w; x++) { for (int y = 0; y < h; y++) { int argb = bufferedImage.getRGB(x, y); // 图像加亮(调整亮度识别率非常高) int r = (int) (((argb >> 16) & 0xFF) * 1.1 + 30); int g = (int) (((argb >> 8) & 0xFF) * 1.1 + 30); int b = (int) (((argb >> 0) & 0xFF) * 1.1 + 30); if (r >= 255) { r = 255; } if (g >= 255) { g = 255; } if (b >= 255) { b = 255; } gray[x][y] = (int) Math .pow((Math.pow(r, 2.2) * 0.2973 + Math.pow(g, 2.2) * 0.6274 + Math.pow(b, 2.2) * 0.0753), 1 / 2.2); } } // 二值化 int threshold = ostu(gray, w, h); BufferedImage binaryBufferedImage = new BufferedImage(w, h, BufferedImage.TYPE_BYTE_BINARY); for (int x = 0; x < w; x++) { for (int y = 0; y < h; y++) { if (gray[x][y] > threshold) { gray[x][y] |= 0x00FFFF; } else { gray[x][y] &= 0xFF0000; } binaryBufferedImage.setRGB(x, y, gray[x][y]); } } // 矩阵打印 for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { if (isBlack(binaryBufferedImage.getRGB(x, y))) { System.out.print("*"); } else { System.out.print(" "); } } System.out.println(); } ImageIO.write(binaryBufferedImage, "jpg", new File(destDir, sfile.getName())); } public static boolean isBlack(int colorInt) { Color color = new 
Color(colorInt); if (color.getRed() + color.getGreen() + color.getBlue() <= 300) { return true; } return false; } public static boolean isWhite(int colorInt) { Color color = new Color(colorInt); if (color.getRed() + color.getGreen() + color.getBlue() > 300) { return true; } return false; } public static int isBlackOrWhite(int colorInt) { if (getColorBright(colorInt) < 30 || getColorBright(colorInt) > 730) { return 1; } return 0; } public static int getColorBright(int colorInt) { Color color = new Color(colorInt); return color.getRed() + color.getGreen() + color.getBlue(); } public static int ostu(int[][] gray, int w, int h) { int[] histData = new int[w * h]; // Calculate histogram for (int x = 0; x < w; x++) { for (int y = 0; y < h; y++) { int red = 0xFF & gray[x][y]; histData[red]++; } } // Total number of pixels int total = w * h; float sum = 0; for (int t = 0; t < 256; t++) sum += t * histData[t]; float sumB = 0; int wB = 0; int wF = 0; float varMax = 0; int threshold = 0; for (int t = 0; t < 256; t++) { wB += histData[t]; // Weight Background if (wB == 0) continue; wF = total - wB; // Weight Foreground if (wF == 0) break; sumB += (float) (t * histData[t]); float mB = sumB / wB; // Mean Background float mF = (sum - sumB) / wF; // Mean Foreground // Calculate Between Class Variance float varBetween = (float) wB * (float) wF * (mB - mF) * (mB - mF); // Check if new maximum found if (varBetween > varMax) { varMax = varBetween; threshold = t; } } return threshold; } }
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· Open-Sora 2.0 重磅开源!
· 字符编码:从基础到乱码解决