将图片文字的自管理敏感词审核集成到文章审核中
1、在heima-leadnews-common中创建工具类,封装一下tess4j
package com.heima.common.tess4j;

import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

import java.awt.image.BufferedImage;

/**
 * Thin wrapper around tess4j that runs OCR on an image and returns the
 * recognized text flattened onto a single line.
 *
 * <p>{@link #setDataPath(String)} and {@link #setLanguage(String)} must be
 * called (directly or by a property binder) before {@link #doOCR(BufferedImage)};
 * both values are handed to the underlying {@link Tesseract} instance unchanged.
 */
public class Tess4jClient {

    /** Value passed to {@link Tesseract#setDatapath(String)} (trained-data location). */
    private String dataPath;

    /** Value passed to {@link Tesseract#setLanguage(String)} (e.g. a Chinese model name). */
    private String language;

    /**
     * @return the configured trained-data path, or {@code null} if not yet set
     */
    public String getDataPath() {
        return dataPath;
    }

    /**
     * @param dataPath path handed to Tesseract as its data path
     */
    public void setDataPath(String dataPath) {
        this.dataPath = dataPath;
    }

    /**
     * @return the configured recognition language, or {@code null} if not yet set
     */
    public String getLanguage() {
        return language;
    }

    /**
     * @param language language identifier handed to Tesseract
     */
    public void setLanguage(String language) {
        this.language = language;
    }

    /**
     * Recognizes the text contained in {@code image}.
     *
     * @param image the image to run OCR on
     * @return the recognized text with every carriage return and line feed
     *         replaced by {@code "-"} and every space character removed
     * @throws TesseractException if the OCR engine fails
     */
    public String doOCR(BufferedImage image) throws TesseractException {
        // A fresh engine is created per call and configured from this client's fields.
        Tesseract tesseract = new Tesseract();
        // Location of the trained-data (font library) files.
        tesseract.setDatapath(dataPath);
        // Recognition language.
        tesseract.setLanguage(language);

        // Run the OCR.
        String result = tesseract.doOCR(image);

        // Flatten to one line: each CR or LF becomes "-", then spaces are dropped.
        // The pattern must be an alternation; "\\r/\\n" would only match the
        // literal three-character sequence CR, '/', LF and so never fire.
        return result.replaceAll("\\r|\\n", "-").replaceAll(" ", "");
    }
}
2、修改审核文章中的图片审核逻辑
将图片敏感词审核集成到图片审核中。
try { for (String image : images) { byte[] bytes = fileStorageService.downLoadFile(image); //byte[] 转换为bufferedImage ByteArrayInputStream in = new ByteArrayInputStream(bytes); BufferedImage bufferedImage = ImageIO.read(in); //图片文字识别 String result = tess4jClient.doOCR(bufferedImage); //过滤文字 boolean isSensitive = handleSensitiveScan(result, wmNews); if(!isSensitive){ return isSensitive; } imageList.add(bytes); } }catch (Exception e){ e.printStackTrace(); }