Java实现验证码识别

一、准备工作
安装JDK并配置环境变量。
安装Eclipse或IntelliJ IDEA作为开发工具。
安装Selenium WebDriver、WebDriverManager、Tess4J(Tesseract-OCR 的 Java 封装)以及 Apache Commons IO 等依赖库。
二、打开网站并设置浏览器窗口
首先,打开浏览器并将窗口最大化,以确保每次截取的图片都是相同的大小:

java

import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import io.github.bonigarcia.wdm.WebDriverManager;

public class Main {
    public static void main(String[] args) {
        WebDriverManager.chromedriver().setup();
        WebDriver driver = new ChromeDriver();
        driver.get("https://www.example.com");
        driver.manage().window().maximize();
    }
}
三、截取带有验证码的网页内容
截取当前屏幕内容,并保存到本地:

java

import org.openqa.selenium.OutputType;
import org.openqa.selenium.TakesScreenshot;

import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;

public class Main {
    public static void main(String[] args) {
        WebDriverManager.chromedriver().setup();
        WebDriver driver = new ChromeDriver();
        driver.get("https://www.example.com");
        driver.manage().window().maximize();

File screenshot = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
        try {
            FileUtils.copyFile(screenshot, new File("H://test/01.png"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
四、识别图片验证码
使用 Tesseract 识别图片验证码
定位验证码在图片中的位置并截取:
java

import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import javax.imageio.ImageIO;
import java.io.File;
import java.io.IOException;

public class Main {
    public static void main(String[] args) {
        WebDriverManager.chromedriver().setup();
        WebDriver driver = new ChromeDriver();
        driver.get("https://www.example.com");
        driver.manage().window().maximize();

File screenshot = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
        try {
            FileUtils.copyFile(screenshot, new File("H://test/01.png"));
        } catch (IOException e) {
            e.printStackTrace();
        }

try {
            BufferedImage fullImg = ImageIO.read(new File("H://test/01.png"));
            BufferedImage captchaImg = fullImg.getSubimage(564, 395, 79, 28); // 验证码位置
            ImageIO.write(captchaImg, "png", new File("H://test/02.png"));

Tesseract tesseract = new Tesseract();
            tesseract.setDatapath("path/to/tessdata"); // 设置tessdata路径
            String captchaText = tesseract.doOCR(captchaImg).trim();
            System.out.println("Captcha: " + captchaText);
        } catch (IOException | TesseractException e) {
            e.printStackTrace();
        }
    }
}
五、输入账号、密码和验证码
定位账号、密码和验证码输入框,并输入相关内容:

java

import org.openqa.selenium.By;
import org.openqa.selenium.WebElement;

public class Main {
    public static void main(String[] args) {
        WebDriverManager.chromedriver().setup();
        WebDriver driver = new ChromeDriver();
        driver.get("https://www.example.com");
        driver.manage().window().maximize();

File screenshot = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
        try {
            FileUtils.copyFile(screenshot, new File("H://test/01.png"));
        } catch (IOException e) {
            e.printStackTrace();
        }

try {
            BufferedImage fullImg = ImageIO.read(new File("H://test/01.png"));
            BufferedImage captchaImg = fullImg.getSubimage(564, 395, 79, 28); // 验证码位置
            ImageIO.write(captchaImg, "png", new File("H://test/02.png"));

Tesseract tesseract = new Tesseract();
            tesseract.setDatapath("path/to/tessdata"); // 设置tessdata路径
            String captchaText = tesseract.doOCR(captchaImg).trim();
            System.out.println("Captcha: " + captchaText);

WebElement username = driver.findElement(By.id("username"));
            WebElement password = driver.findElement(By.id("password_1"));
            WebElement captcha = driver.findElement(By.id("user_ck"));

username.sendKeys("your_username");
            password.sendKeys("your_password");
            captcha.sendKeys(captchaText);
        } catch (IOException | TesseractException e) {
            e.printStackTrace();
        }
    }
}
六、点击登录按钮
定位并点击登录按钮:

java

public class Main {
    public static void main(String[] args) {
        WebDriverManager.chromedriver().setup();
        WebDriver driver = new ChromeDriver();
        driver.get("https://www.example.com");
        driver.manage().window().maximize();

File screenshot = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
        try {
            FileUtils.copyFile(screenshot, new File("H://test/01.png"));
        } catch (IOException e) {
            e.printStackTrace();
        }

try {
            BufferedImage fullImg = ImageIO.read(new File("H://test/01.png"));
            BufferedImage captchaImg = fullImg.getSubimage(564, 395, 79, 28); // 验证码位置
            ImageIO.write(captchaImg, "png", new File("H://test/02.png"));

Tesseract tesseract = new Tesseract();
            tesseract.setDatapath("path/to/tessdata"); // 设置tessdata路径
            String captchaText = tesseract.doOCR(captchaImg).trim();
            System.out.println("Captcha: " + captchaText);

WebElement username = driver.findElement(By.id("username"));
            WebElement password = driver.findElement(By.id("password_1"));
            WebElement captcha = driver.findElement(By.id("user_ck"));

username.sendKeys("your_username");
            password.sendKeys("your_password");
            captcha.sendKeys(captchaText);

WebElement loginButton = driver.findElement(By.name("yt0"));
            loginButton.click();
        } catch (IOException | TesseractException e) {
            e.printStackTrace();
        }
    }
}
七、关闭浏览器
最后,关闭浏览器:

java

public class Main {
    public static void main(String[] args) {
        WebDriverManager.chromedriver().setup();
        WebDriver driver = new ChromeDriver();
        driver.get("https://www.example.com");
        driver.manage().window().maximize();

File screenshot = ((TakesScreenshot) driver).getScreenshotAs(OutputType.FILE);
        try {
            FileUtils.copyFile(screenshot, new File("H://test/01.png"));
        } catch (IOException e) {
            e.printStackTrace();
        }

try {
            BufferedImage fullImg = ImageIO.read(new File("H://test/01.png"));
            BufferedImage captchaImg = fullImg.getSubimage(564, 395, 79, 28); // 验证码位置
            ImageIO.write(captchaImg, "png", new File("H://test/02.png"));

Tesseract tesseract = new Tesseract();
            tesseract.setDatapath("path/to/tessdata"); // 设置tessdata路径
            String captchaText = tesseract.doOCR(captchaImg).trim();
            System.out.println("Captcha: " + captchaText);

WebElement username = driver.findElement(By.id("username"));
            WebElement password = driver.findElement(By.id("password_1"));
            WebElement captcha = driver.findElement(By.id("user_ck"));

username.sendKeys("your_username");
            password.sendKeys("your_password");
            captcha.sendKeys(captchaText);

WebElement loginButton = driver.findElement(By.name("yt0"));
            loginButton.click();
        } catch (IOException | TesseractException e) {
            e.printStackTrace();
        } finally {
            driver.quit();
        }
    }
}
更多内容访问ttocr.com或联系1436423940
八、问题和解决方案
Tesseract-OCR 报错解决方案
在使用 Tesseract 识别图片时,如果出现与 tesseract-ocr 相关的报错,可以前往 tesseract-ocr 的下载页面下载并安装 tesseract-ocr。

然后设置 Tesseract 实例的 datapath,确保其指向 tesseract-ocr 安装目录下的 tessdata 文件夹:

Tesseract tesseract = new Tesseract();
tesseract.setDatapath("path/to/tessdata"); // 设置tessdata路径

posted @   ttocr、com  阅读(149)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· C#/.NET/.NET Core技术前沿周刊 | 第 29 期(2025年3.1-3.9)
· 从HTTP原因短语缺失研究HTTP/2和HTTP/3的设计差异
点击右上角即可分享
微信分享提示