Python学习分享——OCR图片识别

复制代码
python -m pip install --upgrade pip  # upgrade pip itself
pip install setuptools  # build / packaging tooling
pip install setuptools -i https://pypi.tuna.tsinghua.edu.cn/simple  # same install through the Tsinghua PyPI mirror
pip install --upgrade setuptools  # upgrade setuptools
# The script below opens windows with cv2.imshow / cv2.namedWindow, which the
# "-headless" OpenCV wheel does not provide, so install the regular build.
pip install opencv-python
# Install exactly ONE PaddlePaddle build: CPU (default) or GPU.
pip install paddlepaddle  # Baidu PaddlePaddle, CPU build
# pip install paddlepaddle-gpu  # Baidu PaddlePaddle, GPU build (use instead of the CPU build)
pip install paddleocr  # provides the `paddleocr` module imported by the script

pip install pyinstaller  # bundle the project into an executable
pip install pyinstaller -i https://pypi.tuna.tsinghua.edu.cn/simple  # same install through the mirror
pyinstaller --onefile --add-data "models/*;models/" .\src\ocr.py  # build the executable
        # --onefile   bundle everything into a single executable file
        # --add-data  include the models folder in the bundle
        #             (";" is the Windows source/destination separator)
复制代码

python 脚本

复制代码
import cv2
import numpy as np
from paddleocr import PaddleOCR,draw_ocr
import paddleocr
import os

def detect_skew_angle(image):
    """Estimate the dominant orientation of straight lines in an image.

    Pipeline: grayscale conversion (if needed) -> Gaussian blur -> Canny
    edges -> probabilistic Hough transform.  The orientations of the detected
    segments are then averaged.

    Line orientations are axial: 1 degree and 179 degrees describe almost the
    same line.  A plain arithmetic mean of those two values is 90 degrees,
    which is wrong, so the average is computed on doubled angles (circular
    mean) and halved again.

    Args:
        image: Image array.  A 3-dimensional array is converted with
            ``cv2.COLOR_BGR2GRAY`` (so colour input is assumed to be BGR);
            any other array is used as-is.

    Returns:
        The mean line orientation in degrees, in ``[0, 180)``, measured with
        ``atan2(dy, dx)`` in image coordinates.  ``0.0`` when no line
        segment is detected.  The value is not meaningful when the detected
        orientations cancel out (e.g. equal numbers of horizontal and
        vertical lines).
    """
    # Edge/line detection works on a single channel.
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # Blur first so that noise does not produce spurious edges.
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150, apertureSize=3)

    # Only long segments (>= 150 px) are kept.
    lines = cv2.HoughLinesP(
        edges, 1, np.pi / 180, threshold=100, minLineLength=150, maxLineGap=10
    )
    if lines is None or len(lines) == 0:
        return 0.0

    # Flatten to rows of (x1, y1, x2, y2).
    segments = np.asarray(lines, dtype=np.float64).reshape(-1, 4)
    dx = segments[:, 2] - segments[:, 0]
    dy = segments[:, 3] - segments[:, 1]

    # Doubling the angle maps theta and theta + 180 onto the same point of
    # the unit circle, which makes the mean independent of segment direction.
    doubled = 2.0 * np.arctan2(dy, dx)
    mean_doubled = np.arctan2(np.sin(doubled).mean(), np.cos(doubled).mean())

    angle = float(np.degrees(mean_doubled / 2.0) % 180.0)
    # Floating-point modulo of a tiny negative value can round up to 180.0.
    return 0.0 if angle >= 180.0 else angle

def show_image(image):
    """Show an image in a resizable window with mouse-wheel zoom.

    Blocks until a key is pressed, then closes all OpenCV windows.

    Each wheel step multiplies the current zoom factor by 1.1 (wheel
    forward) or 0.9 (wheel backward).  The factor is remembered between
    events so repeated scrolling keeps zooming, instead of always resizing
    the original to a fixed 110 % / 90 %.

    Args:
        image: Image array to display.
    """
    window_name = 'Rotated Image'
    min_scale, max_scale = 0.1, 10.0
    base_height, base_width = image.shape[:2]
    scale = 1.0

    def on_mouse(event, x, y, flags, param):
        """Mouse callback: rescale the displayed image on wheel events."""
        nonlocal scale
        if event != cv2.EVENT_MOUSEWHEEL:
            return

        factor = 1.1 if flags > 0 else 0.9
        scale = min(max(scale * factor, min_scale), max_scale)

        # Never ask cv2.resize for a zero-sized image.
        width = max(1, int(base_width * scale))
        height = max(1, int(base_height * scale))

        # INTER_AREA is meant for shrinking; use linear when enlarging.
        interpolation = cv2.INTER_AREA if scale < 1.0 else cv2.INTER_LINEAR
        resized = cv2.resize(image, (width, height), interpolation=interpolation)
        cv2.imshow(window_name, resized)

    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)  # user-resizable window
    cv2.imshow(window_name, image)
    cv2.setMouseCallback(window_name, on_mouse)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def correct_skew(image_path, min_angle=10):
    """Load an image from disk and rotate it to compensate for its skew.

    Args:
        image_path: Path of the image file to load with ``cv2.imread``.
        min_angle: Skews whose absolute value is below this many degrees are
            left uncorrected and the image is returned unchanged.

    Returns:
        The loaded image (unchanged when the skew is below ``min_angle``,
        rotated otherwise), or ``None`` if the image cannot be loaded.
    """
    original_img = cv2.imread(image_path)
    if original_img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None

    angle = detect_skew_angle(original_img)

    # A line orientation is only defined modulo 180 degrees.  Fold it into
    # [-90, 90) so that e.g. 179 degrees is seen as a 1 degree skew, not as
    # a reason to turn the image almost upside down.
    angle = (angle + 90.0) % 180.0 - 90.0

    if abs(angle) < min_angle:
        return original_img

    (h, w) = original_img.shape[:2]
    center = (w // 2, h // 2)

    # The matrix from getRotationMatrix2D(center, a, 1.0) maps a direction
    # at angle t (from atan2(dy, dx) in image coordinates) to angle t - a,
    # so rotating by +angle brings the detected lines to horizontal.
    M = cv2.getRotationMatrix2D(center, angle, 1.0)

    # Same canvas size as the input; border pixels are replicated to fill
    # the corners uncovered by the rotation.
    return cv2.warpAffine(
        original_img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
    )

def show_draw_ocr(result, image=None, font_path=None):
    """Draw the recognised boxes and texts on an image and display it.

    Args:
        result: OCR result: an iterable of per-image lists whose items look
            like ``[box, (text, score)]``, e.g.
            ``[[[[13.0, 15.0], [261.0, 8.0], [262.0, 51.0], [14.0, 58.0]],
            ('CMIITTD', 0.88)]]``.  Entries that are ``None`` or empty
            (nothing detected) are skipped.
        image: Image to draw on.  Defaults to the module-level ``img``.
        font_path: Optional font file passed to ``draw_ocr``; when omitted,
            ``draw_ocr`` uses its own default.
    """
    if image is None:
        image = img  # module-level image loaded by the script section

    boxes = []
    texts = []
    scores = []
    for line in result or []:
        if not line:
            continue
        for word in line:
            boxes.append(word[0])        # four corner points of the text region
            texts.append(word[-1][0])    # recognised text
            scores.append(word[-1][-1])  # confidence

    # Only forward font_path when the caller supplied one.
    draw_kwargs = {} if font_path is None else {'font_path': font_path}
    annotated = draw_ocr(image, boxes, texts, scores, **draw_kwargs)
    show_image(annotated)

def out_result_list(result):
    """Print every recognised text item of an OCR result.

    Args:
        result: OCR result: an iterable of per-image lists whose items look
            like ``[box, (text, score)]``, e.g.
            ``[[[[13.0, 15.0], [261.0, 8.0], [262.0, 51.0], [14.0, 58.0]],
            ('CMIITTD', 0.88)]]``, i.e. the four corner points of the text
            region followed by the text and its confidence.  Entries that
            are ``None`` or empty (nothing detected) are skipped instead of
            raising ``TypeError``.
    """
    for line in result or []:
        if not line:
            continue
        for word in line:
            print(f"box:{word[0]}  [0]:{word[-1][0]}    [1]:{word[-1][1]}")

# Image to recognise (a business-licence scan in the original post).
img_path = r'D:\XXX\xk_2.jpg'

# If recognition is inaccurate, denoising first (mean or Gaussian blur, e.g.
# cv2.GaussianBlur(img, (5, 5), 0)) may help; the denoising method chosen
# affects the recognition result.
img = cv2.imread(img_path)
if img is None:
    print(f"Error: Unable to load image at {img_path}")
    # Help the user spot a typo by listing what is actually in the folder.
    # Guard the listing so a missing folder does not raise and hide the
    # original "unable to load" error.
    img_dir = os.path.dirname(img_path) or '.'
    if os.path.isdir(img_dir):
        print(f"Listing files in directory: {img_dir}")
        for filename in os.listdir(img_dir):
            print(filename)
    else:
        print(f"Directory does not exist: {img_dir}")
else:
    # Per the original author: the first run downloads the models
    # automatically (by default into the working directory); 'ch' is the
    # Chinese model and can also detect English.
    ocr = PaddleOCR(
        use_angle_cls=True,
        lang='ch',
        det_model_dir='./models/ch_ppocr_server_v1.1_det_infer',  # text detection model path (optional)
        rec_model_dir='./models/ch_ppocr_server_v1.1_rec_infer',  # text recognition model path (optional)
        cls_model_dir='./models/ch_ppocr_mobile_v1.1_cls_infer'   # direction classifier model path (optional)
    )
    # Run text detection and recognition, then print and visualise the result.
    result = ocr.ocr(img)
    out_result_list(result)
    show_draw_ocr(result)
复制代码

 

工具对比

Tesseract 识别效率尚可 开源 Java 需下载模型文件整体体积较小
Umi-OCR 工具软件 开源 python https://github.com/hiroi-sora/Umi-OCR/releases/
EasyOCR 识别效率高 开源 python 需要GPU
    pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
    pip install easyocr
    reader = easyocr.Reader(['ch_sim', 'en'], model_storage_directory='/path/to/your/model/directory')  # 在 Python 代码中指定模型存储位置(pip 本身没有 --model-storage-directory 选项)
PaddleOCR 识别效率高 百度飞桨 python CPU版本即可 pip install paddlepaddle; pip install paddlepaddle-gpu
腾讯OCR 识别效率最高 付费 接口 智能结构化API 0.05/0.06高级 卡证识别API 0.01/条 https://cloud.tencent.com/product/ocr



docker镜像完成对整个PaddleOCR 项目的使用

复制代码
# Version: 2.0.0
FROM paddlepaddle/paddle:2.6.1

# NOTE(review): the original note said "PaddleOCR base on Python3.7" -
# confirm against the Python version shipped in the base image.
# All pip installs below go through the Tsinghua PyPI mirror.
RUN pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple

RUN pip install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple

# Replace whatever astroid version is present with a pinned one.
RUN pip uninstall -y astroid

RUN pip install astroid==2.12.2

RUN git clone https://gitee.com/PaddlePaddle/PaddleOCR.git /PaddleOCR

WORKDIR /PaddleOCR

RUN pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

RUN mkdir -p /PaddleOCR/inference/

# Download the OCR detection, direction-classifier and recognition inference
# models. If you switch to other models (e.g. a server instead of a mobile
# variant), remember to change the matching model dir (such as det_model_dir)
# in deploy/hubserving/ocr_system/params.py as well.
ADD https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar /PaddleOCR/inference/
ADD https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar /PaddleOCR/inference/
ADD https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar /PaddleOCR/inference/


RUN tar xf /PaddleOCR/inference/ch_PP-OCRv3_det_infer.tar -C /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/ch_ppocr_mobile_v2.0_cls_infer.tar -C /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/ch_PP-OCRv3_rec_infer.tar -C /PaddleOCR/inference/

RUN pip install protobuf==3.20.0 -i https://pypi.tuna.tsinghua.edu.cn/simple

# Needed to build paddle2onnx. Refresh the package index in the same layer so
# the install does not fail on a missing or stale index, then drop the lists.
RUN apt-get update \
    && apt-get install -y --no-install-recommends protobuf-compiler \
    && rm -rf /var/lib/apt/lists/*
# The paddle2onnx version must match the paddlepaddle version.
RUN pip install paddle2onnx==1.3.1 -i https://pypi.tuna.tsinghua.edu.cn/simple

EXPOSE 8866

# Install the ocr_system hub module, then start serving it.
CMD ["/bin/bash","-c","hub install deploy/hubserving/ocr_system/ && hub serving start -m ocr_system"]
View Code
复制代码

绑定端口到宿主机

docker run -d -p 9866:8866 --name python_test sha256:ace8be45d2668fb9aa4e518eabe9bd886483d023a09d95fb1b5c358a10352586
其中 9866 为宿主机端口,8866 为容器内服务监听(EXPOSE 暴露)的端口;从宿主机访问时应使用 9866。
测试访问代码
复制代码
import requests
import json
import base64

# Endpoint of the ocr_system hub-serving module.  The container is started
# with `-p 9866:8866`, so from the host the service is reached on port 9866
# (8866 is only the port inside the container).
url = "http://127.0.0.1:9866/predict/ocr_system"

# Local image to recognise; the service expects it Base64-encoded.
image_path = r"D:\workerSpace\xx.jpg"
with open(image_path, 'rb') as f:
    image_base64 = base64.b64encode(f.read()).decode('utf-8')

# Request body: a list of Base64-encoded images.
data = {
    "images": [image_base64]
}

# Send the POST request.  A timeout keeps the script from hanging forever
# when the service is unreachable or stuck.
headers = {"Content-Type": "application/json"}
response = requests.post(url, data=json.dumps(data), headers=headers, timeout=60)

# Parse the response.
if response.status_code == 200:
    result = response.json()
    # Tolerate payloads without a usable 'results' entry (treated as empty).
    results = result.get('results') or []
    # Each item of a per-image list is read as a dict with 'text' and
    # 'confidence'; keep only the texts recognised with confidence > 0.80.
    result_str_list = [
        word['text']
        for line in results
        for word in line
        if word['confidence'] > 0.80
    ]
    print(result)
    print(results)
    print(result_str_list)
else:
    print(f"Request failed with status code {response.status_code}")
View Code
复制代码

 

posted @   小破防今天尚未破防!  阅读(2)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?
· Pantheons:用 TypeScript 打造主流大模型对话的一站式集成库
跟随粒子特效
点击右上角即可分享
微信分享提示