Python学习分享——OCR图片识别
1.Python学习分享——OCR图片识别
2.Python学习分享 —— 恶魔的快乐小屋
python -m pip install --upgrade pip # 更新pip
pip install setuptools # 构建和打包
pip install setuptools -i https://pypi.tuna.tsinghua.edu.cn/simple # 清华大学镜像加速
pip install --upgrade setuptools # 升级
pip install opencv-python-headless # 开源最全的python cv
pip install paddlepaddle # 百度飞桨 cpu版
pip install paddlepaddle-gpu # 百度飞桨 gpu版
pip install paddleocr # PaddleOCR 库(下文脚本 import paddleocr 需要)
pip install pyinstaller # 项目打包生成可执行文件
pip install pyinstaller -i https://pypi.tuna.tsinghua.edu.cn/simple # 加速镜像
pyinstaller --onefile --add-data "models/*;models/" .\src\ocr.py # 打包文件
# --onefile 将所有内容打包成单个可执行文件
# --add-data:将模型文件夹包含到打包文件中
python 脚本
import cv2
import numpy as np
from paddleocr import PaddleOCR, draw_ocr
import paddleocr
import os


def detect_skew_angle(image):
    """Estimate the dominant skew angle of an image, in degrees.

    Straight segments are found with Canny edge detection followed by a
    probabilistic Hough transform. Each segment's direction is folded into
    [-90, 90) so that a segment reported with its endpoints swapped yields
    the same angle, and so that near-horizontal lines cluster around 0
    instead of being split between ~0 and ~180.

    Args:
        image: BGR (3-channel) or grayscale (2-D) image array.

    Returns:
        The median segment angle in degrees (image coordinates, y pointing
        down, so a positive value means the line runs down and to the
        right), or 0 when no segment was detected.
    """
    # Work on a single-channel image.
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # Gaussian blur to reduce noise before edge detection.
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edges = cv2.Canny(blurred, 50, 150, apertureSize=3)

    # Probabilistic Hough transform to find straight segments.
    lines = cv2.HoughLinesP(
        edges, 1, np.pi / 180, threshold=100, minLineLength=150, maxLineGap=10
    )

    angles = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.degrees(np.arctan2(y2 - y1, x2 - x1))
            # Fold into [-90, 90): a line has no direction, so angles that
            # differ by 180 degrees describe the same skew.
            angle = (angle + 90.0) % 180.0 - 90.0
            angles.append(angle)
    print(f"{angles}")

    if not angles:
        return 0
    # The median is used instead of the mean so that a minority of
    # outlier segments (e.g. vertical borders) does not drag the estimate.
    return float(np.median(angles))


def show_image(image):
    """Display an image in a resizable window until a key is pressed.

    Scrolling the mouse wheel redraws the image at 110% (wheel up) or 90%
    (wheel down) of its original size.

    Args:
        image: Image array to display.
    """
    cv2.namedWindow('Rotated Image', cv2.WINDOW_NORMAL)  # resizable window
    cv2.imshow('Rotated Image', image)

    # Mouse callback used for zooming.
    def on_mouse(event, x, y, flags, param):
        if event == cv2.EVENT_MOUSEWHEEL:
            if flags > 0:
                scale_percent = 110  # zoom in by 10%
            else:
                scale_percent = 90  # zoom out by 10%
            width = int(image.shape[1] * scale_percent / 100)
            height = int(image.shape[0] * scale_percent / 100)
            dim = (width, height)
            resized = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
            cv2.imshow('Rotated Image', resized)

    cv2.setMouseCallback('Rotated Image', on_mouse)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


def correct_skew(image_path):
    """Load an image from disk and rotate it to undo its detected skew.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The original image when the detected skew is below 10 degrees, the
        deskewed image otherwise, or None when the file cannot be read.
    """
    original_img = cv2.imread(image_path)
    if original_img is None:
        print(f"Error: Unable to load image at {image_path}")
        return None

    angle = detect_skew_angle(original_img)
    print(f"{angle}")

    # Small skews are left untouched.
    if abs(angle) < 10:
        return original_img

    (h, w) = original_img.shape[:2]
    center = (w // 2, h // 2)
    # getRotationMatrix2D treats positive angles as counter-clockwise, and
    # the detected angle is measured in image coordinates (y down), where a
    # positive value is a clockwise tilt. Rotating by +angle therefore
    # brings the lines back to horizontal; rotating by -angle would double
    # the skew.
    M = cv2.getRotationMatrix2D(center, angle, 1.0)

    rotated = cv2.warpAffine(
        original_img,
        M,
        (w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
    return rotated


def show_draw_ocr(result):
    """Draw the OCR result onto the module-level ``img`` and display it.

    Args:
        result: Value returned by ``ocr.ocr``; iterated as a list whose
            entries are lists of ``[box, (text, score)]`` items. Entries
            that are None are skipped.
    """
    boxes = []
    texts = []
    scores = []
    for line in result:
        # Example entry:
        # [[[[13.0, 15.0], [261.0, 8.0], [262.0, 51.0], [14.0, 58.0]], ('CMIITTD', 0.8830805420875549)]]
        # i.e. the four corner points of the text box, then (text, confidence).
        if line is None:
            # No text was found for this image; nothing to collect.
            continue
        for word in line:
            boxes.append(word[0])
            texts.append(word[-1][0])
            scores.append(word[-1][-1])
    # Visualise the recognition result.
    # image = draw_ocr(img, boxes, texts, scores, font_path='./path/to/chinese_font.ttf')
    image = draw_ocr(img, boxes, texts, scores)
    show_image(image)


def out_result_list(result):
    """Print every recognised text box with its text and confidence.

    Args:
        result: Value returned by ``ocr.ocr``; same structure as accepted
            by ``show_draw_ocr``. Entries that are None are skipped.
    """
    for line in result:
        # Each item: four corner points of the text box, then (text, confidence).
        if line is None:
            continue
        for word in line:
            print(f"box:{word[0]} [0]:{word[-1][0]} [1]:{word[-1][1]}")


# Read the image.
img_path = r'D:\XXX\xk_2.jpg'  # business licence
# If recognition is inaccurate, mean or Gaussian blur can be applied first to
# remove noise, e.g. img = cv2.GaussianBlur(img, (5, 5), 0).
img = cv2.imread(img_path)
# img = cv2.GaussianBlur(img, (5, 5), 0)
# Different denoising methods affect the recognition result.
if img is None:
    print(f"Error: Unable to load image at {img_path}")
    # List the files next to the requested path to help spot a wrong name.
    img_dir = os.path.dirname(img_path)
    print(f"Listing files in directory: {img_dir}")
    # Guard the listing: the directory itself may be what is missing.
    if os.path.isdir(img_dir):
        for filename in os.listdir(img_dir):
            print(filename)
else:
    # show_image(img)
    # The first run downloads the models automatically (by default into the
    # working directory). 'ch' is the Chinese model; it also detects English.
    ocr = PaddleOCR(
        use_angle_cls=True,
        lang='ch',
        det_model_dir='./models/ch_ppocr_server_v1.1_det_infer',  # text detection model path (optional)
        rec_model_dir='./models/ch_ppocr_server_v1.1_rec_infer',  # text recognition model path (optional)
        cls_model_dir='./models/ch_ppocr_mobile_v1.1_cls_infer'  # direction classifier model path (optional)
    )
    # Run text detection and recognition.
    result = ocr.ocr(img)
    out_result_list(result)
    show_draw_ocr(result)
工具对比
Tesseract 识别效率尚可 开源 Java 需下载模型文件整体体积较小
Umi-OCR 工具软件 开源 python https://github.com/hiroi-sora/Umi-OCR/releases/
EasyOCR 识别效率高 开源 python 需要GPU
pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
pip install easyocr
reader = easyocr.Reader(['ch_sim', 'en'], model_storage_directory='/path/to/your/model/directory') 指定模型存储位置(该参数属于 easyocr.Reader,而不是 pip install 的选项)
PaddleOCR 识别效率高 百度飞桨 python CPU版本即可 pip install paddlepaddle; pip install paddlepaddle-gpu
腾讯OCR 识别效率最高 付费 接口 智能结构化API 0.05/0.06高级 卡证识别API 0.01/条 https://cloud.tencent.com/product/ocr
docker镜像完成对整个PaddleOCR 项目的使用

# Version: 2.0.0
# Builds an image that serves PaddleOCR's ocr_system module through
# PaddleHub Serving on port 8866.
FROM paddlepaddle/paddle:2.6.1

# Original note: PaddleOCR is based on Python 3.7.
# NOTE(review): confirm against the Python version shipped in the base image.
RUN pip install --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple

RUN pip install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple

# Replace whatever astroid version is present with a pinned one.
RUN pip uninstall -y astroid
RUN pip install astroid==2.12.2

RUN git clone https://gitee.com/PaddlePaddle/PaddleOCR.git /PaddleOCR

WORKDIR /PaddleOCR

RUN pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

RUN mkdir -p /PaddleOCR/inference/

# Download the OCR detection, direction-classifier and recognition models.
# If you switch to a different model, remember to change the matching
# model dir (e.g. det_model_dir) in deploy/hubserving/ocr_system/params.py.
ADD https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar /PaddleOCR/inference/
ADD https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar /PaddleOCR/inference/
ADD https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar /PaddleOCR/inference/

RUN tar xf /PaddleOCR/inference/ch_PP-OCRv3_det_infer.tar -C /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/ch_ppocr_mobile_v2.0_cls_infer.tar -C /PaddleOCR/inference/
RUN tar xf /PaddleOCR/inference/ch_PP-OCRv3_rec_infer.tar -C /PaddleOCR/inference/

RUN pip install protobuf==3.20.0 -i https://pypi.tuna.tsinghua.edu.cn/simple

# Required to build paddle2onnx. The package index is refreshed in the same
# layer as the install so the install does not run against a stale or empty
# package list; the lists are removed afterwards to keep the layer small.
RUN apt-get update \
    && apt-get install -y protobuf-compiler \
    && rm -rf /var/lib/apt/lists/*

# The paddle2onnx version must match the paddlepaddle version.
RUN pip install paddle2onnx==1.3.1 -i https://pypi.tuna.tsinghua.edu.cn/simple

EXPOSE 8866

CMD ["/bin/bash","-c","hub install deploy/hubserving/ocr_system/ && hub serving start -m ocr_system"]
绑定端口到宿主机
docker run -d -p 9866:8866 --name python_test sha256:ace8be45d2668fb9aa4e518eabe9bd886483d023a09d95fb1b5c358a10352586
9866 为宿主机端口,8866 为容器内服务暴露的端口(在宿主机上通过 9866 访问)
测试访问代码

import requests
import json
import base64

# Test client: sends one local image to the ocr_system serving endpoint and
# prints the recognised texts whose confidence is above 0.80.

# Service endpoint. The container is started with `-p 9866:8866`, so from
# the host the service is reached on port 9866 (8866 is the port inside the
# container).
url = "http://127.0.0.1:9866/predict/ocr_system"

# Prepare the request data; here the image to recognise is a local file.
image_path = r"D:\workerSpace\xx.jpg"
with open(image_path, 'rb') as f:
    image_data = f.read()

# Base64-encode the image bytes.
image_base64 = base64.b64encode(image_data).decode('utf-8')

# Build the request body.
data = {
    "images": [image_base64]
}
# Print only the payload size; dumping the whole base64 string floods the
# console for any real image.
print(f"Encoded image size: {len(image_base64)} characters")

# Send the POST request. A timeout is set so the script cannot hang forever
# if the service is unreachable or stalls.
headers = {"Content-Type": "application/json"}
response = requests.post(url, data=json.dumps(data), headers=headers, timeout=60)

# Parse the response.
if response.status_code == 200:
    result = response.json()
    results = result.get('results', [])
    result_str_list = []
    # One entry per submitted image; each entry is a list of dicts that are
    # read here through their 'text' and 'confidence' keys.
    for line in results:
        for word in line:
            if word['confidence'] > 0.80:
                result_str_list.append(word['text'])
    print(result)
    print(results)
    print(result_str_list)
else:
    print(f"Request failed with status code {response.status_code}")
小破防今天尚未破防! :痛苦预示着超脱
本文来自博客园,作者:小破防今天尚未破防!,转载请注明原文链接:https://www.cnblogs.com/supperlhg/p/18738968
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?
· Pantheons:用 TypeScript 打造主流大模型对话的一站式集成库