OCR recognition

The whole process takes two steps:

Step 1: straighten the photo and save a "scanned" version of it (by applying a perspective transform).

Step 2: run pytesseract on that scanned image to recognize the text.
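
Both scripts assume pytesseract, OpenCV and Pillow are installed, plus the Tesseract engine itself. A minimal setup sketch (the package names are standard; the Windows path below is only an example, adjust it to your install):

pip install opencv-python pillow pytesseract

import pytesseract
# Only needed when tesseract.exe is not on the PATH; the path here is an example.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'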

main.py

import numpy as np
import cv2
import math
import test


def show(img):
    cv2.imshow('name', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


def pers_transform(img, coor):
    # Sort by x to split the corners into a left pair and a right pair, then sort each pair by y,
    # giving top-left, bottom-left, top-right, bottom-right -- the same order as the destination points below.
    coor = sorted(coor, key = lambda a : a[0])
    lx1, lx2 = sorted(coor[:2], key = lambda a : a[1])
    rx1, rx2 = sorted(coor[2:], key = lambda a : a[1])
    scoor = np.array([lx1, lx2, rx1, rx2], dtype = np.float32)
    w1 = math.sqrt(math.pow((lx1[0] - rx1[0]), 2) + math.pow((lx1[1] - rx1[1]), 2))
    w2 = math.sqrt(math.pow((lx2[0] - rx2[0]), 2) + math.pow((lx2[1] - rx2[1]), 2))
    w = int(max(w1, w2))  # take the larger width/height: the approximated contour is only a quadrilateral, not necessarily a rectangle
    h1 = math.sqrt(math.pow((lx1[0] - lx2[0]), 2) + math.pow((lx1[1] - lx2[1]), 2))
    h2 = math.sqrt(math.pow((rx1[0] - rx2[0]), 2) + math.pow((rx1[1] - rx2[1]), 2))
    h = int(max(h1, h2))
    dcoor = np.array([[0, 0], [0, h], [w, 0], [w, h]], dtype = np.float32)  # corner coordinates after straightening, in the same order as the source corners
    trans_m = cv2.getPerspectiveTransform(scoor, dcoor)  # perspective transform matrix
    return cv2.warpPerspective(img, trans_m, (w, h))     # warp the image with the matrix into a w x h output
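
# --- Added sketch, not part of the original post: a quick sanity check for pers_transform. ---
# It draws a known quadrilateral on a blank canvas, straightens it, and prints the result;
# the warped region should come out almost entirely white. Call it manually when needed.
def _check_pers_transform():
    canvas = np.zeros((400, 400), dtype = np.uint8)
    quad = np.array([[60, 50], [320, 80], [340, 350], [40, 330]], dtype = np.int32)  # an arbitrary tilted quadrilateral
    cv2.fillPoly(canvas, [quad], 255)
    warped = pers_transform(canvas, quad.tolist())
    print(warped.shape, warped.mean())  # expect roughly (280, 300) and a mean close to 255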




if __name__ == '__main__':
    dst = 'C:/Users/Dell/PycharmProjects/ExtractText/scan.jpg'
    img = cv2.imread('C:/Users/Dell/PycharmProjects/ExtractText/images/receipt.jpg')
    img_k = cv2.resize(img, (int(img.shape[1] / (img.shape[0] / 1000)), 1000))  # scale to a height of 1000, keeping the aspect ratio
    img_gray = cv2.cvtColor(img_k, cv2.COLOR_BGR2GRAY)
    img_canny = cv2.Canny(img_gray, 200, 255)
    # img_bin = cv2.threshold(img_canny, 200, 255, cv2.THRESH_BINARY)[1]
    found = cv2.findContours(img_canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    contours = found[0] if len(found) == 2 else found[1]  # OpenCV 4.x returns (contours, hierarchy); 3.x returns (image, contours, hierarchy)
    contours = sorted(contours, key = cv2.contourArea, reverse = True)
    img_k_copy = img_k.copy()
    peri = cv2.arcLength(contours[0], True)
    approx = cv2.approxPolyDP(contours[0], 0.02 * peri, True)
    cv2.drawContours(img_k_copy, [approx], -1, (0, 0, 255), 2)
    img_pers = pers_transform(img_gray, approx.reshape(4, 2))  # the contour array is 3-D, shape (4, 1, 2), so reshape it to (4, 2)
    img_pers_bin = cv2.threshold(img_pers, 150, 255, cv2.THRESH_BINARY)[1]
    cv2.imwrite('scan.jpg', img_pers_bin)  # written to the working directory, which is assumed to match dst above
    print('Accepted')
    test.scan(dst)
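
One fragile spot in main.py: cv2.approxPolyDP on the largest contour is simply assumed to return exactly 4 points. A more defensive variant (my addition, not part of the original code) scans the area-sorted contours and keeps the first one whose approximation is a quadrilateral; screen_cnt would then replace approx in the call to pers_transform:

screen_cnt = None
for c in contours:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    if len(approx) == 4:   # first contour that approximates to 4 corners
        screen_cnt = approx
        break
if screen_cnt is None:
    raise RuntimeError('no 4-point contour found')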

 

test.py

from PIL import Image
import cv2
import pytesseract

def show(img):
    cv2.imshow('name', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

def scan(dst):
    img = cv2.imread(dst)
    # img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # img_bin = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    img_k = Image.open(dst)
    text = pytesseract.image_to_string(img_k)
    print(text)
    show(img)
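
pytesseract.image_to_string also accepts a language and a raw Tesseract config string. Two illustrative variants that could replace the plain call inside scan() (my addition; 'eng' needs the matching traineddata installed, and --psm 6 tells Tesseract to treat the image as a single uniform block of text):

    text_eng = pytesseract.image_to_string(img_k, lang = 'eng')
    text_block = pytesseract.image_to_string(img_k, config = '--psm 6')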

 
