OpenCV OCR 检测:EAST

加载 EAST 模型进行文本检测

模型下载:https://codeload.github.com/oyyd/frozen_east_text_detection.pb/zip/refs/heads/master

#coding:utf-8
import cv2
import math
############ Utility functions ############
def decode(scores, geometry, scoreThresh):
    """Convert raw score/geometry maps into rotated-box candidates.

    Args:
        scores: 4-D array of shape (1, 1, H, W) holding one score per cell.
        geometry: 4-D array of shape (1, 5, H, W). Channels 0-3 are combined
            pairwise into the box height and width; channel 4 is read as an
            angle in radians.
        scoreThresh: cells whose score is strictly below this are skipped.

    Returns:
        A two-element list ``[detections, confidences]``. Each detection is
        ``(center, (w, h), angle_in_degrees)`` and ``confidences`` holds the
        matching scores as Python floats, in the same order.

    Raises:
        AssertionError: if the two arrays do not have the shapes listed above.
    """
    s_shape = scores.shape
    g_shape = geometry.shape

    # Shape validation: both maps are 4-D, batch size 1, one score channel,
    # five geometry channels, and identical spatial extent.
    assert len(s_shape) == 4, "Incorrect dimensions of scores"
    assert len(g_shape) == 4, "Incorrect dimensions of geometry"
    assert s_shape[0] == 1, "Invalid dimensions of scores"
    assert g_shape[0] == 1, "Invalid dimensions of geometry"
    assert s_shape[1] == 1, "Invalid dimensions of scores"
    assert g_shape[1] == 5, "Invalid dimensions of geometry"
    assert s_shape[2] == g_shape[2], "Invalid dimensions of scores and geometry"
    assert s_shape[3] == g_shape[3], "Invalid dimensions of scores and geometry"

    rows = s_shape[2]
    cols = s_shape[3]

    detections = []
    confidences = []

    for row in range(rows):
        row_scores = scores[0][0][row]
        # One row of each geometry channel.
        # NOTE(review): channels 0-3 are presumably the distances from the
        # cell to the four box edges (EAST RBOX layout) - confirm.
        d0_row, d1_row, d2_row, d3_row, angle_row = (
            geometry[0][channel][row] for channel in range(5)
        )

        for col in range(cols):
            confidence = row_scores[col]
            if confidence < scoreThresh:
                # Below the threshold: nothing to emit for this cell.
                continue

            theta = angle_row[col]
            cos_t = math.cos(theta)
            sin_t = math.sin(theta)

            d0 = d0_row[col]
            d1 = d1_row[col]
            d2 = d2_row[col]
            d3 = d3_row[col]

            box_h = d0 + d2
            box_w = d1 + d3

            # Each cell is mapped back to input pixels with a factor of 4,
            # then shifted by the rotated channel-1/channel-2 values.
            anchor_x = col * 4.0 + cos_t * d1 + sin_t * d2
            anchor_y = row * 4.0 - sin_t * d1 + cos_t * d2

            # Two points derived from the anchor; their midpoint is the
            # centre of the rotated rectangle.
            first = (-sin_t * box_h + anchor_x, -cos_t * box_h + anchor_y)
            second = (-cos_t * box_w + anchor_x, sin_t * box_w + anchor_y)
            center = (0.5 * (first[0] + second[0]), 0.5 * (first[1] + second[1]))

            # The angle is negated and converted from radians to degrees.
            detections.append((center, (box_w, box_h), -1 * theta * 180.0 / math.pi))
            confidences.append(float(confidence))

    return [detections, confidences]

# ---- EAST text detection demo: load model, run inference, draw boxes ----
modelpath = "d:/downloads/frozen_east_text_detection.pb"

# Load the frozen TensorFlow graph.
net = cv2.dnn.readNetFromTensorflow(modelpath)
# Not used below; kept so the layer names can be inspected while debugging.
names = net.getLayerNames()
# Output layers requested from the network; the first result is used as the
# score map and the second as the geometry map.
outNames = ['feature_fusion/Conv_7/Sigmoid', 'feature_fusion/concat_3']
# Network input size as (width, height).
inputsize = (320, 320)

# The input needs 3 channels, so force a colour read (flag 1).
imagepath = 'd:/ocr.png'
img = cv2.imread(imagepath, 1)
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None rather
    # than raising, which would otherwise surface as an AttributeError below.
    raise FileNotFoundError("cannot read image: %s" % imagepath)

height = img.shape[0]
width = img.shape[1]
# Ratios used to map box coordinates from network-input space back onto the
# original image.
rW = width / float(inputsize[0])
rH = height / float(inputsize[1])
confThreshold = 0.5
nmsThreshold = 0.4
scalefactor = 1.0
meanval = (123.68, 116.78, 103.94)

# Pre-processing: resize to inputsize, subtract the mean, swap R and B
# channels (swapRB=True), no centre crop (crop=False).
blob = cv2.dnn.blobFromImage(img, scalefactor, inputsize, meanval, True, False)
net.setInput(blob)
out = net.forward(outNames)
t, _ = net.getPerfProfile()
label = "inference time: %.2f ms" % (t * 1000.0 / cv2.getTickFrequency())
print(label)
print(out[0].shape, out[1].shape)
scores = out[0]
geometry = out[1]

[boxes, confidences] = decode(scores, geometry, confThreshold)


def _to_pixel(value):
    """Round a coordinate to the nearest integer pixel position."""
    return int(round(value))


frame = img
# Apply non-maximum suppression to the rotated boxes.
indices = cv2.dnn.NMSBoxesRotated(boxes, confidences, confThreshold, nmsThreshold)
print(indices)
for raw_index in indices:
    # Depending on the OpenCV build, each entry is either a plain integer or
    # an integer wrapped in a 1-element array; accept both layouts.
    if hasattr(raw_index, "__len__"):
        box_index = int(raw_index[0])
    else:
        box_index = int(raw_index)

    # Get the 4 corners of the rotated rectangle.
    vertices = cv2.boxPoints(boxes[box_index])
    print("vertices:", vertices)

    # Scale the corner coordinates back to the original image size.
    for vertex in vertices:
        vertex[0] *= rW
        vertex[1] *= rH

    # Draw the closed outline by joining each corner to the next one.
    corners = [(_to_pixel(vertex[0]), _to_pixel(vertex[1])) for vertex in vertices]
    for j in range(4):
        cv2.line(frame, corners[j], corners[(j + 1) % 4], (0, 255, 0), 2, cv2.LINE_AA)

# Put efficiency information.
cv2.putText(frame, label, (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

# Display the frame for up to 3 seconds (or until a key is pressed).
cv2.imshow("result", frame)
cv2.waitKey(3000)
cv2.destroyAllWindows()

 

posted @ 2024-02-05 10:46  哈库拉  阅读(154)  评论(0)    收藏  举报