Python基于 BaseHTTPRequestHandler 创建简单Web服务

启动一个最基础的 WEB 服务

创建文件 server.py

# Python 3 server example
from http.server import BaseHTTPRequestHandler, HTTPServer

# Host name and TCP port passed to HTTPServer when the script is run directly.
hostName = "localhost"
serverPort = 8080

class MyServer(BaseHTTPRequestHandler):
    """Minimal request handler that echoes the requested path back as HTML."""

    def do_GET(self):
        """Answer every GET request with a 200 page showing the request path."""
        # Imported locally so the handler does not depend on extra
        # module-level imports.
        from html import escape

        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        # self.path comes straight from the client; escape it so it cannot
        # inject markup or script into the generated page.
        self.wfile.write(bytes("<html><body><p>Request: %s</p></body></html>" % escape(self.path), "utf-8"))

if __name__ == "__main__":
    # HTTPServer is a context manager: leaving the block calls server_close(),
    # so the listening socket is released even if serve_forever() fails with
    # something other than Ctrl+C.
    with HTTPServer((hostName, serverPort), MyServer) as webServer:
        print("Server started http://%s:%s" % (hostName, serverPort))

        try:
            webServer.serve_forever()
        except KeyboardInterrupt:
            # Ctrl+C is the expected way to stop this example server.
            pass

    print("Server stopped.")

启动命令

python3 server.py

区分访问路径

在 do_GET 方法内，通过 self.path 变量区分不同的访问路径（注意：self.path 包含查询字符串，例如 /upload?a=1）

def do_GET(self):
    """Route GET requests by exact path and answer unknown paths with 404.

    The comparison against ``self.path`` is an exact string match, so only
    the literal paths ``/`` and ``/upload`` are served.
    """
    if self.path == '/':
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        self.wfile.write(bytes("<html><body><p>Request: %s</p></body></html>" % self.path, "utf-8"))
    elif self.path == '/upload':
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        self.wfile.write(bytes("<html><body><p>Request: %s</p></body></html>" % self.path, "utf-8"))
    else:
        # Every request must get an answer; without this branch the client
        # would receive no HTTP response at all for other paths.
        self.send_error(404)

处理 POST 请求

实现do_POST方法

def do_POST(self):
    """Read the request body and reply with a 200 HTML page echoing the path."""
    # Imported locally so the handler does not depend on extra
    # module-level imports.
    from html import escape

    # A request may omit Content-Length; treat that as an empty body instead
    # of failing on int(None).
    content_length = int(self.headers.get("Content-Length", 0))
    file_content = self.rfile.read(content_length)

    # Do what you wish with file_content (raw bytes of the request body).

    # Respond with 200 OK
    self.send_response(200)
    self.send_header("Content-type", "text/html")
    self.end_headers()
    # self.path is client-controlled; escape it before embedding it in HTML.
    self.wfile.write(bytes("<html><body><p>Request: %s</p></body></html>" % escape(self.path), "utf-8"))

处理请求参数和 COOKIE 等

添加以下引用

from functools import cached_property
from http.cookies import SimpleCookie
from urllib.parse import parse_qsl, urlparse

在 MyServer 类下添加以下处理方法

    @cached_property
    def url(self):
        """Request target split into components by ``urlparse`` (cached per instance)."""
        parsed = urlparse(self.path)
        return parsed

    @cached_property
    def query_data(self):
        """Query-string parameters as a plain dict; a repeated key keeps its last value."""
        pairs = parse_qsl(self.url.query)
        return {key: value for key, value in pairs}

    @cached_property
    def post_data(self):
        """Raw request body as bytes; a missing Content-Length header reads zero bytes."""
        size = int(self.headers.get("Content-Length", 0))
        body = self.rfile.read(size)
        return body

    @cached_property
    def form_data(self):
        """Request body decoded as UTF-8 and parsed as URL-encoded form fields into a dict."""
        text = self.post_data.decode("utf-8")
        return {key: value for key, value in parse_qsl(text)}

    @cached_property
    def cookies(self):
        """Cookies from the request's ``Cookie`` header as a ``SimpleCookie``; empty if absent."""
        jar = SimpleCookie()
        raw = self.headers.get("Cookie")
        if raw:
            jar.load(raw)
        return jar

处理 Multipart 文件上传

需要引入

from urllib.parse import parse_qs, parse_qsl, urlparse
import cgi

对请求根据 content-type 分别处理

    def parse_POST(self):
        """Decode the POST body according to the request's Content-Type.

        Returns:
            dict: field name -> list of values for ``multipart/form-data``
            and URL-encoded bodies; an empty dict for
            ``application/octet-stream`` and for any other content type.

        NOTE: the ``cgi`` module is deprecated since Python 3.11 and was
        removed in Python 3.13 (PEP 594); this method needs an interpreter
        that still ships it.
        """
        print(self.headers)
        # A missing Content-Type header is parsed as an empty type and falls
        # through to the final branch instead of raising.
        ctype, pdict = cgi.parse_header(self.headers.get('content-type', ''))

        if ctype == 'multipart/form-data':
            print("file request")
            # parse_multipart expects the boundary as bytes.
            pdict['boundary'] = bytes(pdict['boundary'], "utf-8")
            postvars = cgi.parse_multipart(self.rfile, pdict)

        # Membership test: `ctype == a or b` would be true for every content
        # type and make the branches below unreachable.
        elif ctype in ('application/x-www-form-urlencoded', 'application/json'):
            print("non-file request")
            # NOTE(review): JSON bodies are still run through parse_qs, not
            # json.loads -- confirm whether real JSON decoding is wanted.
            length = int(self.headers.get('content-length', 0))
            postvars = parse_qs(
                self.rfile.read(length).decode('utf8'),
                keep_blank_values=1)

        elif ctype == 'application/octet-stream':
            print("octet stream header")
            postvars = {}

        else:
            # Unknown content type: the body is not read here (peeking at the
            # socket could block when the client sent no body).
            print("nothing")
            postvars = {}
        return postvars

do_POST 中调用

    def do_POST(self):
        """Parse the POST body, print the parsed fields and acknowledge with 200."""
        postvars = self.parse_POST()
        print(postvars)
        # Always answer the client; without a response it would only see the
        # connection end with no HTTP status.
        self.send_response(200)
        self.end_headers()

一个接收文件并调用 PaddleOCR 识别的WEB服务例子

server.py

from http.server import BaseHTTPRequestHandler, HTTPServer
from urllib.parse import parse_qs, parse_qsl, urlparse
import cgi
import json
import paddleocr_helper

# Host name and TCP port passed to HTTPServer when the script is run directly.
hostName = "localhost"
serverPort = 8080

class MyServer(BaseHTTPRequestHandler):
    """HTTP handler that accepts an uploaded file and replies with OCR results as JSON."""

    def parse_POST(self):
        """Decode the POST body according to the request's Content-Type.

        Returns:
            dict: field name -> list of values for ``multipart/form-data``
            and URL-encoded bodies; an empty dict for
            ``application/octet-stream`` and for any other content type.

        NOTE: the ``cgi`` module is deprecated since Python 3.11 and was
        removed in Python 3.13 (PEP 594); this method needs an interpreter
        that still ships it.
        """
        print(self.headers)
        # A missing Content-Type header is parsed as an empty type and falls
        # through to the final branch instead of raising.
        ctype, pdict = cgi.parse_header(self.headers.get('content-type', ''))

        if ctype == 'multipart/form-data':
            print("file request")
            # parse_multipart expects the boundary as bytes.
            pdict['boundary'] = bytes(pdict['boundary'], "utf-8")
            postvars = cgi.parse_multipart(self.rfile, pdict)

        # Membership test: `ctype == a or b` would be true for every content
        # type and make the branches below unreachable.
        elif ctype in ('application/x-www-form-urlencoded', 'application/json'):
            print("non-file request")
            # NOTE(review): JSON bodies are still run through parse_qs, not
            # json.loads -- confirm whether real JSON decoding is wanted.
            length = int(self.headers.get('content-length', 0))
            postvars = parse_qs(
                self.rfile.read(length).decode('utf8'),
                keep_blank_values=1)

        elif ctype == 'application/octet-stream':
            print("octet stream header")
            postvars = {}

        else:
            # Unknown content type: the body is not read here (peeking at the
            # socket could block when the client sent no body).
            print("nothing")
            postvars = {}
        return postvars

    def do_GET(self):
        """Serve a small HTML page for ``/`` and answer every other path with 404."""
        if self.path == '/':
            self.send_response(200)
            self.send_header("Content-type", "text/html")
            self.end_headers()
            self.wfile.write(bytes("<html><body><p>Request: %s</p></body></html>\r\n" % self.path, "utf-8"))
        else:
            # Every request must get an answer; without this branch the
            # client would receive no HTTP response for other paths.
            self.send_error(404)

    def do_POST(self):
        """Run OCR on the uploaded ``file`` field and reply with a JSON document.

        The HTTP status is always 200; failures are reported in the body as
        ``{"code": 1, "message": "error", "data": null}``.
        """
        try:
            # Parsing is inside the try block so a malformed request yields
            # the JSON error document instead of an unanswered request.
            postvars = self.parse_POST()
            result = paddleocr_helper.parse_and_lookup(postvars['file'][0])
        except Exception as exc:
            # Catch Exception (not a bare except) so KeyboardInterrupt and
            # SystemExit still propagate, and record the cause in the log.
            print("Error occurred")
            self.log_error("POST %s failed: %r", self.path, exc)
            result = {
                "code": 1,
                "message": "error",
                "data": None
            }
        # Respond with 200 OK
        self.send_response(200)
        self.send_header("Content-type", "application/json")
        self.end_headers()
        self.wfile.write(bytes(json.dumps(result), "utf-8"))


if __name__ == "__main__":
    # HTTPServer is a context manager: leaving the block calls server_close(),
    # so the listening socket is released even if serve_forever() fails with
    # something other than Ctrl+C.
    with HTTPServer((hostName, serverPort), MyServer) as webServer:
        print("Server started http://%s:%s" % (hostName, serverPort))

        try:
            webServer.serve_forever()
        except KeyboardInterrupt:
            # Ctrl+C is the expected way to stop this example server.
            pass

    print("Server stopped.")

paddleocr_helper.py

import json
import re
import math
from paddleocr import PaddleOCR


# Shared OCR engine, created lazily by _get_ocr_engine().
_OCR_ENGINE = None


def _get_ocr_engine():
    """Return the shared PaddleOCR instance, building it on first use."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        # Constructing PaddleOCR downloads/loads the model, so do it only once
        # per process instead of on every request.
        _OCR_ENGINE = PaddleOCR(show_log=False, use_angle_cls=True, lang="ch")
    return _OCR_ENGINE


def parse_image(imagedata):
    """Run OCR on *imagedata* and normalise the result into plain dicts.

    Returns:
        list[list[dict]]: one inner list per element of the OCR result; each
        dict has the keys ``boxes``, ``text`` and ``score`` taken from the
        corresponding OCR line.
    """
    result = _get_ocr_engine().ocr(imagedata, cls=True)
    outputs = []
    # NOTE(review): some PaddleOCR releases reportedly return None for an
    # image/page with no detected text; `or []` treats that as "no lines"
    # rather than failing on iteration -- confirm against the installed version.
    for res in result or []:
        outputs.append([
            {
                'boxes': line[0],
                'text': line[1][0],
                'score': line[1][1],
            }
            for line in res or []
        ])
    return outputs

# Patterns are compiled once at import time and written as raw strings so that
# `\d` is not treated as a (deprecated) string escape.
# Label alternatives besides 票据号码 are presumably common OCR misreadings of it.
_LABELLED_NUMBER_RE = re.compile(r'(票据号码|柔据号码|桑据号码|柔线号码|柔楼号码|系热号码):(\d+)')
_N_PREFIXED_NUMBER_RE = re.compile(r'N(?:.*?)(\d{8,})')
# NOTE(review): the original alternation listed ':' twice; one of them was
# probably a full-width colon lost in transcription -- confirm before widening.
_NO_MARKER_RE = re.compile(r'No[.:]')
_LONG_NUMBER_RE = re.compile(r'(\d{8,})')


def lookup_invoice_number(ocr_blocks):
    """Pick the invoice number out of the OCR blocks of one page.

    Each block is a dict with a ``text`` entry (and ``boxes`` when distances
    are needed). Lookup order per block:

    1. a labelled number such as ``票据号码:123`` -> returned immediately;
    2. an ``N`` followed (anywhere later) by 8+ digits -> returned immediately;
    3. otherwise remember a block starting with ``No.``/``No:`` and every
       block containing 8+ digits.

    If a ``No`` marker block and at least one digit block were collected, the
    digits whose box is closest to the marker (per ``calcu_distance``) win.

    Returns:
        str | None: the invoice number, or None when nothing matched.

    The input blocks are not modified.
    """
    block_no = None
    # (block, digits) pairs; digits are kept separately so the caller's
    # block dicts are left untouched.
    number_blocks = []
    for ocr_block in ocr_blocks:
        text = ocr_block['text']
        labelled = _LABELLED_NUMBER_RE.search(text)
        if labelled is not None:
            return labelled.group(2)
        prefixed = _N_PREFIXED_NUMBER_RE.search(text)
        if prefixed is not None:
            return prefixed.group(1)
        if _NO_MARKER_RE.match(text):
            block_no = ocr_block
        digits = _LONG_NUMBER_RE.search(text)
        if digits is not None:
            number_blocks.append((ocr_block, digits.group(1)))

    if block_no is None or not number_blocks:
        return None

    distance_min = None
    candidate = None
    for number_block, number_text in number_blocks:
        # calculate distance between number and No
        distance = calcu_distance(block_no['boxes'], number_block['boxes'])
        print('- dist:{}, block:{}'.format(distance, number_text))
        if (distance_min is None) or (distance < distance_min):
            distance_min = distance
            candidate = number_text
    return candidate


def parse_and_lookup(imagedata):
    """Run OCR on *imagedata* and collect the invoice number found on each page.

    Returns:
        dict: ``{"code": 0, "message": "succ", "data": {"invoice_numbers": [...]}}``
        where the list holds every non-None result of ``lookup_invoice_number``
        in the order the pages were returned by ``parse_image``.
    """
    found = []
    for page_blocks in parse_image(imagedata):
        number = lookup_invoice_number(page_blocks)
        if number is None:
            continue
        found.append(number)
    return {
        "code": 0,
        "message": "succ",
        "data": {
            "invoice_numbers": found
        }
    }

def calcu_distance(boxes1, boxes2):
    """Approximate the smallest corner-to-corner distance between two boxes.

    Pass 1 finds the point of *boxes1* nearest to the first point of *boxes2*.
    Pass 2 measures every point of *boxes2* against that point and keeps the
    overall smallest distance seen in either pass.

    Returns:
        The smallest distance found, or None when both inputs are empty.
    """
    best = None
    anchor = None
    # Pass 1: nearest point of boxes1 relative to boxes2[0].
    for corner in boxes1:
        gap = point_distance(corner, boxes2[0])
        if best is not None and not gap < best:
            continue
        best, anchor = gap, corner
    # Pass 2: tighten the result using every point of boxes2.
    for corner in boxes2:
        gap = point_distance(corner, anchor)
        if best is None or gap < best:
            best = gap
    return best

def point_distance(point1, point2):
    """Return the Euclidean distance between two points given as ``(x, y)`` sequences."""
    dx = point1[0] - point2[0]
    dy = point1[1] - point2[1]
    return math.sqrt(dx**2 + dy**2)

参考

posted on 2024-03-27 01:08  Milton  阅读(227)  评论(0)  编辑  收藏  举报

导航