from fontTools.ttLib import TTFont
import ddddocr
from io import BytesIO
from PIL import Image, ImageDraw, ImageFont
def convert_cmap_to_image(cmap_code, font_path):
    """Render the character for one cmap code point onto a square bitmap.

    Args:
        cmap_code: Integer code point; converted to a character with ``chr``.
        font_path: Font file handed to ``ImageFont.truetype``.

    Returns:
        A 1024x1024 mode ``"1"`` PIL image, background value 255, with the
        character's ink box centred on the canvas.
    """
    img_size = 1024
    img = Image.new("1", (img_size, img_size), 255)  # 1-bit image, white background
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype(font_path, img_size)  # font size equals the canvas size
    character = chr(cmap_code)

    # textbbox returns (left, top, right, bottom) for text anchored at (0, 0).
    # left/top are usually non-zero (bearing / ascent gap), so they must be
    # subtracted from the draw origin; otherwise the glyph is shifted by that
    # offset and can be clipped at the canvas edge.
    left, top, right, bottom = draw.textbbox((0, 0), character, font=font)
    width = right - left
    height = bottom - top
    origin = (
        (img_size - width) // 2 - left,
        (img_size - height) // 2 - top,
    )
    draw.text(origin, character, font=font)
    return img
def extract_text_from_font(font_path):
    """Build a glyph-name -> recognised-text table for a font via OCR.

    Every entry of the font's best Unicode cmap is rendered with
    ``convert_cmap_to_image``, encoded as PNG and passed to ddddocr.

    Args:
        font_path: Font file; opened by ``TTFont`` here and again by the
            renderer for each glyph.

    Returns:
        A dict whose keys are the glyph names lower-cased with ``"uni"``
        replaced by ``"&#x"`` (e.g. ``"uniE001"`` -> ``"&#xe001"``) and whose
        values are the OCR results. Empty when the font exposes no Unicode
        cmap.
    """
    font = TTFont(font_path)
    try:
        # getBestCmap() returns None when no Unicode cmap subtable exists;
        # fall back to an empty mapping instead of failing on `.items()`.
        best_cmap = font['cmap'].getBestCmap() or {}
    finally:
        # Release the underlying file handle; the cmap dict is already loaded.
        font.close()

    if not best_cmap:
        return {}

    ocr = ddddocr.DdddOcr(beta=True, show_ad=False)
    font_map = {}
    for cmap_code, glyph in best_cmap.items():
        image = convert_cmap_to_image(cmap_code, font_path)
        bytes_io = BytesIO()
        image.save(bytes_io, format='PNG')
        # Recognise the rendered glyph from its PNG bytes.
        text = ocr.classification(bytes_io.getvalue())
        # Key format follows the target sample's HTML-entity style.
        font_map[glyph.replace("uni", "&#x").lower()] = text
    return font_map
# font_file_path = "./font.woff"
# print(extract_text_from_font(font_file_path))