字体图片批量生成-字体识别模型数据

众所周知，文字有各种各样的字体，字体以字体文件的形式供操作系统使用。那么，在需要字体图片的场景（例如为字体识别模型准备训练数据）中，我们该如何快速生成这些图片呢？

这篇文章介绍如何基于操作系统自带的字体文件，利用 Python 的 Pillow 包快速批量生成字体图片。

各操作系统字体文件路径
Windows / Linux / macOS：

import os
import sys

# Collect the directories in which the current operating system keeps its
# font files. `dirs` stays empty on any platform other than Windows, Linux
# and macOS.
dirs = []
if sys.platform == "win32":
    # check the windows font repository
    # NOTE: must use uppercase WINDIR, to work around bugs in
    # 1.5.2's os.environ.get()
    windir = os.environ.get("WINDIR")
    if windir:
        dirs.append(os.path.join(windir, "fonts"))
elif sys.platform in ("linux", "linux2"):
    data_home = os.environ.get("XDG_DATA_HOME")
    if not data_home:
        # The freedesktop spec defines the following default directory for
        # when XDG_DATA_HOME is unset or empty. This user-level directory
        # takes precedence over system-level directories.
        data_home = os.path.expanduser("~/.local/share")
    xdg_dirs = [data_home]

    data_dirs = os.environ.get("XDG_DATA_DIRS")
    if not data_dirs:
        # Similarly, defaults are defined for the system-level directories
        data_dirs = "/usr/local/share:/usr/share"
    # Skip empty entries (e.g. from a trailing or doubled ":"), which would
    # otherwise turn into a bare, relative "fonts" path.
    xdg_dirs += [entry for entry in data_dirs.split(":") if entry]

    dirs += [os.path.join(xdg_dir, "fonts") for xdg_dir in xdg_dirs]
elif sys.platform == "darwin":
    dirs += [
        "/Library/Fonts",
        "/System/Library/Fonts",
        os.path.expanduser("~/Library/Fonts"),
    ]

使用 Pillow 生成图片


import os
import random

import nltk
from PIL import Image, ImageDraw, ImageFont

# Download the NLTK "inaugural" corpus; random_prose_text() below reads its
# sample text from nltk.corpus.inaugural.
nltk.download('inaugural')

def wrap_text(text, line_length=4):
    """Wrap *text* so that every line holds at most ``line_length`` words.

    Args:
        text: The text to wrap. It is split on any whitespace, so existing
            line breaks and repeated spaces are not preserved.
        line_length: Maximum number of words per line. Values below 1 are
            treated as 1, so a degenerate width yields one word per line
            instead of raising (0) or silently dropping all text (negative).

    Returns:
        The words of *text* joined by single spaces, with a newline after
        every ``line_length`` words. An empty string if *text* has no words.
    """
    words = text.split()
    words_per_line = max(1, line_length)
    return "\n".join(
        " ".join(words[start:start + words_per_line])
        for start in range(0, len(words), words_per_line)
    )


def random_prose_text(line_length=4, snippet_length=800):
    """Return a random, word-wrapped snippet of the NLTK inaugural corpus.

    Args:
        line_length: Number of words per line, passed on to wrap_text().
        snippet_length: Number of characters to cut out of the corpus before
            wrapping. Defaults to 800.

    Returns:
        The snippet re-wrapped by wrap_text(). The cut starts at a random
        character offset, so the first and last words may be partial.
    """
    corpus = nltk.corpus.inaugural.raw()
    # Clamp at 0 so that a corpus shorter than the snippet yields the whole
    # corpus instead of making randint() raise on an empty range.
    last_start = max(0, len(corpus) - snippet_length)
    start = random.randint(0, last_start)
    return wrap_text(corpus[start:start + snippet_length], line_length=line_length)


def _collect_font_files(font_dirs):
    """Return (font_path, font_name) pairs for each .ttf/.ttc file under *font_dirs*."""
    font_files = []
    for font_dir in font_dirs:
        # os.walk() descends into sub-directories and simply yields nothing
        # for a directory that does not exist, so missing font directories
        # no longer abort the run.
        for root, _subdirs, file_names in os.walk(font_dir):
            for font_file in sorted(file_names):
                # Compare case-insensitively so that e.g. "ARIAL.TTF" is found.
                if font_file.lower().endswith(('.ttf', '.ttc')):
                    font_path = os.path.join(root, font_file)
                    font_name = font_file.split('.')[0]
                    font_files.append((font_path, font_name))
    return font_files


def gen_images():
    """Render IMAGES_PER_FONT sample images for every system font found.

    Fonts are looked up in the module-level ``dirs`` list. For each image a
    random font size, text offset and line spacing are chosen, a random prose
    snippet is drawn in black on a white 800x400 canvas, and the result is
    saved as ``<font_name>_<n>.png`` (n starting at 1) in GEN_IMAGES_DIR.

    Relies on ``dirs``, ``IMAGES_PER_FONT``, ``FONT_EXCEPTS`` and
    ``GEN_IMAGES_DIR`` being defined at module level.
    """
    canvas_width, canvas_height = 800, 400

    # Make sure the output directory exists before the first save.
    os.makedirs(GEN_IMAGES_DIR, exist_ok=True)

    # Generate images for each font file
    for font_path, font_name in _collect_font_files(dirs):
        # Output the font name so we can see the progress
        print(font_path, font_name)

        is_collection = font_path.lower().endswith('.ttc')
        # The exclusion does not depend on the image index, so decide once
        # per font instead of once per image.
        # NOTE(review): as before, FONT_EXCEPTS is only honoured for non-.ttc
        # fonts - confirm whether .ttc files should be excludable as well.
        if not is_collection and font_name in FONT_EXCEPTS:
            continue

        for image_number in range(1, IMAGES_PER_FONT + 1):
            # Random font size in [18, 71]
            font_size = random.randrange(18, 72)

            if is_collection:
                # ttc fonts have multiple fonts in one file, so we need to specify which one we want
                font = ImageFont.truetype(font_path, font_size, index=0)
            else:
                # ttf fonts have only one font in the file
                font = ImageFont.truetype(font_path, font_size)

            # Determine the number of words that will fit on a line, using
            # the width of 'x' as the average character width and assuming
            # 5 characters per word. Both values are clamped to at least 1
            # so a zero-width or very wide glyph cannot cause a division by
            # zero or a zero-word line.
            char_width = max(1, font.getbbox('x')[2])
            words_per_line = max(1, int(canvas_width / (char_width * 5)))
            text = random_prose_text(line_length=words_per_line)

            img = Image.new('RGB', (canvas_width, canvas_height), color="white")  # Canvas size
            draw = ImageDraw.Draw(img)

            # Random offsets, but ensuring that text isn't too far off the canvas
            offset_x = random.randint(-20, 10)
            offset_y = random.randint(-20, 10)

            # vary the line height
            line_height = random.uniform(0, 1.25) * font_size
            draw.text((offset_x, offset_y), text, fill="black", font=font, spacing=line_height)

            output_file = os.path.join(GEN_IMAGES_DIR, f"{font_name}_{image_number}.png")
            img.save(output_file)

源代码都记录在这里了：
https://github.com/chenzuoli/font-identifier

本代码参考开源项目：https://huggingface.co/gaborcselle/font-identifier

好了，记录到这里，持续更新中。

记录问题也是一种修行。

欢迎关注微信公众号，你的资源可变现：【乐知付加密平台】

欢迎关注微信公众号，这里记录博主的创业之旅：【程序员写书】

一起学习，一起进步。

posted @ 2024-12-09 23:49 陈作立的博客阅读(128) 评论(0) 收藏举报

刷新页面返回顶部

字体图片批量生成-字体识别模型数据

公告