Python 判断语种类别

使用langdetect 或者langid

安装

pip install langid
or 
pip install langdetect

适用于 Linux 系统

测试

#! /usr/bin/env python
# -*- coding: utf-8 -*-#
import langid
from langdetect import detect
from langdetect import detect_langs
from langdetect import DetectorFactory
from langdetect.lang_detect_exception import LangDetectException

# Pin langdetect's seed: its algorithm is non-deterministic by default, so a
# fixed seed keeps results reproducible between runs. Set before any detection.
DetectorFactory.seed = 0


def check(line):
    """Return the set of language codes detected word by word in *line*.

    The line is split on runs of whitespace, so blank lines and repeated
    spaces never produce empty tokens. Each word is title-cased and passed
    to ``detect``; words that langdetect cannot classify are skipped
    instead of aborting the whole line.

    Args:
        line: Text to analyse; surrounding whitespace is ignored.

    Returns:
        A set of the values returned by ``detect`` for the individual
        words. Empty when the line has no detectable words.
    """
    lang_set = set()
    for word in line.split():
        try:
            lang_set.add(detect(word.title()))
        except LangDetectException:
            # Raised by langdetect when a token offers no usable features
            # (e.g. digits or punctuation only); ignore such tokens.
            continue
    return lang_set


def main(content_file):
    """Print language-detection results for each non-blank line of a file.

    For every line that still has content after stripping, three lines are
    printed: the ``detect_langs`` candidates, the first element of
    ``langid.classify``'s result, and the per-word language set from
    ``check``.

    Args:
        content_file: Path of the text file to read. It is decoded as
            UTF-8 regardless of the platform's default encoding.
    """
    # Explicit encoding: the locale default is not UTF-8 on every platform,
    # which would break reading multilingual input.
    with open(content_file, mode="r", encoding="utf-8") as f:
        for line in f:
            text = line.strip()
            if not text:
                # Nothing to classify on a blank line.
                continue
            try:
                candidates = detect_langs(text)
            except LangDetectException:
                # langdetect found no usable features in this line; report
                # an empty candidate list rather than aborting the run.
                candidates = []
            print(f"{text}:{candidates}")
            print(f"{text}:{langid.classify(text)[0]}")
            print(check(line))


if __name__ == '__main__':
    import sys

    # Take the input path from the first command-line argument when given;
    # otherwise fall back to the bundled demo file.
    content_path = sys.argv[1] if len(sys.argv) > 1 else r"input_folder/demo.txt"
    main(content_path)
posted @ 2021-10-13 16:30  不能说的秘密  阅读(262)  评论(0)  编辑  收藏  举报