红宝书词汇导出成欧路词典单词库和生词本
记录一下红宝书数据库导出成欧路词典格式的代码
import sqlite3
import time
# Start timestamp in nanoseconds, used for the run-time report at the end.
beginTime = time.time_ns()

# Source SQLite database and destination dictionary-library file.
dbPath = './2023.db'
outputPath = './2023RedBabyBook.txt'

# On/off switch for every optional column.
settings = dict(
    unit=True,            # unit
    support=True,         # mnemonic
    derivative=True,      # derived words
    antonym=True,         # antonyms
    phrase=True,          # phrases
    discrimination=True,  # sense discrimination
    related=True,         # related words
    example=True,         # example sentences
    book=True,            # weight
)

# Rendering order of the items that appear only once per word.
onceItemOrder = [
    'support', 'derivative', 'antonym', 'phrase',
    'discrimination', 'related', 'unit', 'book',
]

# Line-break marker inserted into the generated text.
nl = '<br>'


def _labelled(key, label, *, needsValue=True, leadingBreak=False):
    """Build the formatter for one optional column.

    The returned callable gives '' when settings[key] is off, or when
    needsValue is set and the column value is falsy. Otherwise it returns
    the value prefixed with label, preceded by nl when leadingBreak is set.
    settings and nl are read on every call, not when the formatter is built.
    """
    def render(value):
        if not settings[key]:
            return ''
        if needsValue and not value:
            return ''
        lead = nl if leadingBreak else ''
        return f"{lead}{label}{value}"
    return render


# Optional columns. Each value is a list whose first element is the
# formatter for that column; it is kept as a list so it can be extended.
opt = {
    'unit': [_labelled('unit', "from Unit ", needsValue=False)],
    'support': [_labelled('support', "助记:")],
    'derivative': [_labelled('derivative', "派生词:")],
    'antonym': [_labelled('antonym', "反义词:", leadingBreak=True)],
    'phrase': [_labelled('phrase', "词组和短语:")],
    'discrimination': [_labelled('discrimination', "词义辨析:")],
    'related': [_labelled('related', "关联词:")],
    'example': [_labelled('example', "", leadingBreak=True)],
    'book': [_labelled('book', "属于", needsValue=False)],
}

# Mandatory columns: word, part of speech, meaning.
ess = ['word', 'pos', 'meaning']

# The author wanted to select only the columns that are switched on but
# did not get it working.
# TODO: build the column list from the enabled entries only.
selectedColumns = [*ess, *opt]
sql = "SELECT " + ','.join(selectedColumns) + " FROM words_all_info;"
# Open the database and fetch every row of the query in one go.
conn = sqlite3.connect(dbPath)
curs = conn.cursor()
curs.execute(sql)
data = curs.fetchall()
# After the query, append to every opt entry the position of its column
# among the optional columns, so each entry becomes [formatter, index].
# A plain loop replaces the earlier map/zip construct, which built
# throw-away tuples only for the side effect and raised when opt was empty.
for columnIndex, columnName in enumerate(opt):
    opt[columnName].append(columnIndex)
# Containers for the final result:
#   wordsDic maps a word to its 'pri' / 'sec' / 'once' text fragments;
#   booksDic maps a unit number to the list of words filed under it.
essCount = len(ess)
# Position of the unit column in a fetched row: the essential columns come
# first, then the optional ones in the order recorded in opt.
unitColumn = essCount + opt['unit'][-1]
# default=0 keeps an empty result set from raising; booksDic is then empty.
highestUnit = max((int(row[unitColumn]) for row in data), default=0)
wordsDic, booksDic = {}, {k: [] for k in range(1, highestUnit + 1)}
# Header that opens the example section of an entry.
exampleHeader = f"{nl*4}例句:"
for items in data:
    word, pos, meaning = items[:essCount]
    inf = items[essCount:]
    # Run every optional column through its formatter ('' when disabled/empty).
    res = {k: opt[k][0](inf[opt[k][1]]) for k in opt}
    posMeaning = pos + ' ' + meaning
    example = res['example']
    if word in wordsDic:
        # Further row for a known word: add the extra sense and its example.
        entry = wordsDic[word]
        entry['pri'] += nl + posMeaning
        if example:
            # Open the example section here when no earlier row had an
            # example, so the header is never missing.
            entry['sec'] += example if entry['sec'] else exampleHeader + example
    else:
        wordsDic[word] = {
            'pri': posMeaning,
            'sec': exampleHeader + example if example else '',
            'once': nl * 2 + (nl * 2).join([res[k] for k in onceItemOrder if res[k]]),
        }
        # File the word under its unit using the raw column value rather than
        # parsing it back out of the formatted text, which failed when the
        # 'unit' switch was off and for unit numbers with more than two digits.
        # NOTE(review): the listing lost its indentation; filing the word only
        # on its first row (inside this branch) is the assumed intent - confirm.
        booksDic.setdefault(int(items[unitColumn]), []).append(word)
# Write the library file in the Eudic format: one "word@explanation" line
# per word, the explanation being the entry's fragments concatenated in order.
with open(outputPath, 'w', encoding='utf-8') as libraryFile:
    for headword, entry in wordsDic.items():
        explanation = ''.join(entry.values())
        libraryFile.write(headword + '@' + explanation + '\n')
# Write the word list grouped by unit: a "#UnitNN" heading line followed by
# that unit's words, one per line.
with open('unitwords.txt', 'w', encoding='utf-8') as unitFile:
    lineBreak = '\n'
    sections = []
    for unitNo, unitWords in booksDic.items():
        heading = f"#Unit{unitNo:02d}"
        sections.append(heading + lineBreak + lineBreak.join(unitWords))
    unitFile.write(lineBreak.join(sections))
# Release the database resources.
curs.close()
conn.close()

# Report how many words were written and the elapsed time in seconds.
endTime = time.time_ns()
elapsedSeconds = (endTime - beginTime) / 1e9
wordCount = len(wordsDic)
print(f"一共写入{wordCount}条单词,共耗时{elapsedSeconds:.2f}秒")
本文来自博客园,作者:Biem,转载请注明原文链接:https://www.cnblogs.com/biem/p/16101097.html