代码:
"""
Created on Fri Aug 5 17:11:50 2022
@author: koneko
"""
import requests
import docx
import re
import sqlite3
import openpyxl
def translate(keyword, timeout=10):
    """Look up ``keyword`` through the Baidu Fanyi suggestion endpoint.

    Args:
        keyword: Text sent as the ``kw`` form field of the POST request.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error. Without a timeout a stalled connection would
            block the whole script indefinitely.

    Returns:
        The ``'v'`` field of the first entry of the response's ``'data'``
        list, or ``[]`` when that list is empty or missing.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = 'https://fanyi.baidu.com/sug'
    payload = {'kw': keyword}
    response = requests.post(url, data=payload, timeout=timeout)
    result = response.json()
    # A reply without a 'data' key (presumably an error reply - not verified
    # against the service) is treated like "no suggestion", so callers can
    # keep relying on the [] sentinel instead of hitting a KeyError.
    suggestions = result.get('data') or []
    if not suggestions:
        return []
    return suggestions[0]['v']
def lang_detect(keyword, timeout=10):
    """Ask the Baidu Fanyi language-detection endpoint about ``keyword``.

    Args:
        keyword: Text sent as the ``query`` form field of the POST request.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error. Without a timeout a stalled connection would
            block the whole script indefinitely.

    Returns:
        The value stored under ``'lan'`` in the JSON response.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON response has no ``'lan'`` entry.
    """
    url = 'https://fanyi.baidu.com/langdetect'
    payload = {'query': keyword}
    response = requests.post(url, data=payload, timeout=timeout)
    return response.json()['lan']
def load_docx_and_get_words(fileName, limit=10):
    """Extract the distinct lower-cased ASCII words from a .docx file.

    Args:
        fileName: Path of the document handed to ``docx.Document``.
        limit: Maximum number of word occurrences (counted before
            de-duplication) taken from the start of the text. The default
            of 10 matches the previously hard-coded cap; pass ``None`` to
            process the whole document.

    Returns:
        A list of unique words in order of first appearance.
    """
    doc = docx.Document(fileName)
    # Join with a newline so the last word of one paragraph cannot fuse with
    # the first word of the next one into a single bogus token.
    text = '\n'.join(paragraph.text for paragraph in doc.paragraphs).lower()
    words = re.findall(r'[A-Za-z]+', text)
    if limit is not None:
        words = words[:limit]
    # dict.fromkeys drops duplicates while keeping a deterministic
    # first-seen order (a plain set() would yield an arbitrary order).
    words = list(dict.fromkeys(words))
    print('总共解析出'+str(len(words))+'个单词')
    return words
def words_filter(words):
    """Drop words that are too short or not detected as English.

    A word is removed when it has two characters or fewer, or when
    ``lang_detect`` reports anything other than ``'en'`` for it. The length
    check runs first, so short words never trigger a network lookup.

    Args:
        words: Mutable list of candidate words. It is updated in place.

    Returns:
        The same list object, now containing only the surviving words.
    """
    kept = []
    # Collect survivors in a separate list: removing items from ``words``
    # while iterating over it would skip the element that follows each
    # removal and let unwanted words slip through.
    for i, word in enumerate(words):
        print(i, word)
        if len(word) <= 2:
            print('remove '+ word +' for length <= 2')
            continue
        if lang_detect(word) != 'en':
            print('remove '+ word + ' for not english' )
            continue
        kept.append(word)
    # Slice-assign so callers holding a reference to the original list still
    # observe the filtered content.
    words[:] = kept
    print('清理后共'+str(len(words))+'个单词')
    return words
def words_to_dictionary(words):
    """Translate each word and return the results as a key-sorted dict.

    Every word gets up to four calls to ``translate`` (one initial lookup
    plus three retries); the first result that is not ``[]`` is kept. Words
    that never yield a translation are reported on stdout and left out.

    Args:
        words: Iterable of words to translate.

    Returns:
        A dict mapping word to translation, with keys inserted in ascending
        order.
    """
    max_attempts = 4  # one initial lookup plus three retries
    found = {}
    for word in words:
        for _ in range(max_attempts):
            trans = translate(word)
            if trans != []:
                break
        else:
            # Every attempt came back empty.
            print(word, '找不到翻译')
            continue
        print(word)
        print(trans)
        found[word] = trans
    return {key: found[key] for key in sorted(found)}
def save_to_xlsx(fileName, dictionary):
    """Write word/translation pairs to ``<fileName>.xlsx``.

    A new workbook is created whose active sheet is titled ``vocabulary``.
    Each dictionary entry fills one row: the key in column 1 and the value
    in column 2, starting at row 1 and following the dict's iteration order.

    Args:
        fileName: Output path without the ``.xlsx`` suffix, which is added.
        dictionary: Mapping of word to translation.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = 'vocabulary'
    for row_number, (word, meaning) in enumerate(dictionary.items(), start=1):
        sheet.cell(row=row_number, column=1, value=word)
        sheet.cell(row=row_number, column=2, value=meaning)
    workbook.save(fileName + '.xlsx')
# Script pipeline: pull words out of the source document, discard the
# unusable ones, translate what is left and export it as myVocabulary.xlsx.
words = words_filter(load_docx_and_get_words('cet4-1.docx'))
dictionary = words_to_dictionary(words)
save_to_xlsx('myVocabulary', dictionary)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY"
· Docker 太简单,K8s 太复杂?w7panel 让容器管理更轻松!