wordsinasentence.com 单词英英翻译 17987个 含获取nodejs代码
wordsinasentence.com 单词英英翻译 17987个
这个网站的英英单词翻译非常不错,汇总成markdown,方便查询
官方在线查询地址
https://wordsinasentence.com/vocabulary-word-list/
由于内容太长,直接贴在页面上会导致页面崩溃,所以改为附件形式,请下载后在本地查阅:
https://files.cnblogs.com/files/pengchenggang/wordsinasentence-wordsArr.zip?t=1722836509&download=true
wordsinasentence.js
// Scraper for the English-English word definitions on wordsinasentence.com
const { getPageListUrl, loadData, getTxt, saveData } = require('../getHtmlUtils.js')
const axios = require("axios")
// First and last list-page numbers (inclusive) used by step1 and step2
const startNum = 1
const endNum = 37
const rootPath = 'wordsIn/' // root directory; create it manually
const folderPath = 'wordsinasentence/' // list-page directory under rootPath; create it manually (to be named "list" next time)
const detailPath = 'details/' // detail-page directory under rootPath; create it manually
/**
 * Step 1: download every vocabulary list page (startNum..endNum) into
 * rootPath + folderPath.
 */
const step1 = () => {
  // `$num` is the placeholder that getPageListUrl replaces with the page number
  const listUrlTemplate = 'https://wordsinasentence.com/vocabulary-word-list/?_page=$num'
  const listDir = rootPath + folderPath
  getPageListUrl(listUrlTemplate, startNum, endNum, listDir)
}
/**
 * Step 2: read the saved list pages, extract the [link, word] pairs from each
 * one, and write the combined array to linkArr.json under rootPath.
 */
const step2 = () => {
  const pageCount = endNum - startNum + 1
  const pageNumbers = Array.from({ length: pageCount }, (_, offset) => startNum + offset)
  const linkPairs = pageNumbers.flatMap(pageNo => {
    const pageHtml = loadData(rootPath + folderPath + pageNo + '.html')
    // Trim the page to the region dataFilter selects before matching anchors
    return txtReg(dataFilter(pageHtml))
  })
  saveData(rootPath + 'linkArr.json', JSON.stringify(linkPairs))
}
/**
 * Extracts every `<a href="...">text</a>` anchor from the given text.
 * Only anchors whose sole attribute is href and whose content has no nested
 * tags are matched.
 *
 * @param {string} txt - HTML fragment to scan.
 * @returns {Array<[string, string]>} [href, trimmed link text] pairs in document order.
 */
const txtReg = txt => {
  const anchorPattern = /<a href="([^"]+)">([^<]+)<\/a>/gs
  return Array.from(txt.matchAll(anchorPattern), ([, href, label]) => [href, label.trim()])
}
/**
 * Cuts a list page down to the text that getTxt returns for the content-view
 * wrapper marker (start) and the pagination wrapper marker (end).
 *
 * @param {string} data - Full HTML of a list page.
 * @returns {string} The selected region of the page.
 */
const dataFilter = data => getTxt(
  `<div class="pt-cv-wrapper"><div class="pt-cv-view`,
  `<div class="text-left pt-cv-pagination-wrapper"><ul`,
  data
)
// step2()
/**
 * Step 3: download the detail pages listed in linkArr.json.
 */
const step3 = () => {
  const savedLinks = loadData(rootPath + 'linkArr.json')
  // 499 is the hard-coded start offset; getLinkManager begins with the entry after it
  getLinkManager(JSON.parse(savedLinks), 499)
}
// Index of the most recently dispatched link; shared by all download chains
let currIndex = 0
/**
 * Starts the detail-page downloads.
 *
 * @param {Array<[string, string]>} linkArr - [link, word] pairs to download.
 * @param {number} index - Offset to resume after; the first request uses index + 1.
 */
const getLinkManager = (linkArr, index) => {
  currIndex = index
  // Start 20 concurrent request chains to speed things up; each takes the next index
  let launched = 0
  while (launched < 20) {
    currIndex += 1
    getLink(linkArr, currIndex)
    launched += 1
  }
}
/**
 * Downloads one detail page, saves it as `<index>.html` under
 * rootPath + detailPath, then moves this chain on to the next undispatched
 * index (tracked in the shared currIndex).
 *
 * A failed request is logged and skipped instead of being left as an
 * unhandled rejection, so one bad page no longer stops the chain.
 *
 * @param {Array<[string, string]>} linkArr - [link, word] pairs to download.
 * @param {number} index - Position in linkArr to fetch; out-of-range ends the chain.
 */
const getLink = (linkArr, index) => {
  if (index >= linkArr.length) return
  const [link, word] = linkArr[index]
  axios.get(link)
    .then(res => {
      console.info('获取 word:' + word)
      saveData(rootPath + detailPath + index + '.html', res.data)
    })
    .catch(err => {
      // Report which entry failed so it can be re-fetched later
      console.error('获取失败 word:' + word + ' index:' + index + ' ' + err.message)
    })
    .then(() => {
      // Runs after success or failure: claim the next index and continue
      currIndex += 1
      getLink(linkArr, currIndex)
    })
}
// step3()
/**
 * Step 4: read every saved detail page, pull out the word and its definition,
 * and write them all to wordsArr.txt under rootPath as markdown sections.
 */
const step4 = () => {
  const sections = []
  // Detail pages are numbered 0..17986
  for (let i = 0; i < 17987; i += 1) {
    console.info('i', i)
    const pageHtml = loadData(rootPath + detailPath + i + '.html')
    const [title, content] = txtRegByStep4(dataFilterByStep4(pageHtml))
    sections.push(`## ${title} \n${content} \n \n`)
  }
  saveData(rootPath + 'wordsArr.txt', sections.join(''))
}
/**
 * Reads the first two `<p>` elements of a detail-page fragment.
 *
 * @param {string} html2 - Fragment produced by dataFilterByStep4.
 * @returns {[string, (string|undefined)]} The first paragraph with the first
 *   'Definition of ' removed, and the second paragraph (undefined if absent).
 * @throws {TypeError} When the fragment contains no `<p>` element.
 */
const txtRegByStep4 = html2 => {
  const paragraphPattern = /<p[^>]*>(.*?)<\/p>/gs
  const [heading, definition] = Array.from(html2.matchAll(paragraphPattern), ([, inner]) => inner)
  return [heading.replace('Definition of ', ''), definition]
}
/**
 * Cuts a detail page down to the text that getTxt returns for the
 * headline-area comment (start) and the "SENTENCE 1" comment (end).
 *
 * @param {string} data - Full HTML of a detail page.
 * @returns {string} The selected region of the page.
 */
const dataFilterByStep4 = data => getTxt(
  `<!--.headline_area-->`,
  `<!–– SENTENCE 1 */ ––>`,
  data
)
step4()
getHtmlUtils.js
const axios = require("axios")
const fs = require('fs')
exports.getPageListUrl = (url, startNum, endNum, folderPath) => {
// let i = startNum
for (let i = startNum; i <= endNum; i++) {
axios.get(url.replace('$num', i)).then(res => {
const html = res.data
const filePath = '.\\' + folderPath + i + '.html'
saveData(filePath, html)
})
}
}
exports.saveData = (filePath, data) => {
fs.writeFile(filePath, data, function () {
console.info(filePath + ' 写入完成!')
})
}
/**
 * Synchronously reads a file as UTF-8 text.
 *
 * @param {string} filePath - File to read.
 * @returns {string} The file's content.
 */
exports.loadData = filePath => fs.readFileSync(filePath, { encoding: 'utf8' })
exports.getTxt = (mainStartPosition, mainEndPosition, content) => {
// const mainStartPosition = '<div class="lacontent">'
// const mainEndPosition = '<div class="contextdhall clearfix">'
let pos1 = 0, pos2 = 0
pos1 = content.indexOf(mainStartPosition) // + mainStartPosition.length
pos2 = content.indexOf(mainEndPosition) // + mainEndPosition.length
// console.info('pos1', pos1)
// console.info('pos2', pos2)
const listContent = content.substring(pos1, pos2)
// console.info('listContent', listContent)
return listContent
}
---------------------------------------------
生活的意义就是你自己知道你要做什么,明确目标。没有目标,后面都是瞎扯!
https://pengchenggang.gitee.io/navigator/
SMART原则:
目标必须是具体的(Specific)
目标必须是可以衡量的(Measurable)
目标必须是可以达到的(Attainable)
目标必须和其他目标具有相关性(Relevant)
目标必须具有明确的截止期限(Time-based)
生活的意义就是你自己知道你要做什么,明确目标。没有目标,后面都是瞎扯!
https://pengchenggang.gitee.io/navigator/
SMART原则:
目标必须是具体的(Specific)
目标必须是可以衡量的(Measurable)
目标必须是可以达到的(Attainable)
目标必须和其他目标具有相关性(Relevant)
目标必须具有明确的截止期限(Time-based)