D:\code_gitee\python_get_msedge_history\新建文件夹\cnblog2anki.py
from base64 import encode
import os
import re
import shutil
import easygui
import requests
from subprocess import run
from easygui import *
from bs4 import BeautifulSoup
def get_html_content(link, timeout=10):
    """Download a blog post page and return its title and body elements.

    :param link: URL of the page to fetch.
    :param timeout: seconds to wait for the server before giving up. Without
        a timeout ``requests.get`` may block indefinitely on a stalled
        connection.
    :return: ``(title, content)`` -- the first elements matching the
        ``.postTitle`` and ``.postText`` CSS selectors.
    :raises IndexError: if the page has no element for either selector.
    :raises requests.exceptions.RequestException: on network failure or
        timeout.
    """
    # The request is sent with a desktop-browser User-Agent header.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36'
    }
    res = requests.get(url=link, headers=headers, timeout=timeout).text
    soup = BeautifulSoup(res, "html.parser")
    title = soup.select('.postTitle')[0]
    content = soup.select('.postText')[0]
    return title, content
class User(EgStore):
    """EgStore subclass carrying a single ``path`` attribute."""

    def __init__(self, filename):
        """Set the default ``path`` and initialise the store with *filename*.

        :param filename: file name handed to ``EgStore``.
        """
        # Default value, assigned before the base initialiser runs.
        self.path = ''
        super().__init__(filename)
def get_file_path():
    """Ask the user, through easygui dialogs, which files to process.

    The user first answers ``2`` to pick a directory; any other answer opens
    a multi-file picker. The chosen location is saved in ``settings.txt``
    (via ``User``) and used as the default for the next dialog.

    :return: list of file paths. In directory mode these are all files below
        the chosen directory whose name ends in ``.html``, ``.mhtml``,
        ``.htm`` or ``.txt``. An empty list is returned when the dialog
        yields nothing (e.g. it was cancelled).
    """
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)
    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        directory = easygui.diropenbox(default=user.path)
        if not directory:
            # Nothing chosen: os.walk(None) would raise, so stop here.
            return []
        user.path = directory
        user.store()

        # Raw string: "\s" / "\." are invalid escapes in a normal literal.
        pattern = re.compile(r"^[\s\S]*\.(html|mhtml|htm|txt)$")
        files = []
        for dirpath, _dirnames, filenames in os.walk(directory):
            for name in filenames:
                # Join with the directory being walked, not the root, so
                # files in subdirectories get a path that actually exists.
                filename = os.path.join(dirpath, name)
                if pattern.match(filename):
                    files.append(filename)
        return files

    selected = easygui.fileopenbox(multiple=True, default=user.path)
    if not selected:
        # Nothing chosen: indexing the result below would raise.
        return []
    user.path = selected[0]
    user.store()
    return selected
def setDir(filepath):
    """Ensure *filepath* is an existing, empty directory.

    If the directory does not exist it is created, together with any missing
    parent directories. If it already exists, it is removed with its contents
    and created again.

    :param filepath: path of the directory to create or reset.
    :return: None
    """
    if os.path.exists(filepath):
        # Best-effort removal: deletion errors are ignored.
        shutil.rmtree(filepath, ignore_errors=True)
    # exist_ok=True covers the case where rmtree could not remove the
    # directory itself; makedirs also creates missing parents.
    os.makedirs(filepath, exist_ok=True)
def cnblog2anki(file):
    """Extract ``(title, url)`` pairs from a saved HTML table of links.

    Links are read from the ``<a>`` directly inside the first cell of each
    row of the first ``<tbody>`` in the document.

    :param file: path of a UTF-8 encoded HTML file.
    :return: list of ``(title, url)`` tuples. Rows without such a link (or
        whose link has no ``href``) are skipped; a document without a
        ``<tbody>`` yields an empty list.
    """
    with open(file, "r", encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    bodies = soup.select("tbody")
    if not bodies:
        return []

    res = []
    for tr_ele in bodies[0].select('tr'):
        anchors = tr_ele.select('td:nth-child(1)>a')
        if not anchors:
            # Rows with no link in the first cell carry nothing to export.
            continue
        anchor = anchors[0]
        href = anchor.get('href')
        if not href:
            continue
        # Only add the scheme when the href does not already carry one;
        # prefixing an absolute URL would produce "http:https://...".
        if href.startswith(('http://', 'https://')):
            url = href
        else:
            url = 'http:' + href
        res.append((anchor.text, url))
    return res
def write2txt(msg, path=None):
    """Append *msg* to a UTF-8 text file.

    :param msg: a string, or an iterable of strings, to append as-is.
    :param path: destination file. When omitted, the name is derived from the
        module-level variable ``file`` with ``.csv`` appended, so existing
        one-argument calls keep working.
    :return: None
    """
    if path is None:
        # Backward-compatible default: relies on a global named `file`
        # being bound by the caller's module-level code.
        path = file + '.csv'
    with open(path, "a", encoding='utf-8') as f:
        if isinstance(msg, str):
            f.write(msg)
        else:
            f.writelines(msg)
if __name__ == '__main__':
    # For every selected file: extract its (title, url) pairs, echo each pair
    # and append one row (title, TAB, HTML link) through write2txt.
    selected_files = get_file_path()
    # NOTE: the loop variable has to be called `file`; write2txt reads that
    # global to derive the name of the output file.
    for file in selected_files:
        for content in cnblog2anki(file):
            print(content)
            title, url = content[0], content[1]
            write2txt(f'{title} \t<a href={url} >{title} </a>\n')
D:\code_gitee\python_get_msedge_history\新建文件夹\docs2anki.ts
import { readFileSync, appendFile } from "fs" ;
import { readdirSync, statSync } from "node:fs" ;
import { resolve, join } from "node:path" ;
// Convert every file found under ./content into rows of anki.txt.
const files = getAllFilePath(resolve("./content"));
files.forEach((file) => {
  const text = readFileSync(file, { encoding: "utf-8" });
  console.log(file);
  // Label the rows with whatever follows the last "content\" in the path
  // (backslash separator, i.e. Windows-style paths).
  const label = file.split("content\\").pop() ?? "";
  write2file(text, label);
});
/**
 * Split `content` into pieces and append one row per piece to "anki.txt".
 *
 * Each row is `<div>text</div>`, a TAB, and a link labelled `fileName` whose
 * target is built by `rebuildUrl` from the encoded piece.
 *
 * All rows of one call are written with a single `appendFile` call. Issuing
 * one un-awaited `appendFile` per row gives no guarantee about the order in
 * which the rows reach the file.
 *
 * @param content  full text of one source file
 * @param fileName label used as the link text of every row
 */
function write2file(content: string, fileName: string) {
  const contents = splitContent(content);
  if (!contents) return;

  const rows = contents.map((piece) => {
    const contentEncoding = encodeContent(piece);
    const url = rebuildUrl(contentEncoding);
    const contentDecoding = decodeURIComponent(contentEncoding);
    return `<div>${contentDecoding} </div>\t<a href=${url} >${fileName} </a>\n`;
  });
  if (rows.length === 0) return;

  appendFile("anki.txt", rows.join(""), (err) => {
    // Report the failure instead of dropping it silently.
    if (err) console.error(`failed to append rows for ${fileName}:`, err);
  });
}
/**
 * Recursively collect the paths of all regular files beneath `filePath`.
 *
 * Entries are visited in the order `readdirSync` returns them; a directory
 * is descended into as soon as it is encountered.
 *
 * @param filePath directory to start from
 * @returns paths of the files found, each joined onto `filePath`
 */
function getAllFilePath(filePath: string) {
  const collected: string[] = [];

  const walk = (dir: string): void => {
    for (const entry of readdirSync(dir)) {
      const full = join(dir, entry);
      const info = statSync(full);
      if (info.isFile()) {
        collected.push(full);
      } else if (info.isDirectory()) {
        walk(full);
      }
    }
  };

  walk(filePath);
  return collected;
}
/**
 * URI-encode `content` and normalise it for use as one row of text.
 *
 * After `encodeURIComponent`:
 *  - every encoded CRLF becomes two encoded underscores (`%5F%5F`),
 *  - every remaining encoded LF becomes one (`%5F`),
 *  - every span delimited by triple backticks (`%60%60%60 ... %60%60%60`)
 *    is removed.
 *
 * The span match is non-greedy so that each fenced span is removed on its
 * own; a greedy match would also delete the text between two spans.
 *
 * @param content raw text
 * @returns the encoded, normalised text
 */
function encodeContent(content: string) {
  return encodeURIComponent(content)
    .replace(/%0D%0A/g, "%5F%5F")
    .replace(/%0A/g, "%5F")
    .replace(/(%60){3}.*?(%60){3}/g, "");
}
/**
 * Build the Sogou translate URL whose `keyword` parameter is `content`.
 *
 * `content` is inserted verbatim (no additional encoding is applied here).
 * The returned string ends with a single trailing space.
 */
function rebuildUrl(content: string) {
  const endpoint = "https://fanyi.sogou.com/text";
  const query = "?transfrom=auto&transto=zh-CHS&model=general&keyword=";
  return endpoint + query + content + " ";
}
/**
 * Split `content` after every "." or "!" that is followed (optionally after
 * whitespace) by an upper-case ASCII letter.
 *
 * Each piece keeps its terminating punctuation; the text after the last
 * boundary is returned as the final piece.
 *
 * Inside a character class "|" is a literal, so the class is written as
 * `[.!]`; with a "|" in it, a pipe followed by a capital letter would also
 * be treated as a boundary.
 *
 * @param content text to split
 * @returns the pieces in order, or `undefined` when no boundary is found
 */
function splitContent(content: string) {
  const contents: string[] = [];
  let startIndex = 0;

  for (const match of content.matchAll(/[.!]\s*[A-Z]/g)) {
    // Cut just after the punctuation character the match starts with.
    const endIndex = (match.index as number) + 1;
    contents.push(content.slice(startIndex, endIndex));
    startIndex = endIndex;
  }

  if (contents.length === 0) return;
  contents.push(content.slice(startIndex));
  return contents;
}
D:\code_gitee\python_get_msedge_history\新建文件夹\get_history.py
import os
import sqlite3
class History:
    """Reader for the ``urls`` table of a ``History`` SQLite database file."""

    def __init__(self, chromePath):
        """Remember where the database lives.

        :param chromePath: directory that contains the ``History`` file.
        """
        self.chromePath = chromePath

    def connect(self):
        """Open the ``History`` database found in ``chromePath``."""
        self.conn = sqlite3.connect(os.path.join(self.chromePath, "History"))
        # Attribute name (spelling included) kept for backward compatibility.
        self.cousor = self.conn.cursor()

    def close(self):
        """Close the database connection opened by :meth:`connect`."""
        self.conn.close()

    def get_history(self):
        """Return every row of ``urls`` as a dict.

        :return: list of ``{'id': ..., 'url': ..., 'title': ...}`` dicts.
        """
        cursor = self.conn.execute(
            "SELECT id,url,title from urls")
        # The query yields exactly three columns, so unpack exactly three;
        # unpacking a fourth name raises ValueError on the first row.
        return [
            {'id': _id, 'url': url, 'title': title}
            for _id, url, title in cursor
        ]
if __name__ == "__main__":
    # Append the title of every row in the History database of the current
    # directory to history.txt, one title per line.
    path = ".\\"
    history = History(path)
    history.connect()
    try:
        rows = history.get_history()
        # `with` guarantees the output file is flushed and closed.
        with open('.//history.txt', 'a', encoding='utf-8') as f:
            for row in rows:
                # A NULL title is returned as None; write an empty line
                # rather than failing on None + str.
                f.write((row['title'] or '') + "\n")
    finally:
        # Close the database even when reading or writing fails.
        history.close()
D:\code_gitee\python_get_msedge_history\新建文件夹\get_history.ts
import { verbose } from "sqlite3" ;
import { appendFileSync, write } from "fs" ;
/** Shape of one row returned by the `SELECT url,title FROM urls ...` queries. */
interface IRow {
  url: string;
  title: string;
}
const sqlite3 = verbose();
const db = new sqlite3.Database("History.sqlite");

// Each entry pairs a query over `urls` with the dictionary name that
// handleRow/getContent use to pick the matching URL format.
const sources: Array<[string, string]> = [
  ["SELECT url,title FROM urls WHERE url LIKE '%youdao.com%' ", "youdao"],
  ["SELECT url,title FROM urls WHERE url LIKE '%fanyi.baidu.com%' ", "baidu"],
];

for (const [sql, cidian] of sources) {
  db.each(sql, (err: Error | null, row: IRow) => {
    if (err) {
      // Report the failure instead of dropping it silently.
      console.error(`query for ${cidian} failed:`, err);
      return;
    }
    handleRow(row, cidian);
  });
}

// NOTE(review): close() is issued right after the queries are started;
// presumably the driver defers it until they finish -- confirm.
db.close();
/**
 * Turn one history row into a line of history.txt.
 *
 * The looked-up text is taken from `row.url` by `getContent`; rows for which
 * nothing is extracted are skipped. The written line is the decoded text, a
 * TAB, and a link to the rebuilt URL labelled with the same text.
 *
 * @param row    history row (only `url` is read)
 * @param cidian dictionary name passed on to `getContent`
 */
function handleRow(row: IRow, cidian: string) {
  const encoded = getContent(row.url, cidian);
  if (!encoded) {
    return;
  }
  const link = rebuildUrl(encoded);
  const text = decodeContent(encoded);
  write2file(`${text}\t<a href=${link} >${text} </a>\n`);
}
/**
 * Percent-decode `url` with `decodeURIComponent`.
 *
 * @param url percent-encoded text
 * @returns the decoded text
 */
function decodeContent(url: string) {
  const decoded = decodeURIComponent(url);
  return decoded;
}
/**
 * Extract the (still percent-encoded) looked-up text from a dictionary URL.
 *
 * - "youdao": the value between `word=` and the first following `&lang=en`.
 *   It is captured by pattern rather than by cutting a fixed number of
 *   characters off the end, so URLs with further parameters after
 *   `lang=en` are handled too.
 * - "baidu": everything after `#en/zh/`.
 *
 * @param url    history URL
 * @param cidian "youdao" or "baidu"
 * @returns the extracted text, or `undefined` when the URL does not match
 *          or `cidian` is not a known dictionary
 */
function getContent(url: string, cidian: string) {
  switch (cidian) {
    case "youdao": {
      const match = /word=(.+?)&lang=en/.exec(url);
      return match ? match[1] : undefined;
    }
    case "baidu": {
      const startPos = url.search(/#en\/zh\/.+/);
      if (startPos === -1) return;
      // 7 === "#en/zh/".length
      return url.slice(startPos + 7);
    }
    default:
      return;
  }
}
/**
 * Append `content` to "history.txt" (UTF-8), synchronously.
 *
 * @param content text to append as-is
 */
function write2file(content: string) {
  const target = "history.txt";
  appendFileSync(target, content, { encoding: "utf8" });
}
/**
 * Build the Sogou translate URL whose `keyword` parameter is `content`.
 *
 * `content` is inserted verbatim. The returned string ends with a single
 * trailing space.
 */
function rebuildUrl(content: string) {
  const parts = [
    "https://fanyi.sogou.com/text?transfrom=auto&transto=zh-CHS&model=general&keyword=",
    content,
    " ",
  ];
  return parts.join("");
}
D:\code_gitee\python_get_msedge_history\新建文件夹\readFile.ts
import { readdirSync, readdir, stat, statSync } from "node:fs" ;
import { resolve, join } from "node:path" ;
// Print, for every file under ./content, the part of its path that follows
// the last "content\" (backslash separator, i.e. Windows-style paths).
const filePath = resolve("./content");
const files = getAllFilePath(filePath);
files.forEach((file) => {
  const shown = file.split("content\\").pop();
  console.log(shown);
});
/**
 * List every regular file found below `filePath`, descending into
 * subdirectories.
 *
 * Directory entries are processed in `readdirSync` order and a
 * subdirectory is explored at the point where it is met.
 *
 * @param filePath root directory of the search
 * @returns the collected file paths
 */
function getAllFilePath(filePath: string) {
  const result: string[] = [];
  collect(filePath);
  return result;

  function collect(currentDir: string): void {
    for (const name of readdirSync(currentDir)) {
      const candidate = join(currentDir, name);
      const stats = statSync(candidate);
      if (stats.isFile()) {
        result.push(candidate);
        continue;
      }
      if (stats.isDirectory()) {
        collect(candidate);
      }
    }
  }
}
D:\code_gitee\python_get_msedge_history\新建文件夹\replaceCode.ts
// Scratch check of the fence-stripping pattern: remove the span delimited by
// encoded triple backticks from a percent-encoded sample, then print the
// result both encoded and decoded.
const encodedSample = `%0A%0AIf%20you%20want%20the%20node%20application%20to%20close%20after%20the%20script%20finishes%20(e.g.%2C%20for%20a%20script%20running%20CRON%20jobs)%2C%20add%20%60await%20app.close()%60%20to%20the%20end%20of%20your%20%60bootstrap%60%20function%3A%0A%0A%60%60%60typescript%0A%40%40filename()%0Aasync%20function%20bootstrap()%20%7B%0A%20%20const%20app%20%3D%20await%20NestFactory.createApplicationContext(AppModule)%3B%0A%20%20%2F%2F%20application%20logic...%0A%20%20await%20app.close()%3B%0A%7D%0Abootstrap()%3B%0A%60%60%60%0A%0A%23%23%23%23%20Example%0A%0AA%20working%20example%20is%20available%20%5Bhere%5D(https%3A%2F%2Fgithub.com%2Fnestjs%2Fnest%2Ftree%2Fmaster%2Fsample%2F18-context).%0A`;
const fencePattern = /(%60){3}.*(%60){3}/g;
const stripped = encodedSample.replace(fencePattern, "");
console.log(stripped);
console.log(decodeURIComponent(stripped));
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 单线程的Redis速度为什么快?
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 展开说说关于C#中ORM框架的用法!