cnblog2anki
| from base64 import encode |
| import os |
| import re |
| import shutil |
| import easygui |
| from subprocess import run |
| from easygui import * |
| from bs4 import BeautifulSoup |
| |
| |
class User(EgStore):
    """Settings record persisted through easygui's ``EgStore``.

    ``path`` holds the last file or directory picked in the dialogs and is
    empty until something has been stored.
    """

    def __init__(self, filename):
        # Assign the default before the base initializer runs, exactly as the
        # original ordering did.
        self.path = ''
        super().__init__(filename)
| |
| |
def get_file_path():
    """Ask the user which files to process and return their paths.

    An easygui prompt selects the mode. Entering ``2`` opens a directory
    picker and collects every ``.html`` / ``.mhtml`` / ``.htm`` / ``.txt``
    file found under the chosen directory, including sub-directories. Any
    other answer opens a multi-select file picker. The chosen location is
    saved to ``settings.txt`` through ``User`` and offered as the default the
    next time a picker is opened.

    :return: list of file paths; empty when the picker dialog is cancelled
    """
    mode = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    user = User("settings.txt")
    user.restore()

    if mode == '2':
        root = easygui.diropenbox(default=user.path)
        if not root:
            # Dialog cancelled: nothing to scan, keep the stored path as is.
            return []
        user.path = root
        user.store()

        wanted = ('.html', '.mhtml', '.htm', '.txt')
        # Join each name with the directory currently being walked (not the
        # root) so files inside sub-directories get a path that exists.
        return [
            os.path.join(dirpath, name)
            for dirpath, _dirnames, filenames in os.walk(root)
            for name in filenames
            if name.endswith(wanted)
        ]

    chosen = easygui.fileopenbox(multiple=True, default=user.path)
    if not chosen:
        # Dialog cancelled: indexing the result below would fail.
        return []
    user.path = chosen[0]
    user.store()
    return chosen
| |
| |
def setDir(filepath):
    """Make sure *filepath* is an existing, empty directory.

    An existing directory is removed together with its contents and then
    recreated. A missing one is created, including any missing parent
    directories.

    :param filepath: path of the directory to create or reset
    :return: None
    :raises FileExistsError: if *filepath* exists but is not a directory
    """
    if os.path.isdir(filepath):
        # Best-effort wipe: deletion errors are ignored, as in the original.
        shutil.rmtree(filepath, ignore_errors=True)
    # makedirs also creates missing parents; exist_ok covers the case where
    # the wipe above could not remove the directory itself.
    os.makedirs(filepath, exist_ok=True)
| |
| |
def cnblog2anki(file):
    """Extract ``(title, url)`` pairs from a saved HTML page.

    Every ``<tr>`` of the first ``<tbody>`` in the page contributes the text
    and link target of the ``<a>`` that is a direct child of the row's first
    cell. Rows without such a link, or whose link has no ``href``, are
    skipped.

    :param file: path of a UTF-8 encoded HTML file
    :return: list of ``(title, url)`` tuples in row order; empty when the
        page contains no ``<tbody>``
    """
    with open(file, "r", encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    tbody = soup.select_one("tbody")
    if tbody is None:
        return []

    res = []
    for row in tbody.select('tr'):
        link = row.select_one('td:nth-child(1)>a')
        if link is None:
            continue
        href = link.get('href')
        if not href:
            continue
        # The scheme is prepended as before, except when the href is already
        # an absolute http(s) URL, which would otherwise become
        # "http:https://...".
        if href.startswith(('http://', 'https://')):
            url = href
        else:
            url = 'http:' + href
        res.append((link.text, url))
    return res
| |
| |
def write2txt(msg, out_path=None):
    """Append *msg* to a UTF-8 text file.

    :param msg: text to append; written as-is, so the caller supplies any
        trailing line break
    :param out_path: file to append to. When omitted, the target is
        ``file + '.csv'`` where ``file`` is the module-level variable bound
        by the ``for file in ...`` loop of the script entry point
    :return: None
    """
    if out_path is None:
        # Backward-compatible default: relies on the global ``file``.
        out_path = file + '.csv'
    with open(out_path, "a", encoding='utf-8') as f:
        # write() emits the string in one call; writelines() would iterate
        # over it character by character.
        f.write(msg)
| |
| |
if __name__ == '__main__':
    # The loop variable must stay named ``file``: write2txt() reads it as a
    # module-level global to build the output path ``<file>.csv``.
    for file in get_file_path():
        for title, url in cnblog2anki(file):
            print((title, url))
            # One tab-separated row per entry: plain title, then the title as
            # a link. The href is quoted so URLs containing '=' or spaces
            # stay a single attribute value.
            # NOTE(review): the title is written unescaped; a tab or HTML
            # markup inside it would corrupt the row - confirm whether
            # escaping is wanted.
            write2txt(f'{title}\t<a href="{url}">{title}</a>\n')
weibo2anki
| import os |
| import re |
| import shutil |
| |
| import easygui |
| from bs4 import BeautifulSoup |
| from easygui import * |
| |
| |
class User(EgStore):
    """Settings record persisted through easygui's ``EgStore``.

    ``path`` holds the last file or directory picked in the dialogs and is
    empty until something has been stored.
    """

    def __init__(self, filename):
        # Assign the default before the base initializer runs, exactly as the
        # original ordering did.
        self.path = ''
        super().__init__(filename)
| |
| |
def get_file_path():
    """Ask the user which files to process and return their paths.

    An easygui prompt selects the mode. Entering ``2`` opens a directory
    picker and collects every ``.html`` / ``.mhtml`` / ``.htm`` / ``.txt``
    file found under the chosen directory, including sub-directories. Any
    other answer opens a multi-select file picker. The chosen location is
    saved to ``settings.txt`` through ``User`` and offered as the default the
    next time a picker is opened.

    :return: list of file paths; empty when the picker dialog is cancelled
    """
    mode = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    user = User("settings.txt")
    user.restore()

    if mode == '2':
        root = easygui.diropenbox(default=user.path)
        if not root:
            # Dialog cancelled: nothing to scan, keep the stored path as is.
            return []
        user.path = root
        user.store()

        wanted = ('.html', '.mhtml', '.htm', '.txt')
        # Join each name with the directory currently being walked (not the
        # root) so files inside sub-directories get a path that exists.
        return [
            os.path.join(dirpath, name)
            for dirpath, _dirnames, filenames in os.walk(root)
            for name in filenames
            if name.endswith(wanted)
        ]

    chosen = easygui.fileopenbox(multiple=True, default=user.path)
    if not chosen:
        # Dialog cancelled: indexing the result below would fail.
        return []
    user.path = chosen[0]
    user.store()
    return chosen
| |
| |
def setDir(filepath):
    """Make sure *filepath* is an existing, empty directory.

    An existing directory is removed together with its contents and then
    recreated. A missing one is created, including any missing parent
    directories.

    :param filepath: path of the directory to create or reset
    :return: None
    :raises FileExistsError: if *filepath* exists but is not a directory
    """
    if os.path.isdir(filepath):
        # Best-effort wipe: deletion errors are ignored, as in the original.
        shutil.rmtree(filepath, ignore_errors=True)
    # makedirs also creates missing parents; exist_ok covers the case where
    # the wipe above could not remove the directory itself.
    os.makedirs(filepath, exist_ok=True)
| |
| |
def cnblog2anki(file):
    """Extract keyword-matching posts from a saved HTML page.

    Each ``.vue-recycle-scroller__item-view`` element is treated as one post.
    A post is kept when the text of its ``.detail_wbtext_4CRf9`` element
    matches the keyword pattern (case-sensitive) and it contains an
    ``a.head-info_time_6sFQg`` link, whose ``title`` and ``href`` attributes
    supply the time and URL.

    :param file: path of a UTF-8 encoded HTML file
    :return: list of ``(text, time, url)`` string tuples; always a list,
        possibly empty
    """
    content_reg = r'(anki)|(vue)|(javascript)|(typescript)|(three(.|)js)|(js)|(ts)'
    pattern = re.compile(content_reg)

    with open(file, "r", encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    res = []
    for card in soup.select(".vue-recycle-scroller__item-view"):
        text_ele = card.select_one(".detail_wbtext_4CRf9")
        if text_ele is None:
            continue
        pub_text = text_ele.text
        if not pattern.search(pub_text):
            continue
        # Drop this fixed caption substring from the post text.
        pub_text = pub_text.replace('诺亚方卓的微博视频', '')

        pub_ele = card.select_one("a.head-info_time_6sFQg")
        if pub_ele is None:
            continue
        # A missing attribute becomes '' so callers can concatenate the
        # fields without hitting None.
        pub_time = pub_ele.get('title') or ''
        pub_url = pub_ele.get('href') or ''
        res.append((pub_text, pub_time, pub_url))
    return res
| |
| |
def write2txt(msg, out_path=None):
    """Append *msg* to a UTF-8 text file.

    :param msg: text to append; written as-is, so the caller supplies any
        trailing line break
    :param out_path: file to append to. When omitted, the target is
        ``file + '.csv'`` where ``file`` is the module-level variable bound
        by the ``for file in ...`` loop of the script entry point
    :return: None
    """
    if out_path is None:
        # Backward-compatible default: relies on the global ``file``.
        out_path = file + '.csv'
    with open(out_path, "a", encoding='utf-8') as f:
        # write() emits the string in one call; writelines() would iterate
        # over it character by character.
        f.write(msg)
| |
| |
if __name__ == '__main__':
    # The loop variable must stay named ``file``: write2txt() reads it as a
    # module-level global to build the output path ``<file>.csv``.
    for file in get_file_path():
        # Defensive: treat a None result as "no posts in this file".
        for pub_text, pub_time, pub_url in cnblog2anki(file) or []:
            # Text and time together form both the first field and the link
            # label; a missing time or URL is written as an empty string.
            label = pub_text + (pub_time or '')
            # The href is quoted so URLs containing '=' or spaces stay a
            # single attribute value.
            write2txt(f'{label}\t<a href="{pub_url or ""}">{label}</a>\n')
| |
代码地址
https://gitee.com/zhuo-xiaosong/cnblog_weibo_to_anki
https://download.csdn.net/download/zhuoss/86978246
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 单线程的Redis速度为什么快?
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 展开说说关于C#中ORM框架的用法!