D:\code_gitee\python_cnblog2anki_and_weibo2anki\main.py
| import os |
| import re |
| import shutil |
| |
| import easygui |
| from bs4 import BeautifulSoup |
| from easygui import * |
| |
| |
class User(EgStore):
    """Settings record saved through easygui's ``EgStore``.

    It carries a single value, ``path``: the location most recently chosen
    in a file or directory dialog.
    """

    def __init__(self, filename):
        # Give ``path`` its default before the base initialiser runs.
        self.path = ''
        super().__init__(filename)
| |
| |
def get_file_path():
    """Ask the user which files to process and return their paths.

    The user first types ``1`` (pick individual files) or ``2`` (pick a
    directory). In directory mode the tree is walked recursively and every
    file whose path ends in ``.html``, ``.mhtml``, ``.htm`` or ``.txt`` is
    collected. The chosen location is saved through ``User("settings.txt")``
    and used as the default of the next dialog.

    :return: list of file paths; an empty list when a dialog is cancelled.
    """
    # Any answer other than '2' (including a cancelled prompt) means file mode.
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        root = easygui.diropenbox(default=user.path)
        if not root:
            # Dialog cancelled: nothing to walk, leave the stored path alone.
            return []
        user.path = root
        user.store()
        files = []
        for current_dir, _subdirs, names in os.walk(root):
            for name in names:
                # Join with the directory being visited, not the root, so
                # files in sub-directories get a path that really exists.
                filename = os.path.join(current_dir, name)
                if re.match(r"^[\s\S]*\.(html|mhtml|htm|txt)$", filename):
                    files.append(filename)
        return files

    selected = easygui.fileopenbox(multiple=True, default=user.path)
    if not selected:
        # Dialog cancelled: there is no first entry to remember.
        return []
    user.path = selected[0]
    user.store()
    return selected
| |
| |
def setDir(filepath):
    """Leave ``filepath`` as a freshly created, empty directory.

    Anything already at that path is removed first (removal errors are
    ignored), then the directory is created.

    :param filepath: path of the directory to create
    :return: None
    """
    if os.path.exists(filepath):
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)
| |
| |
def cnblog2anki(file):
    """Collect keyword-matching posts from a saved HTML page.

    Every ``.vue-recycle-scroller__item-view`` element is treated as one
    post. A post is kept when its ``.detail_wbtext_4CRf9`` text matches the
    keyword pattern and it has an ``a.head-info_time_6sFQg`` link.

    :param file: path of the UTF-8 encoded HTML file to parse
    :return: list of ``(text, time, url)`` tuples; always a list, possibly
        empty. ``time`` and ``url`` come from the link's ``title`` and
        ``href`` attributes and are ``''`` when the attribute is missing.
    """
    res = []
    # NOTE(review): the match is case-sensitive, and the `(js)` / `(ts)`
    # alternatives already cover the longer ones - confirm that is intended.
    content_reg = r'(anki)|(vue)|(javascript)|(typescript)|(three(.|)js)|(js)|(ts)'
    pattern = re.compile(content_reg)
    with open(file, "r", encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    # select() returns an empty result when nothing matches, so the loop
    # simply does not run and the function still returns a list.
    for card in soup.select(".vue-recycle-scroller__item-view"):
        text_ele = card.select(".detail_wbtext_4CRf9")
        if not text_ele:
            continue
        pub_text = text_ele[0].text
        if not pattern.search(pub_text):
            continue

        pub_text = pub_text.replace('诺亚方卓的微博视频', '')

        pub_ele = card.select("a.head-info_time_6sFQg")
        if not pub_ele:
            continue
        # A missing attribute gives None; fall back to '' because the
        # caller concatenates these values with strings.
        pub_time = pub_ele[0].get('title') or ''
        pub_url = pub_ele[0].get('href') or ''
        res.append((pub_text, pub_time, pub_url))
    return res
| |
| |
def write2txt(msg, path=None):
    """Append ``msg`` to the ``.csv`` file that belongs to a source page.

    :param msg: text to append, written exactly as given (the caller
        supplies any trailing newline)
    :param path: path of the source page; output goes to ``path + '.csv'``.
        When omitted, the module-level ``file`` variable set by the
        ``__main__`` loop is used, which keeps one-argument calls working.
    :return: None
    """
    if path is None:
        # Backward-compatible fallback to the loop variable of the script.
        path = file
    with open(path + '.csv', "a", encoding='utf-8') as f:
        # msg is a single string, so write() is the right call;
        # writelines() would iterate over it character by character.
        f.write(msg)
| |
| |
if __name__ == '__main__':
    # The loop variable must stay a module-level name called `file`:
    # write2txt() reads it to decide which .csv to append to.
    for file in get_file_path():
        for content in cnblog2anki(file):
            pub_text, pub_time, pub_url = content
            front = pub_text + pub_time
            # One tab-separated row: plain text, then the same text as a link.
            write2txt(f'{front}\t<a href={pub_url}>{front}</a>\n')
D:\code_gitee\python_cnblog2anki_and_weibo2anki\main_cnblog2anki.py
| from base64 import encode |
| import os |
| import re |
| import shutil |
| import easygui |
| from subprocess import run |
| from easygui import * |
| from bs4 import BeautifulSoup |
| |
| |
class User(EgStore):
    """Settings record saved through easygui's ``EgStore``.

    It carries a single value, ``path``: the location most recently chosen
    in a file or directory dialog.
    """

    def __init__(self, filename):
        # Give ``path`` its default before the base initialiser runs.
        self.path = ''
        super().__init__(filename)
| |
| |
def get_file_path():
    """Ask the user which files to process and return their paths.

    The user first types ``1`` (pick individual files) or ``2`` (pick a
    directory). In directory mode the tree is walked recursively and every
    file whose path ends in ``.html``, ``.mhtml``, ``.htm`` or ``.txt`` is
    collected. The chosen location is saved through ``User("settings.txt")``
    and used as the default of the next dialog.

    :return: list of file paths; an empty list when a dialog is cancelled.
    """
    # Any answer other than '2' (including a cancelled prompt) means file mode.
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        root = easygui.diropenbox(default=user.path)
        if not root:
            # Dialog cancelled: nothing to walk, leave the stored path alone.
            return []
        user.path = root
        user.store()
        files = []
        for current_dir, _subdirs, names in os.walk(root):
            for name in names:
                # Join with the directory being visited, not the root, so
                # files in sub-directories get a path that really exists.
                filename = os.path.join(current_dir, name)
                if re.match(r"^[\s\S]*\.(html|mhtml|htm|txt)$", filename):
                    files.append(filename)
        return files

    selected = easygui.fileopenbox(multiple=True, default=user.path)
    if not selected:
        # Dialog cancelled: there is no first entry to remember.
        return []
    user.path = selected[0]
    user.store()
    return selected
| |
| |
def setDir(filepath):
    """Leave ``filepath`` as a freshly created, empty directory.

    Anything already at that path is removed first (removal errors are
    ignored), then the directory is created.

    :param filepath: path of the directory to create
    :return: None
    """
    if os.path.exists(filepath):
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)
| |
| |
def cnblog2anki(file):
    """Collect ``(title, url)`` pairs from the first table of an HTML page.

    For every ``<tr>`` of the first ``<tbody>``, the first link matching
    ``td:nth-child(1)>a`` supplies the title (its text) and the URL
    (``'http:'`` prepended to its ``href``).

    :param file: path of the UTF-8 encoded HTML file to parse
    :return: list of ``(title, url)`` tuples; empty when the page has no
        ``<tbody>``. Rows without a link or without an ``href`` are skipped.
    """
    res = []
    with open(file, "r", encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    tbodies = soup.select("tbody")
    if not tbodies:
        # No table on the page: report "nothing found" instead of raising.
        return res

    for tr_ele in tbodies[0].select('tr'):
        # Query once and reuse the element for both the text and the href.
        links = tr_ele.select('td:nth-child(1)>a')
        if not links:
            continue
        href = links[0].get('href')
        if not href:
            continue
        # NOTE(review): assumes hrefs are protocol-relative ("//host/...") -
        # confirm against the saved pages.
        res.append((links[0].text, 'http:' + href))
    return res
| |
| |
def write2txt(msg, path=None):
    """Append ``msg`` to the ``.csv`` file that belongs to a source page.

    :param msg: text to append, written exactly as given (the caller
        supplies any trailing newline)
    :param path: path of the source page; output goes to ``path + '.csv'``.
        When omitted, the module-level ``file`` variable set by the
        ``__main__`` loop is used, which keeps one-argument calls working.
    :return: None
    """
    if path is None:
        # Backward-compatible fallback to the loop variable of the script.
        path = file
    with open(path + '.csv', "a", encoding='utf-8') as f:
        # msg is a single string, so write() is the right call;
        # writelines() would iterate over it character by character.
        f.write(msg)
| |
| |
if __name__ == '__main__':
    # The loop variable must stay a module-level name called `file`:
    # write2txt() reads it to decide which .csv to append to.
    for file in get_file_path():
        for content in cnblog2anki(file):
            print(content)
            title, url = content
            # One tab-separated row: plain title, then the title as a link.
            write2txt(f'{title}\t<a href={url}>{title}</a>\n')
D:\code_gitee\python_cnblog2anki_and_weibo2anki\main_weibo2anki.py
| import os |
| import re |
| import shutil |
| |
| import easygui |
| from bs4 import BeautifulSoup |
| from easygui import * |
| |
| |
class User(EgStore):
    """Settings record saved through easygui's ``EgStore``.

    It carries a single value, ``path``: the location most recently chosen
    in a file or directory dialog.
    """

    def __init__(self, filename):
        # Give ``path`` its default before the base initialiser runs.
        self.path = ''
        super().__init__(filename)
| |
| |
def get_file_path():
    """Ask the user which files to process and return their paths.

    The user first types ``1`` (pick individual files) or ``2`` (pick a
    directory). In directory mode the tree is walked recursively and every
    file whose path ends in ``.html``, ``.mhtml``, ``.htm`` or ``.txt`` is
    collected. The chosen location is saved through ``User("settings.txt")``
    and used as the default of the next dialog.

    :return: list of file paths; an empty list when a dialog is cancelled.
    """
    # Any answer other than '2' (including a cancelled prompt) means file mode.
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        root = easygui.diropenbox(default=user.path)
        if not root:
            # Dialog cancelled: nothing to walk, leave the stored path alone.
            return []
        user.path = root
        user.store()
        files = []
        for current_dir, _subdirs, names in os.walk(root):
            for name in names:
                # Join with the directory being visited, not the root, so
                # files in sub-directories get a path that really exists.
                filename = os.path.join(current_dir, name)
                if re.match(r"^[\s\S]*\.(html|mhtml|htm|txt)$", filename):
                    files.append(filename)
        return files

    selected = easygui.fileopenbox(multiple=True, default=user.path)
    if not selected:
        # Dialog cancelled: there is no first entry to remember.
        return []
    user.path = selected[0]
    user.store()
    return selected
| |
| |
def setDir(filepath):
    """Leave ``filepath`` as a freshly created, empty directory.

    Anything already at that path is removed first (removal errors are
    ignored), then the directory is created.

    :param filepath: path of the directory to create
    :return: None
    """
    if os.path.exists(filepath):
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)
| |
| |
def cnblog2anki(file):
    """Collect keyword-matching posts from a saved HTML page.

    Every ``.vue-recycle-scroller__item-view`` element is treated as one
    post. A post is kept when its ``.detail_wbtext_4CRf9`` text matches the
    keyword pattern and it has an ``a.head-info_time_6sFQg`` link.

    :param file: path of the UTF-8 encoded HTML file to parse
    :return: list of ``(text, time, url)`` tuples; always a list, possibly
        empty. ``time`` and ``url`` come from the link's ``title`` and
        ``href`` attributes and are ``''`` when the attribute is missing.
    """
    res = []
    # NOTE(review): the match is case-sensitive, and the `(js)` / `(ts)`
    # alternatives already cover the longer ones - confirm that is intended.
    content_reg = r'(anki)|(vue)|(javascript)|(typescript)|(three(.|)js)|(js)|(ts)'
    pattern = re.compile(content_reg)
    with open(file, "r", encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    # select() returns an empty result when nothing matches, so the loop
    # simply does not run and the function still returns a list.
    for card in soup.select(".vue-recycle-scroller__item-view"):
        text_ele = card.select(".detail_wbtext_4CRf9")
        if not text_ele:
            continue
        pub_text = text_ele[0].text
        if not pattern.search(pub_text):
            continue

        pub_text = pub_text.replace('诺亚方卓的微博视频', '')

        pub_ele = card.select("a.head-info_time_6sFQg")
        if not pub_ele:
            continue
        # A missing attribute gives None; fall back to '' because the
        # caller concatenates these values with strings.
        pub_time = pub_ele[0].get('title') or ''
        pub_url = pub_ele[0].get('href') or ''
        res.append((pub_text, pub_time, pub_url))
    return res
| |
| |
def write2txt(msg, path=None):
    """Append ``msg`` to the ``.csv`` file that belongs to a source page.

    :param msg: text to append, written exactly as given (the caller
        supplies any trailing newline)
    :param path: path of the source page; output goes to ``path + '.csv'``.
        When omitted, the module-level ``file`` variable set by the
        ``__main__`` loop is used, which keeps one-argument calls working.
    :return: None
    """
    if path is None:
        # Backward-compatible fallback to the loop variable of the script.
        path = file
    with open(path + '.csv', "a", encoding='utf-8') as f:
        # msg is a single string, so write() is the right call;
        # writelines() would iterate over it character by character.
        f.write(msg)
| |
| |
if __name__ == '__main__':
    # The loop variable must stay a module-level name called `file`:
    # write2txt() reads it to decide which .csv to append to.
    for file in get_file_path():
        for content in cnblog2anki(file):
            pub_text, pub_time, pub_url = content
            front = pub_text + pub_time
            # One tab-separated row: plain text, then the same text as a link.
            write2txt(f'{front}\t<a href={pub_url}>{front}</a>\n')
D:\code_gitee\python_cnblog2anki_and_weibo2anki\learn\le01.py
| import datetime |
| |
| |
def run_cmd(cmd_str='', echo_print=1):
    """Run a shell command through ``subprocess.run``.

    Author's note (translated): the command runs without the black console
    window popping up, because ``subprocess.run()`` sends what would have
    been printed in the cmd window to the Python console instead.

    :param cmd_str: the command line to execute
    :param echo_print: when equal to ``1``, print the command before running it
    :return: None
    """
    import subprocess

    if echo_print == 1:
        print(f'\n执行cmd指令="{cmd_str}"')
    subprocess.run(cmd_str, shell=True)
| |
if __name__ == '__main__':
    # Named `letters` rather than `list` so the builtin is not shadowed.
    letters = ['a', 'b', 'c']
    for i, j in enumerate(letters):
        # Second argument 0 turns off run_cmd's own echo of the command.
        run_cmd(f'echo 开始时间:{i}--{j}', 0)
D:\code_gitee\python_cnblog2anki_and_weibo2anki\learn\le02.py
| import os |
| |
if __name__ == '__main__':
    # Named `letters` rather than `list` so the builtin is not shadowed.
    letters = ['a', 'b', 'c']
    for i, j in enumerate(letters):
        # os.popen() captures the command's stdout in a pipe. Read it so the
        # echo is actually shown, and use `with` so the pipe is closed
        # instead of being left to garbage collection.
        with os.popen(f'echo 开始时间:{i}--{j}') as pipe:
            print(pipe.read(), end='')
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 单线程的Redis速度为什么快?
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 展开说说关于C#中ORM框架的用法!