cnblog2anki

 from base64 import encode
import os
import re
import shutil
import easygui
from subprocess import run
from easygui import *
from bs4 import BeautifulSoup
 
 
class User(EgStore):
    """Settings record persisted through easygui's ``EgStore``.

    ``path`` holds the location most recently picked in the file dialogs and
    starts out as an empty string.
    """

    def __init__(self, filename):
        """
        :param filename: name of the settings file handed to ``EgStore``
        """
        # The default is assigned before the base initialiser runs, exactly
        # as in the original ordering.
        self.path = ''
        super().__init__(filename)
 
 
def get_file_path():
    """
    Ask the user, through easygui dialogs, which files should be converted.

    The user types ``2`` to pick a directory; any other answer opens the
    multi-file picker.  The chosen location is stored in ``settings.txt`` via
    ``User`` and passed as the dialog default on the next run.

    :return: list of file paths to process.  In directory mode these are the
             files below the chosen directory (sub-directories included)
             whose names end in ``.html``, ``.mhtml``, ``.htm`` or ``.txt``.
             An empty list is returned when a dialog is cancelled.
    """
    wanted_suffixes = ('.html', '.mhtml', '.htm', '.txt')
    mode = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    # Load whatever location a previous run saved.
    user = User("settings.txt")
    user.restore()

    if mode == '2':
        chosen_dir = easygui.diropenbox(default=user.path)
        if not chosen_dir:
            # Dialog cancelled: nothing to walk and nothing worth saving.
            return []
        user.path = chosen_dir
        user.store()

        files = []
        for dirpath, _dirnames, filenames in os.walk(chosen_dir):
            for name in filenames:
                if name.endswith(wanted_suffixes):
                    # Join against the directory currently being visited so
                    # files found in sub-directories keep their real path.
                    files.append(os.path.join(dirpath, name))
        return files

    chosen_files = easygui.fileopenbox(multiple=True, default=user.path)
    if not chosen_files:
        # Dialog cancelled: indexing the result would raise.
        return []
    user.path = chosen_files[0]
    user.store()
    return chosen_files
 
 
def setDir(filepath):
    '''
    Create the directory ``filepath``, wiping it first when it already exists.

    Removal errors are ignored; the directory is then created with
    ``os.mkdir``.

    :param filepath: path of the directory to (re)create
    :return: None
    '''
    if os.path.exists(filepath):
        # Best-effort removal of the old contents before recreating.
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)
 
 
def cnblog2anki(file):
    """
    Extract ``(title, url)`` pairs from a saved HTML listing page.

    The page is parsed with BeautifulSoup; every ``<tr>`` of the first
    ``<tbody>`` contributes the link found directly inside its first cell.

    :param file: path of the UTF-8 encoded HTML file to read
    :return: list of ``(title, url)`` tuples, one per row that has a link
             with an ``href``; empty when the page has no ``<tbody>``
    """
    with open(file, "r", encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    tbody = soup.select_one("tbody")
    if tbody is None:
        return []

    res = []
    for tr_ele in tbody.select('tr'):
        # Look the link up once instead of running the selector twice.
        link = tr_ele.select_one('td:nth-child(1)>a')
        href = link.get('href') if link is not None else None
        if not href:
            # Rows without a usable link cannot become a card.
            continue
        # Only protocol-relative links ("//host/...") lack a scheme; any
        # other href would be corrupted by blindly prefixing "http:".
        url = 'http:' + href if href.startswith('//') else href
        res.append((link.text, url))
    return res
 
 
def write2txt(msg, source_file=None):
    """
    Append ``msg`` to the ``.csv`` file named after a source file.

    :param msg: text to append; a single string is written as-is, any other
                iterable of strings is passed to ``writelines``
    :param source_file: path the output name is derived from (``'.csv'`` is
                        appended to it).  When omitted, the module-level
                        ``file`` variable set by the script's main loop is
                        used, so existing ``write2txt(msg)`` calls keep
                        working.
    :return: None
    """
    if source_file is None:
        # Legacy behaviour: fall back to the main script's loop variable.
        source_file = file
    with open(source_file + '.csv', "a", encoding='utf-8') as f:
        if isinstance(msg, str):
            # writelines() on a str would iterate it character by character.
            f.write(msg)
        else:
            f.writelines(msg)
 
 
if __name__ == '__main__':
    # Script entry point: convert every selected page into tab-separated
    # "title <TAB> link" lines appended to "<page>.csv".
    selected = get_file_path()
    # NOTE: the loop variable must stay named ``file`` -- write2txt() reads
    # it as a module-level global to build the output file name.
    for file in selected:
        for title, url in cnblog2anki(file):
            print((title, url))
            write2txt(f'{title}\t<a href={url}>{title}</a>\n')

weibo2anki

 import os
import re
import shutil
 
import easygui
from bs4 import BeautifulSoup
from easygui import *
 
 
class User(EgStore):
    """Settings record persisted through easygui's ``EgStore``.

    ``path`` holds the location most recently picked in the file dialogs and
    starts out as an empty string.
    """

    def __init__(self, filename):
        """
        :param filename: name of the settings file handed to ``EgStore``
        """
        # The default is assigned before the base initialiser runs, exactly
        # as in the original ordering.
        self.path = ''
        super().__init__(filename)
 
 
def get_file_path():
    """
    Ask the user, through easygui dialogs, which files should be converted.

    The user types ``2`` to pick a directory; any other answer opens the
    multi-file picker.  The chosen location is stored in ``settings.txt`` via
    ``User`` and passed as the dialog default on the next run.

    :return: list of file paths to process.  In directory mode these are the
             files below the chosen directory (sub-directories included)
             whose names end in ``.html``, ``.mhtml``, ``.htm`` or ``.txt``.
             An empty list is returned when a dialog is cancelled.
    """
    wanted_suffixes = ('.html', '.mhtml', '.htm', '.txt')
    mode = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    # Load whatever location a previous run saved.
    user = User("settings.txt")
    user.restore()

    if mode == '2':
        chosen_dir = easygui.diropenbox(default=user.path)
        if not chosen_dir:
            # Dialog cancelled: nothing to walk and nothing worth saving.
            return []
        user.path = chosen_dir
        user.store()

        files = []
        for dirpath, _dirnames, filenames in os.walk(chosen_dir):
            for name in filenames:
                if name.endswith(wanted_suffixes):
                    # Join against the directory currently being visited so
                    # files found in sub-directories keep their real path.
                    files.append(os.path.join(dirpath, name))
        return files

    chosen_files = easygui.fileopenbox(multiple=True, default=user.path)
    if not chosen_files:
        # Dialog cancelled: indexing the result would raise.
        return []
    user.path = chosen_files[0]
    user.store()
    return chosen_files
 
 
def setDir(filepath):
    '''
    Create the directory ``filepath``, wiping it first when it already exists.

    Removal errors are ignored; the directory is then created with
    ``os.mkdir``.

    :param filepath: path of the directory to (re)create
    :return: None
    '''
    if os.path.exists(filepath):
        # Best-effort removal of the old contents before recreating.
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)
 
 
def cnblog2anki(file):
    """
    Collect keyword-matching posts from a saved Weibo HTML page.

    Every ``.vue-recycle-scroller__item-view`` card is inspected.  Cards with
    no text element, no keyword match, or no time/link anchor are skipped.

    :param file: path of the UTF-8 encoded HTML file to read
    :return: list of ``(text, publish_time, url)`` tuples for the kept cards;
             always a list, possibly empty.  A missing ``title`` or ``href``
             attribute is returned as ``''``.
    """
    # NOTE(review): matching is case-sensitive and the short "js"/"ts"
    # alternatives hit any text containing those letters -- confirm that
    # this breadth is intended.
    content_reg = r'(anki)|(vue)|(javascript)|(typescript)|(three(.|)js)|(js)|(ts)'
    pattern = re.compile(content_reg)

    with open(file, "r", encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    res = []
    # select() yields an empty result when nothing matches, so the loop
    # simply does not run; no None check is needed.
    for card in soup.select(".vue-recycle-scroller__item-view"):
        text_ele = card.select_one(".detail_wbtext_4CRf9")
        if text_ele is None:
            continue
        pub_text = text_ele.text
        # Keep only posts that mention one of the keywords.
        if not pattern.search(pub_text):
            continue
        # Strip the fixed video caption from the post text.
        pub_text = pub_text.replace('诺亚方卓的微博视频', '')
        # The anchor carries the publish time (title) and the post link (href).
        pub_ele = card.select_one("a.head-info_time_6sFQg")
        if pub_ele is None:
            continue
        # Missing attributes come back as None, which would break the
        # caller's string concatenation; normalise them to ''.
        pub_time = pub_ele.get('title') or ''
        pub_url = pub_ele.get('href') or ''
        res.append((pub_text, pub_time, pub_url))
    return res
 
 
def write2txt(msg, source_file=None):
    """
    Append ``msg`` to the ``.csv`` file named after a source file.

    :param msg: text to append; a single string is written as-is, any other
                iterable of strings is passed to ``writelines``
    :param source_file: path the output name is derived from (``'.csv'`` is
                        appended to it).  When omitted, the module-level
                        ``file`` variable set by the script's main loop is
                        used, so existing ``write2txt(msg)`` calls keep
                        working.
    :return: None
    """
    if source_file is None:
        # Legacy behaviour: fall back to the main script's loop variable.
        source_file = file
    with open(source_file + '.csv', "a", encoding='utf-8') as f:
        if isinstance(msg, str):
            # writelines() on a str would iterate it character by character.
            f.write(msg)
        else:
            f.writelines(msg)
 
 
if __name__ == '__main__':
    # Script entry point: turn every kept post into a tab-separated
    # "text+time <TAB> link" line appended to "<page>.csv".
    selected = get_file_path()
    # NOTE: the loop variable must stay named ``file`` -- write2txt() reads
    # it as a module-level global to build the output file name.
    for file in selected:
        for content in cnblog2anki(file):
            label = content[0] + content[1]
            write2txt(f'{label}\t<a href={content[2]}>{label}</a>\n')

代码地址

https://gitee.com/zhuo-xiaosong/cnblog_weibo_to_anki

https://download.csdn.net/download/zhuoss/86978246

posted on 2022-11-12 21:50 超级无敌美少男战士 阅读(40) 评论(0) 编辑 收藏 举报

刷新页面 返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

相关博文：

· 博客园文章链接导入anki和微博链接导入anki

· chrome_history_and_docs_2_anki

· cnblogs 停更, 迁移到 GitHub SSR website All In One

· 【山东艾思软件】分享一套完整的Python采集公众号文件代码

· Fishlulu黑马头条微服务项目日志

阅读排行：
· 阿里最新开源QwQ-32B，效果媲美deepseek-r1满血版，部署成本又又又降低了！
· 单线程的Redis速度为什么快？
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决：字节Trae VS Cursor，谁才是开发者新宠？
· 展开说说关于C#中ORM框架的用法！

怪物奇妙物语

最新随笔

我的标签

积分与排名

随笔分类 (20)

阅读排行榜

评论排行榜

推荐排行榜

最新评论

cnblog2anki

weibo2anki

代码地址

	from base64 import encode
	import os
	import re
	import shutil
	import easygui
	from subprocess import run
	from easygui import *
	from bs4 import BeautifulSoup


	class User(EgStore):
	    """Settings record persisted through easygui's ``EgStore``.

	    ``path`` holds the location most recently picked in the file dialogs
	    and starts out as an empty string.
	    """

	    def __init__(self, filename):
	        """
	        :param filename: name of the settings file handed to ``EgStore``
	        """
	        self.path = ''
	        EgStore.__init__(self, filename)


	def get_file_path():
	    """
	    Ask the user, through easygui dialogs, which files should be converted.

	    The user types ``2`` to pick a directory; any other answer opens the
	    multi-file picker.  The chosen location is stored in ``settings.txt``
	    via ``User`` and passed as the dialog default on the next run.

	    :return: list of file paths to process.  In directory mode these are
	             the files below the chosen directory (sub-directories
	             included) whose names end in ``.html``, ``.mhtml``, ``.htm``
	             or ``.txt``.  An empty list is returned when a dialog is
	             cancelled.
	    """
	    wanted_suffixes = ('.html', '.mhtml', '.htm', '.txt')
	    mode = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

	    # Load whatever location a previous run saved.
	    user = User("settings.txt")
	    user.restore()

	    if mode == '2':
	        chosen_dir = easygui.diropenbox(default=user.path)
	        if not chosen_dir:
	            # Dialog cancelled: nothing to walk and nothing worth saving.
	            return []
	        user.path = chosen_dir
	        user.store()

	        files = []
	        for dirpath, _dirnames, filenames in os.walk(chosen_dir):
	            for name in filenames:
	                if name.endswith(wanted_suffixes):
	                    # Join against the directory currently being visited
	                    # so files in sub-directories keep their real path.
	                    files.append(os.path.join(dirpath, name))
	        return files

	    chosen_files = easygui.fileopenbox(multiple=True, default=user.path)
	    if not chosen_files:
	        # Dialog cancelled: indexing the result would raise.
	        return []
	    user.path = chosen_files[0]
	    user.store()
	    return chosen_files


	def setDir(filepath):
	    '''
	    Create the directory ``filepath``, wiping it first when it exists.

	    Removal errors are ignored; the directory is then created with
	    ``os.mkdir``.

	    :param filepath: path of the directory to (re)create
	    :return: None
	    '''
	    if os.path.exists(filepath):
	        # Best-effort removal of the old contents before recreating.
	        shutil.rmtree(filepath, ignore_errors=True)
	    os.mkdir(filepath)


	def cnblog2anki(file):
	    """
	    Extract ``(title, url)`` pairs from a saved HTML listing page.

	    The page is parsed with BeautifulSoup; every ``<tr>`` of the first
	    ``<tbody>`` contributes the link found directly inside its first cell.

	    :param file: path of the UTF-8 encoded HTML file to read
	    :return: list of ``(title, url)`` tuples, one per row that has a link
	             with an ``href``; empty when the page has no ``<tbody>``
	    """
	    with open(file, "r", encoding='utf-8') as f:
	        soup = BeautifulSoup(f.read(), 'html.parser')

	    tbody = soup.select_one("tbody")
	    if tbody is None:
	        return []

	    res = []
	    for tr_ele in tbody.select('tr'):
	        # Look the link up once instead of running the selector twice.
	        link = tr_ele.select_one('td:nth-child(1)>a')
	        href = link.get('href') if link is not None else None
	        if not href:
	            # Rows without a usable link cannot become a card.
	            continue
	        # Only protocol-relative links ("//host/...") lack a scheme; any
	        # other href would be corrupted by blindly prefixing "http:".
	        url = 'http:' + href if href.startswith('//') else href
	        res.append((link.text, url))
	    return res


	def write2txt(msg, source_file=None):
	    """
	    Append ``msg`` to the ``.csv`` file named after a source file.

	    :param msg: text to append; a single string is written as-is, any
	                other iterable of strings is passed to ``writelines``
	    :param source_file: path the output name is derived from (``'.csv'``
	                        is appended to it).  When omitted, the
	                        module-level ``file`` variable set by the
	                        script's main loop is used, so existing
	                        ``write2txt(msg)`` calls keep working.
	    :return: None
	    """
	    if source_file is None:
	        # Legacy behaviour: fall back to the main script's loop variable.
	        source_file = file
	    with open(source_file + '.csv', "a", encoding='utf-8') as f:
	        if isinstance(msg, str):
	            # writelines() on a str would iterate it char by char.
	            f.write(msg)
	        else:
	            f.writelines(msg)


	if __name__ == '__main__':
	    # Script entry point: convert every selected page into tab-separated
	    # "title <TAB> link" lines appended to "<page>.csv".
	    selected = get_file_path()
	    # NOTE: the loop variable must stay named ``file`` -- write2txt()
	    # reads it as a module-level global to build the output file name.
	    for file in selected:
	        for content in cnblog2anki(file):
	            print(content)
	            write2txt(f'{content[0]}\t<a href={content[1]}>{content[0]}</a>\n')