C:\my_script\2anki\cnblog2anki\cnblog2anki.py
from base64 import encode
import os
import re
import shutil
import easygui
from subprocess import run
from easygui import *
from bs4 import BeautifulSoup
class User(EgStore):
    """Settings object persisted through easygui's EgStore.

    The only field is ``path``; callers in this file pass it as the
    ``default`` of the easygui file/directory dialogs.
    """

    def __init__(self, filename):
        # Give `path` a value before the EgStore initializer runs.
        self.path = ''
        super().__init__(filename)
def get_file_path():
    """Ask the user (via easygui dialogs) which files should be processed.

    The user first types ``2`` to pick a directory; any other answer
    (including cancelling the prompt) selects file mode.  In directory mode
    the tree is walked recursively and every file ending in ``.html``,
    ``.mhtml``, ``.htm`` or ``.txt`` is collected.  The chosen location is
    stored in ``settings.txt`` through ``User`` and offered as the dialog
    default.

    :return: list of selected file paths; an empty list when the dialog
        was cancelled.
    """
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)
    # Load the location saved by a previous run.
    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        selected_dir = easygui.diropenbox(default=user.path)
        if not selected_dir:
            # Cancelled: keep the previously saved path untouched.
            return []
        user.path = selected_dir
        user.store()
        suffixes = ('.html', '.mhtml', '.htm', '.txt')
        files = []
        for root, _dirs, names in os.walk(selected_dir):
            for name in names:
                if name.endswith(suffixes):
                    # Join with the directory currently being walked so that
                    # files in sub-directories get their real path.
                    files.append(os.path.join(root, name))
        return files

    selected_files = easygui.fileopenbox(multiple=True, default=user.path)
    if not selected_files:
        return []
    user.path = selected_files[0]
    user.store()
    return selected_files
def setDir(filepath):
    """Make sure ``filepath`` is an existing, freshly cleared directory.

    An existing directory is removed (best effort) and recreated; missing
    parent directories are created as well.  Entries that could not be
    deleted are left in place instead of aborting with FileExistsError.

    :param filepath: path of the directory to create or clear
    :return: None
    """
    if os.path.exists(filepath):
        shutil.rmtree(filepath, ignore_errors=True)
    # exist_ok tolerates a directory that rmtree could not fully remove.
    os.makedirs(filepath, exist_ok=True)
def cnblog2anki(file):
    """Collect ``(title, url)`` pairs from the first table body of an HTML file.

    For every ``<tr>`` of the first ``<tbody>``, the anchor that is a direct
    child of the first ``<td>`` supplies the title (its text) and the URL
    (``'http:'`` prepended to its ``href``).  Rows without such an anchor or
    without an ``href`` are skipped.

    :param file: path of a UTF-8 encoded HTML file
    :return: list of ``(title, url)`` tuples; empty when there is no table body
    """
    with open(file, "r", encoding='utf-8') as f:
        data = f.read()
    soup = BeautifulSoup(data, 'html.parser')

    tbody = soup.select_one("tbody")
    if tbody is None:
        return []

    res = []
    for tr_ele in tbody.select('tr'):
        link = tr_ele.select_one('td:nth-child(1)>a')
        if link is None:
            continue
        href = link.get('href')
        if not href:
            continue
        # NOTE(review): prefixing 'http:' presumably targets
        # protocol-relative hrefs ("//host/...") - confirm against real input.
        res.append((link.text, 'http:' + href))
    return res
def write2txt(msg, source=None):
    """Append ``msg`` to the ``.csv`` file that belongs to an input file.

    :param msg: text to append (a string, or an iterable of strings)
    :param source: path of the input file; output goes to ``source + '.csv'``.
        When omitted, the module-level name ``file`` (the loop variable of
        the script's main section) is used, as before.
    :return: None
    """
    if source is None:
        source = file  # module-level loop variable set by the main section
    with open(source + '.csv', "a", encoding='utf-8') as f:
        if isinstance(msg, str):
            f.write(msg)
        else:
            f.writelines(msg)
if __name__ == '__main__':
    # `file` has to remain a module-level name: write2txt() reads it to
    # decide which .csv file to append to.
    selected_files = get_file_path()
    for file in selected_files:
        for content in cnblog2anki(file):
            print(content)
            title, url = content[0], content[1]
            write2txt(f'{title}\t<a href={url}>{title}</a>\n')
C:\my_script\2anki\weibo2anki\weibo2anki.py
import os
import re
import shutil
import easygui
from bs4 import BeautifulSoup
from easygui import *
class User(EgStore):
    """Settings object persisted through easygui's EgStore.

    The only field is ``path``; callers in this file pass it as the
    ``default`` of the easygui file/directory dialogs.
    """

    def __init__(self, filename):
        # Give `path` a value before the EgStore initializer runs.
        self.path = ''
        super().__init__(filename)
def get_file_path():
    """Ask the user (via easygui dialogs) which files should be processed.

    The user first types ``2`` to pick a directory; any other answer
    (including cancelling the prompt) selects file mode.  In directory mode
    the tree is walked recursively and every file ending in ``.html``,
    ``.mhtml``, ``.htm`` or ``.txt`` is collected.  The chosen location is
    stored in ``settings.txt`` through ``User`` and offered as the dialog
    default.

    :return: list of selected file paths; an empty list when the dialog
        was cancelled.
    """
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)
    # Load the location saved by a previous run.
    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        selected_dir = easygui.diropenbox(default=user.path)
        if not selected_dir:
            # Cancelled: keep the previously saved path untouched.
            return []
        user.path = selected_dir
        user.store()
        suffixes = ('.html', '.mhtml', '.htm', '.txt')
        files = []
        for root, _dirs, names in os.walk(selected_dir):
            for name in names:
                if name.endswith(suffixes):
                    # Join with the directory currently being walked so that
                    # files in sub-directories get their real path.
                    files.append(os.path.join(root, name))
        return files

    selected_files = easygui.fileopenbox(multiple=True, default=user.path)
    if not selected_files:
        return []
    user.path = selected_files[0]
    user.store()
    return selected_files
def setDir(filepath):
    """Make sure ``filepath`` is an existing, freshly cleared directory.

    An existing directory is removed (best effort) and recreated; missing
    parent directories are created as well.  Entries that could not be
    deleted are left in place instead of aborting with FileExistsError.

    :param filepath: path of the directory to create or clear
    :return: None
    """
    if os.path.exists(filepath):
        shutil.rmtree(filepath, ignore_errors=True)
    # exist_ok tolerates a directory that rmtree could not fully remove.
    os.makedirs(filepath, exist_ok=True)
def cnblog2anki(file):
    """Extract keyword-matching posts from a saved HTML page.

    Every element matching ``.vue-recycle-scroller__item-view`` is treated as
    one card.  A card is kept when its ``.detail_wbtext_4CRf9`` text matches
    the keyword pattern and it has an ``a.head-info_time_6sFQg`` element,
    whose ``title`` and ``href`` attributes supply the time and the link.

    :param file: path of a UTF-8 encoded HTML file
    :return: list of ``(text, time, url)`` tuples (always a list, possibly
        empty); a missing ``title``/``href`` attribute yields ``''``
    """
    content_reg = r'(anki)|(vue)|(javascript)|(typescript)|(three(.|)js)|(js)|(ts)'
    pattern = re.compile(content_reg)
    with open(file, "r", encoding='utf-8') as f:
        data = f.read()
    soup = BeautifulSoup(data, 'html.parser')

    res = []
    for card in soup.select(".vue-recycle-scroller__item-view"):
        text_ele = card.select(".detail_wbtext_4CRf9")
        if not text_ele:
            continue
        pub_text = text_ele[0].text
        # Keep only posts that mention one of the keywords.
        if not pattern.search(pub_text):
            continue
        # Strip the fixed video caption from the text.
        pub_text = pub_text.replace('诺亚方卓的微博视频', '')
        # Publication time and permalink come from the header anchor.
        pub_ele = card.select("a.head-info_time_6sFQg")
        if not pub_ele:
            continue
        # Default to '' so callers can concatenate the fields safely.
        pub_time = pub_ele[0].get('title', '')
        pub_url = pub_ele[0].get('href', '')
        res.append((pub_text, pub_time, pub_url))
    return res
def write2txt(msg, source=None):
    """Append ``msg`` to the ``.csv`` file that belongs to an input file.

    :param msg: text to append (a string, or an iterable of strings)
    :param source: path of the input file; output goes to ``source + '.csv'``.
        When omitted, the module-level name ``file`` (the loop variable of
        the script's main section) is used, as before.
    :return: None
    """
    if source is None:
        source = file  # module-level loop variable set by the main section
    with open(source + '.csv', "a", encoding='utf-8') as f:
        if isinstance(msg, str):
            f.write(msg)
        else:
            f.writelines(msg)
if __name__ == '__main__':
    # `file` has to remain a module-level name: write2txt() reads it to
    # decide which .csv file to append to.
    selected_files = get_file_path()
    for file in selected_files:
        for content in cnblog2anki(file):
            label = content[0] + content[1]
            write2txt(f'{label}\t<a href={content[2]}>{label}</a>\n')
C:\my_script\bookmark2image\bookmark2_image_and_word.py
import os
import re
import shutil
import easygui
from easygui import *
from docx import Document
from docx.shared import Cm
class User(EgStore):
    """Settings object persisted through easygui's EgStore.

    The only field is ``path``; callers in this file pass it as the
    ``default`` of the easygui file dialog.
    """

    def __init__(self, filename):
        # Give `path` a value before the EgStore initializer runs.
        self.path = ''
        super().__init__(filename)
def get_dir_path_gui():
    """Let the user pick a file with an easygui dialog.

    The dialog starts at the path saved in ``settings.txt``; a successful
    selection replaces the saved path.  Cancelling leaves the saved path
    unchanged.

    :return: the value returned by ``easygui.fileopenbox`` (falsy when the
        dialog was cancelled)
    """
    user = User("settings.txt")
    # Load the path saved by a previous run.
    user.restore()
    file_path = easygui.fileopenbox(default=user.path)
    if file_path:
        user.path = file_path
        user.store()
    return file_path
def setDir(filepath):
    """Make sure ``filepath`` is an existing, freshly cleared directory.

    An existing directory is removed (best effort) and recreated; missing
    parent directories are created as well.  Entries that could not be
    deleted are left in place instead of aborting with FileExistsError.

    :param filepath: path of the directory to create or clear
    :return: None
    """
    if os.path.exists(filepath):
        shutil.rmtree(filepath, ignore_errors=True)
    # exist_ok tolerates a directory that rmtree could not fully remove.
    os.makedirs(filepath, exist_ok=True)
def bookmark2image(file, file_dir, document):
    """Add every image listed in a bookmark file to ``document`` and copy it.

    The bookmark file is read as UTF-16.  Its first line is skipped; every
    other line is expected to look like ``key=path``.  Lines without ``=``
    or with an empty path (such as the trailing blank line) are ignored.

    :param file: path of the bookmark file
    :param file_dir: directory the referenced images are copied into
    :param document: object providing ``add_picture(path, width=...)``
    :return: None
    """
    with open(file, "r", encoding='utf-16') as f:
        bookmarks = f.readlines()
    # The first line is not a bookmark entry.
    for bookmark in bookmarks[1:]:
        _key, separator, value = bookmark.partition('=')
        path = value.strip()
        if not separator or not path:
            continue
        # Use the document handed in by the caller, not a module global.
        document.add_picture(path, width=Cm(28))
        shutil.copy(path, file_dir)
def mkdir(path):
    """Create ``path`` (including missing parents) unless it already exists.

    Surrounding whitespace and trailing backslashes are removed from
    ``path`` first.

    :param path: directory path to create
    :return: True when the directory was created, False when the path
        already existed (a notice is printed in that case)
    """
    path = path.strip()
    path = path.rstrip("\\")
    try:
        # Attempt the creation directly instead of checking first, so a
        # directory appearing in between cannot cause an unexpected crash.
        os.makedirs(path)
    except FileExistsError:
        print('已存在')
        return False
    return True
if __name__ == '__main__':
    # The module-level name `documnet` (sic) is kept unchanged because code
    # elsewhere in this file refers to it by that spelling.
    documnet = Document()
    section = documnet.sections[0]
    section.page_width = Cm(30)
    section.page_height = Cm(62)
    section.left_margin = Cm(1)
    section.right_margin = Cm(1)
    section.top_margin = Cm(1)
    section.bottom_margin = Cm(1)

    file_path = get_dir_path_gui()
    if not file_path:
        # Dialog cancelled: nothing to convert.
        raise SystemExit('No bookmark file selected.')

    # Output directory: the selected file's path without its extension.
    dir_name = os.path.splitext(file_path)[0]
    file_name = os.path.split(file_path)[1].split('.')[0]
    setDir(dir_name)
    bookmark2image(file_path, dir_name, documnet)
    documnet.save(os.path.join(dir_name, file_name + '.docx'))
C:\my_script\code2markdown\code2md.py
import os
import re
import shutil
import easygui
from easygui import *
from pathlib import Path
class User(EgStore):
    """Settings object persisted through easygui's EgStore.

    The only field is ``path``; callers in this file pass it as the
    ``default`` of the easygui directory dialog.
    """

    def __init__(self, filename):
        # Give `path` a value before the EgStore initializer runs.
        self.path = ''
        super().__init__(filename)
def get_dir_path_gui():
    """Let the user pick a directory with an easygui dialog.

    The dialog starts at the path saved in ``settings.txt``; a successful
    selection replaces the saved path.  Cancelling leaves the saved path
    unchanged.

    :return: the value returned by ``easygui.diropenbox`` (falsy when the
        dialog was cancelled)
    """
    user = User("settings.txt")
    # Load the path saved by a previous run.
    user.restore()
    file_path = easygui.diropenbox(default=user.path)
    if file_path:
        user.path = file_path
        user.store()
    return file_path
def get_root_dir(dir_path):
    """Classify the direct children of ``dir_path``.

    Names starting with ``.`` are ignored.  A name that has an extension is
    treated as a file: it is kept when it ends in ``.py``, ``.vue``, ``.js``,
    ``.ts`` or ``.html`` and is not a ``.json``, ``.d.ts``, ``.config.ts`` or
    ``.config.js`` file.  A name without an extension is treated as a
    directory and kept unless it contains one of the excluded names
    (``node_modules``, ``dist``, ...).  Each visited name is printed.

    :param dir_path: directory to inspect
    :return: ``(path_list, root_file_list)`` - full paths of the kept
        extension-less entries, and bare names of the kept files
    """
    # Compiled once; the suffix patterns use the same dotted form as
    # get_deep_dirs so that e.g. "field.ts" is not mistaken for "*.d.ts".
    has_extension = re.compile(r'\.[^.\\/:*?"<>|\r\n]+$')
    skipped_file = re.compile(r'(\.json|\.d\.ts|\.config\.ts|\.config\.js)$')
    kept_file = re.compile(r'\.(py|vue|js|ts|html)$')
    skipped_dir = re.compile(r'(__pycache__|venv|build|dist|node_modules|public|LICENSE)')

    path_list = []
    root_file_list = []
    for file in os.listdir(dir_path):
        print(file)
        # Skip hidden entries.
        if file.startswith('.'):
            continue
        if has_extension.search(file):
            if not skipped_file.search(file) and kept_file.search(file):
                root_file_list.append(file)
            continue
        # Skip dependency / build output folders.
        if skipped_dir.search(file):
            continue
        path_list.append(os.path.join(dir_path, file))
    return path_list, root_file_list
def get_deep_dirs(path):
    """Recursively collect the source files below ``path``.

    Directories whose name starts with ``.`` or contains one of the excluded
    names (``node_modules``, ``dist``, ``assets``, ...) are not entered.
    Files are kept when they end in ``.py``, ``.vue``, ``.js``, ``.ts`` or
    ``.html``, unless they are hidden or match the exclusion suffixes
    (``.d.ts``, ``.config.js``, ...).

    :param path: root directory of the walk
    :return: list of full paths of the kept files
    """
    skipped_dir = re.compile(
        r'(__pycache__|venv|build|dist|node_modules|public|LICENSE|assets)')
    skipped_file = re.compile(
        r'(\.json|\.d\.ts|\.lock|\.config\.ts|\.config\.js|\.png|\.woff2|\.ttf|\.woff|\.css|README\.md|\.toml)$')
    # The dot applies to every alternative, so extension-less names such as
    # "hosts" no longer count as ".ts" files.
    kept_file = re.compile(r'\.(py|vue|js|ts|html)$')

    file_path = []
    for root, dirs, files in os.walk(path):
        # Prune in place: os.walk only descends into what remains in `dirs`.
        dirs[:] = [
            name for name in dirs
            if not name.startswith('.') and not skipped_dir.search(name)
        ]
        for name in files:
            if name.startswith('.') or skipped_file.search(name):
                continue
            if kept_file.search(name):
                file_path.append(os.path.join(root, name))
    return file_path
def readcode_writemd(file_path, root_path):
    """Read one source file and append it to the note via ``write2md``.

    The file extension (without the dot) is passed on as the code-fence
    language; a file without an extension gets an empty language tag.

    :param file_path: path of the source file to read
    :param root_path: directory forwarded to ``write2md``
    :return: None
    """
    matches = re.findall(r'\.[^.\\/:*?"<>|\r\n]+$', file_path)
    # Always pass a string: an empty match list must not leak into the fence.
    suffix = matches[0][1:] if matches else ''
    # errors='replace' keeps one non-UTF-8 file from aborting the whole run.
    with open(file_path, "r", encoding='utf-8', errors='replace') as f:
        head_line = f.readline()
        rest_line = f.read()
    write2md(head_line, head_line + rest_line, suffix, file_path, root_path)
def write2md(head, content, suffix, file_path, root_path):
    """Append one file as a fenced code block to ``NOTE.md`` in ``root_path``.

    The section consists of a level-1 heading with the file path followed by
    the content inside a code fence tagged with ``suffix``.  The fence is
    three backticks, or longer when the content itself contains a run of
    three or more backticks, so the block cannot be closed early.

    :param head: unused; kept for interface compatibility
    :param content: text placed inside the code fence
    :param suffix: language tag written after the opening fence
    :param file_path: path shown in the heading
    :param root_path: directory containing ``NOTE.md``
    :return: None
    """
    longest_run = max((len(run) for run in re.findall(r'`+', content)), default=0)
    fence = '`' * max(3, longest_run + 1)
    with open(os.path.join(root_path, 'NOTE.md'), "a", encoding='utf-8') as f:
        f.write(f"# `{file_path}`\n\n")
        f.write(f"{fence}{suffix}\n")
        f.write(content + "\n")
        f.write(f"{fence}\n")
if __name__ == '__main__':
    root_path = get_dir_path_gui()
    if not root_path:
        # Dialog cancelled: there is no directory to scan.
        raise SystemExit('No directory selected.')

    md_file = os.path.join(root_path, 'NOTE.md')
    # Remove the note produced by a previous run; sections are appended.
    if os.path.exists(md_file):
        os.remove(md_file)

    for file_path in get_deep_dirs(root_path):
        print(file_path)
        readcode_writemd(file_path, root_path)
    print('!!!complete!!!')
C:\my_script\html2word\add_leading.py
from docx import Document
import easygui
from easygui import *
class User(EgStore):
    """Settings object persisted through easygui's EgStore.

    The only field is ``path``; callers in this file pass it as the
    ``default`` of the easygui file dialog.
    """

    def __init__(self, filename):
        # Give `path` a value before the EgStore initializer runs.
        self.path = ''
        super().__init__(filename)
def get_dir_path_gui():
    """Let the user pick a file with an easygui dialog.

    The dialog starts at the path saved in ``settings.txt``; a successful
    selection replaces the saved path.  Cancelling leaves the saved path
    unchanged.

    :return: the value returned by ``easygui.fileopenbox`` (falsy when the
        dialog was cancelled)
    """
    user = User("settings.txt")
    # Load the path saved by a previous run.
    user.restore()
    file_path = easygui.fileopenbox(default=user.path)
    if file_path:
        user.path = file_path
        user.store()
    return file_path
def read_doc(file_path):
    """Give every paragraph that starts with ``"# "`` the "Heading 1" style.

    The document is opened from ``file_path``, each matching paragraph's
    text is printed, and the result is saved back to the same path.

    :param file_path: path of the .docx file to modify in place
    :return: None
    """
    document = Document(file_path)
    for paragraph in document.paragraphs:
        text = paragraph.text
        if not text.startswith("# "):
            continue
        print(text)
        paragraph.style = document.styles["Heading 1"]
    document.save(file_path)
if __name__ == '__main__':
    file_path = get_dir_path_gui()
    if not file_path:
        # Dialog cancelled: there is no document to rewrite.
        raise SystemExit('No document selected.')
    read_doc(file_path)
C:\my_script\video2img\main.py
import os
import re
import cv2
import shutil
import easygui
from PIL import Image
import datetime
from subprocess import run
from easygui import *
class User(EgStore):
    """Settings object persisted through easygui's EgStore.

    The only field is ``path``; callers in this file pass it as the
    ``default`` of the easygui file/directory dialogs.
    """

    def __init__(self, filename):
        # Give `path` a value before the EgStore initializer runs.
        self.path = ''
        super().__init__(filename)
def getVideoPath():
    """Ask the user (via easygui dialogs) which video files to process.

    The user first types ``2`` to pick a directory; any other answer
    (including cancelling the prompt) selects file mode.  In directory mode
    the tree is walked recursively and every file with one of the known
    video extensions is collected.  The chosen location is stored in
    ``settings.txt`` through ``User`` and offered as the dialog default.

    :return: list of selected file paths; an empty list when the dialog
        was cancelled.
    """
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)
    # Load the location saved by a previous run.
    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        selected_dir = easygui.diropenbox(default=user.path)
        if not selected_dir:
            # Cancelled: keep the previously saved path untouched.
            return []
        user.path = selected_dir
        user.store()
        suffixes = ('.flv', '.mp4', '.mkv', '.avi', '.wmv', '.mpeg',
                    '.f4v', '.rmvb', '.rm', '.mov')
        files = []
        for root, _dirs, names in os.walk(selected_dir):
            for name in names:
                if name.endswith(suffixes):
                    # Join with the directory currently being walked so that
                    # files in sub-directories get their real path.
                    files.append(os.path.join(root, name))
        return files

    selected_files = easygui.fileopenbox(multiple=True, default=user.path)
    if not selected_files:
        return []
    user.path = selected_files[0]
    user.store()
    return selected_files
def setDir(filepath):
    """Make sure ``filepath`` is an existing, freshly cleared directory.

    An existing directory is removed (best effort) and recreated; missing
    parent directories are created as well.  Entries that could not be
    deleted are left in place instead of aborting with FileExistsError.

    :param filepath: path of the directory to create or clear
    :return: None
    """
    if os.path.exists(filepath):
        shutil.rmtree(filepath, ignore_errors=True)
    # exist_ok tolerates a directory that rmtree could not fully remove.
    os.makedirs(filepath, exist_ok=True)
# Split a video into still images
def splitFrames(video_full_path, time_frency=30):
    """Save every ``time_frency``-th frame of a video as a PNG image.

    The images go into a directory next to the video, named after the part
    of the file name before its first dot; that directory is cleared first
    via ``setDir``.  Each image is named ``<video file name>_<frame>.png``.
    Start time, end time and elapsed seconds are echoed through ``run_cmd``.

    :param video_full_path: path of the video file
    :param time_frency: frame interval between saved images (default 30)
    :return: None
    """
    start = datetime.datetime.now()
    run_cmd(f'echo 开始时间:{start}', 0)
    path, file = os.path.split(video_full_path)
    img_dir = os.path.join(path, file.split('.')[0])
    # Create (or clear) the directory that receives the images.
    setDir(img_dir)

    cap = cv2.VideoCapture(video_full_path)
    num = 0
    try:
        while True:
            ret, data = cap.read()
            if not ret:
                break
            if num % time_frency == 0:
                # OpenCV delivers frames in BGR channel order while PIL
                # interprets arrays as RGB; convert so colours are not swapped.
                rgb = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
                name = file + "_" + str(num) + ".png"
                Image.fromarray(rgb).save(os.path.join(img_dir, name))
            num = num + 1
    finally:
        # Release the capture even when decoding or saving fails.
        cap.release()

    end = datetime.datetime.now()
    run_cmd(f'echo 视频拆分结束!:{end}', 0)
    run_cmd(f'echo 用时:{(end - start).seconds}s', 0)
    run_cmd(f'echo -----------------------------', 0)
def run_cmd(cmd_str='', echo_print=1):
    """Run a shell command through ``subprocess.run``.

    :param cmd_str: command line handed to the shell
    :param echo_print: when equal to 1, the command is printed before it runs
    :return: None
    """
    announce = echo_print == 1
    if announce:
        print(f'\n执行cmd指令="{cmd_str}"')
    run(cmd_str, shell=True)
if __name__ == '__main__':
    # splitFrames('./test')
    files = getVideoPath() or []
    answer = easygui.enterbox(msg='Input time_frency(default=30):', strip=True)
    if answer is None:
        # Prompt cancelled.
        raise SystemExit('Cancelled.')

    # None means "use the default interval of splitFrames".
    time_frency = None
    if answer.strip() != '':
        try:
            time_frency = int(answer)
        except ValueError:
            raise SystemExit(f'time_frency must be an integer, got {answer!r}')
        if time_frency < 1:
            raise SystemExit('time_frency must be at least 1')

    total = len(files)
    for index, file in enumerate(files):
        # Printed directly instead of through a shell `echo`, so file names
        # containing shell metacharacters are shown literally.
        print(f'{index}/{total}--{file}', flush=True)
        if time_frency is None:
            splitFrames(file)
        else:
            splitFrames(file, time_frency)