日常生活的交流与学习

首页 新随笔 联系 管理

C:\my_script\2anki\cnblog2anki\cnblog2anki.py

from base64 import encode
import os
import re
import shutil
import easygui
from subprocess import run
from easygui import *
from bs4 import BeautifulSoup


class User(EgStore):
    """Settings object with a single ``path`` field, backed by easygui's EgStore."""

    def __init__(self, filename):
        # Value used for ``path`` until something else is assigned or restored.
        self.path = ''
        super().__init__(filename)


def get_file_path():
    """Ask the user for input files, either directly or by scanning a folder.

    A text prompt selects the mode: ``'2'`` opens a directory dialog and
    collects every ``.html``/``.mhtml``/``.htm``/``.txt`` file below the
    chosen folder; any other answer opens a multi-select file dialog.
    The chosen location is saved in ``settings.txt`` and used as the
    default of the next dialog.

    :return: list of file paths; empty when the dialog is cancelled or
             nothing matches
    """
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    # Load the location stored by a previous run.
    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        chosen_dir = easygui.diropenbox(default=user.path)
        if not chosen_dir:
            # Dialog cancelled: keep the stored path and report no files.
            return []
        user.path = chosen_dir
        user.store()

        wanted = re.compile(r"\.(html|mhtml|htm|txt)$")
        files = []
        for root, _dirs, names in os.walk(chosen_dir):
            for name in names:
                if wanted.search(name):
                    # Join with the directory actually being walked so that
                    # files in sub-folders get their real path.
                    files.append(os.path.join(root, name))
        return files

    chosen_files = easygui.fileopenbox(multiple=True, default=user.path)
    if not chosen_files:
        # Dialog cancelled: keep the stored path and report no files.
        return []
    user.path = chosen_files[0]
    user.store()
    return chosen_files


def setDir(filepath):
    """Make sure ``filepath`` is a freshly created directory.

    If the path already exists it is removed first (removal errors are
    ignored), then the directory is created.

    :param filepath: path of the directory to create
    :return: None
    """
    if os.path.exists(filepath):
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)


def cnblog2anki(file):
    """Collect ``(title, url)`` pairs from the first table body of an HTML file.

    For every ``<tr>`` of the first ``<tbody>``, the link that is a direct
    child of the first cell supplies the title (its text) and the URL (its
    ``href`` prefixed with ``http:``).

    :param file: path of a UTF-8 encoded HTML file
    :return: list of ``(title, url)`` tuples; empty when the file has no
             ``<tbody>`` or no usable rows
    """
    with open(file, "r", encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    tbody = soup.select_one("tbody")
    if tbody is None:
        return []

    res = []
    for tr_ele in tbody.select('tr'):
        link = tr_ele.select_one('td:nth-child(1)>a')
        if link is None:
            # Row without a link in its first cell (e.g. a header row).
            continue
        href = link.get('href')
        if not href:
            continue
        # NOTE(review): the scheme is prepended unconditionally, so hrefs are
        # presumably protocol-relative ("//host/...") - confirm on a sample page.
        res.append((link.text, 'http:' + href))
    return res


def write2txt(msg, path=None):
    """Append ``msg`` to ``<path>.csv`` (UTF-8).

    :param msg: text to append; a string or an iterable of strings
    :param path: base path of the output file, ``.csv`` is appended to it.
                 When omitted, the module-level variable ``file`` set by the
                 script's main loop is used, as before.
    :return: None
    """
    if path is None:
        # Backward-compatible fallback for callers that rely on the global.
        path = file
    with open(path + '.csv', "a", encoding='utf-8') as f:
        f.writelines(msg)


if __name__ == '__main__':
    # Convert every selected page into rows of "<title> TAB <link>".
    selected_files = get_file_path()
    # The loop variable keeps the name `file`: write2txt() can pick the
    # output path up from that module-level name.
    for file in selected_files:
        for title, url in cnblog2anki(file):
            print((title, url))
            write2txt(f'{title}\t<a href={url}>{title}</a>\n')

C:\my_script\2anki\weibo2anki\weibo2anki.py

import os
import re
import shutil

import easygui
from bs4 import BeautifulSoup
from easygui import *


class User(EgStore):
    """EgStore-based settings holder exposing one field, ``path``."""

    def __init__(self, filename):
        # Initial value of ``path`` before anything is restored or assigned.
        self.path = ''
        super().__init__(filename)


def get_file_path():
    """Ask the user for input files, either directly or by scanning a folder.

    A text prompt selects the mode: ``'2'`` opens a directory dialog and
    collects every ``.html``/``.mhtml``/``.htm``/``.txt`` file below the
    chosen folder; any other answer opens a multi-select file dialog.
    The chosen location is saved in ``settings.txt`` and used as the
    default of the next dialog.

    :return: list of file paths; empty when the dialog is cancelled or
             nothing matches
    """
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    # Load the location stored by a previous run.
    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        chosen_dir = easygui.diropenbox(default=user.path)
        if not chosen_dir:
            # Dialog cancelled: keep the stored path and report no files.
            return []
        user.path = chosen_dir
        user.store()

        wanted = re.compile(r"\.(html|mhtml|htm|txt)$")
        files = []
        for root, _dirs, names in os.walk(chosen_dir):
            for name in names:
                if wanted.search(name):
                    # Join with the directory actually being walked so that
                    # files in sub-folders get their real path.
                    files.append(os.path.join(root, name))
        return files

    chosen_files = easygui.fileopenbox(multiple=True, default=user.path)
    if not chosen_files:
        # Dialog cancelled: keep the stored path and report no files.
        return []
    user.path = chosen_files[0]
    user.store()
    return chosen_files


def setDir(filepath):
    """Create ``filepath`` as a directory, replacing whatever was there.

    An already existing path is deleted beforehand; errors during that
    deletion are ignored.

    :param filepath: path of the directory to create
    :return: None
    """
    already_present = os.path.exists(filepath)
    if already_present:
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)


def cnblog2anki(file):
    """Extract keyword-matching posts from a saved HTML page.

    Every element with class ``vue-recycle-scroller__item-view`` is treated
    as one card. A card is kept when its text element matches the keyword
    pattern and it has a timestamp link; the link's ``title`` and ``href``
    attributes supply the publish time and URL.

    :param file: path of a UTF-8 encoded HTML file
    :return: list of ``(text, publish_time, url)`` tuples, possibly empty
    """
    content_reg = r'(anki)|(vue)|(javascript)|(typescript)|(three(.|)js)|(js)|(ts)'
    pattern = re.compile(content_reg)

    with open(file, "r", encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    res = []
    # select() returns a (possibly empty) list, so no None check is needed.
    for card in soup.select(".vue-recycle-scroller__item-view"):
        text_ele = card.select_one(".detail_wbtext_4CRf9")
        if text_ele is None:
            continue
        pub_text = text_ele.text
        # Keep only posts whose text contains one of the keywords.
        if not pattern.search(pub_text):
            continue
        # Strip the fixed video-caption phrase from the text.
        pub_text = pub_text.replace('诺亚方卓的微博视频', '')
        # The timestamp link carries both the publish time and the post URL.
        pub_ele = card.select_one("a.head-info_time_6sFQg")
        if pub_ele is None:
            continue
        # Missing attributes become '' so callers can concatenate safely.
        pub_time = pub_ele.get('title') or ''
        pub_url = pub_ele.get('href') or ''
        res.append((pub_text, pub_time, pub_url))
    return res


def write2txt(msg, path=None):
    """Append ``msg`` to ``<path>.csv`` (UTF-8).

    :param msg: text to append; a string or an iterable of strings
    :param path: base path of the output file, ``.csv`` is appended to it.
                 When omitted, the module-level variable ``file`` set by the
                 script's main loop is used, as before.
    :return: None
    """
    if path is None:
        # Backward-compatible fallback for callers that rely on the global.
        path = file
    with open(path + '.csv', "a", encoding='utf-8') as f:
        f.writelines(msg)


if __name__ == '__main__':
    # Convert every selected page into rows of "<text+time> TAB <link>".
    selected_files = get_file_path()
    # The loop variable keeps the name `file`: write2txt() can pick the
    # output path up from that module-level name.
    for file in selected_files:
        for content in cnblog2anki(file):
            label = content[0] + content[1]
            write2txt(f'{label}\t<a href={content[2]}>{label}</a>\n')

C:\my_script\bookmark2image\bookmark2_image_and_word.py

import os
import re
import shutil

import easygui
from easygui import *
from docx import Document
from docx.shared import Cm


class User(EgStore):
    """Settings object with a single ``path`` field, backed by easygui's EgStore."""

    def __init__(self, filename):
        # Value used for ``path`` until something else is assigned or restored.
        self.path = ''
        super().__init__(filename)


def get_dir_path_gui():
    """Let the user pick a file and remember the choice for the next run.

    Despite the name, a *file* dialog is shown. The dialog starts at the
    path stored in ``settings.txt``.

    :return: the selected file path, or None when the dialog is cancelled
    """
    # Load the location stored by a previous run.
    user = User("settings.txt")
    user.restore()
    file_path = easygui.fileopenbox(default=user.path)
    if file_path:
        # Only persist a real selection; a cancelled dialog must not wipe
        # the previously remembered path.
        user.path = file_path
        user.store()
    return file_path


def setDir(filepath):
    """Make sure ``filepath`` is a freshly created directory.

    If the path already exists it is removed first (removal errors are
    ignored), then the directory is created.

    :param filepath: path of the directory to create
    :return: None
    """
    if os.path.exists(filepath):
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)


def bookmark2image(file, file_dir, document):
    """Add every image listed in a bookmark file to a document and copy it.

    The file is read as UTF-16. The first line is skipped; every other
    non-blank line is expected to look like ``<key>=<image path>``.

    :param file: path of the bookmark file
    :param file_dir: directory the referenced images are copied into
    :param document: python-docx document that receives the pictures
    :return: None
    """
    with open(file, "r", encoding='utf-16') as f:
        bookmarks = f.readlines()

    for index, bookmark in enumerate(bookmarks):
        # Skip the first line and blank lines.
        if index == 0 or not bookmark.strip():
            continue
        _key, separator, value = bookmark.partition('=')
        if not separator:
            # Not a key=value line; nothing to extract.
            continue
        path = value.strip()
        # Use the document passed in by the caller rather than a global.
        document.add_picture(path, width=Cm(28))
        shutil.copy(path, file_dir)


def mkdir(path):
    """Create ``path`` (including parents) unless it already exists.

    Surrounding whitespace and trailing backslashes are removed from the
    path before it is used.

    :param path: directory path to create
    :return: True when the directory was created; None (after printing a
             notice) when the path already exists
    """
    target = path.strip().rstrip("\\")

    if os.path.exists(target):
        # Nothing to create; just tell the user.
        print('已存在')
        return None

    os.makedirs(target)
    return True


if __name__ == '__main__':
    # The global keeps its (misspelled) name `documnet` because other code in
    # this script may look it up under exactly that name.
    documnet = Document()

    # Page geometry: 30 cm x 62 cm with 1 cm margins on every side.
    page = documnet.sections[0]
    page.page_width = Cm(30)
    page.page_height = Cm(62)
    page.left_margin = Cm(1)
    page.right_margin = Cm(1)
    page.top_margin = Cm(1)
    page.bottom_margin = Cm(1)

    file_path = get_dir_path_gui()
    # Output folder: the selected file's path without its extension.
    dir_name = os.path.splitext(file_path)[0]
    # Document name: the file name up to its first dot.
    file_name = os.path.split(file_path)[1].split('.')[0]
    setDir(dir_name)
    bookmark2image(file_path, dir_name, documnet)
    documnet.save(dir_name+"//"+file_name+'.docx')

C:\my_script\code2markdown\code2md.py

import os
import re
import shutil

import easygui
from easygui import *

from pathlib import Path


class User(EgStore):
    """EgStore-based settings holder exposing one field, ``path``."""

    def __init__(self, filename):
        # Initial value of ``path`` before anything is restored or assigned.
        self.path = ''
        super().__init__(filename)


def get_dir_path_gui():
    """Let the user pick a directory and remember the choice for the next run.

    The dialog starts at the path stored in ``settings.txt``.

    :return: the selected directory path, or None when the dialog is
             cancelled
    """
    # Load the location stored by a previous run.
    user = User("settings.txt")
    user.restore()
    file_path = easygui.diropenbox(default=user.path)
    if file_path:
        # Only persist a real selection; a cancelled dialog must not wipe
        # the previously remembered path.
        user.path = file_path
        user.store()
    return file_path


def get_root_dir(dir_path):
    """Split the direct children of ``dir_path`` into sub-folders and source files.

    Entries are classified by name only: a name ending in ``.<something>``
    counts as a file, anything else as a folder. Every entry name is
    printed as it is visited.

    :param dir_path: directory whose direct children are inspected
    :return: ``(path_list, root_file_list)`` - full paths of the kept
             folders, and bare names of the kept files
    """
    has_extension = re.compile(r'\.[^.\\/:*?"<>|\r\n]+$')
    rejected_file = re.compile(r'(\.json|d\.ts|config\.ts|config\.js)$')
    accepted_file = re.compile(r'(\.py|vue|js|ts|html)$')
    rejected_dir = re.compile(r'(__pycache__|venv|build|dist|node_modules|public|LICENSE)')

    path_list = []
    root_file_list = []
    for entry in os.listdir(dir_path):
        print(entry)
        # Hidden entries are ignored altogether.
        if entry.startswith('.'):
            continue
        if has_extension.search(entry):
            # File: keep it only if it is not rejected and has a wanted ending.
            if not rejected_file.search(entry) and accepted_file.search(entry):
                root_file_list.append(entry)
            continue
        # Folder: drop build/dependency folders, keep the rest as full paths.
        if rejected_dir.search(entry):
            continue
        path_list.append(os.path.join(dir_path, entry))
    return path_list, root_file_list



def get_deep_dirs(path):
    """Recursively collect the source files below ``path``.

    Hidden folders and folders whose name contains one of the rejected
    words are not descended into. Hidden files and files with a rejected
    ending are dropped; of the rest, only files with a wanted ending are
    returned.

    :param path: directory to walk
    :return: list of full file paths
    """
    rejected_dir = re.compile(r'(__pycache__|venv|build|dist|node_modules|public|LICENSE|assets)')
    rejected_file = re.compile(
        r'(\.json|\.d\.ts|\.lock|\.config\.ts|\.config\.js|\.png|\.woff2|\.ttf|\.woff|\.css|README\.md|\.toml)$')
    accepted_file = re.compile(r'(\.py|vue|js|ts|html)$')

    file_path = []
    for root, dirs, files in os.walk(path):
        # Slice assignment edits the list in place, which is what makes
        # os.walk skip the removed folders.
        dirs[:] = [
            name for name in dirs
            if not name.startswith('.') and not rejected_dir.search(name)
        ]

        for name in files:
            if name.startswith('.') or rejected_file.search(name):
                continue
            if accepted_file.search(name):
                file_path.append(os.path.join(root, name))
    return file_path




def readcode_writemd(file_path, root_path):
    """Read one source file and append it to the markdown note via write2md.

    The file extension (without the dot) is passed on as the code-fence
    language; it is an empty string when the path has no extension.

    :param file_path: path of the UTF-8 source file to read
    :param root_path: project root, forwarded to write2md
    :return: None
    """
    match = re.search(r'\.[^.\\/:*?"<>|\r\n]+$', file_path)
    # Without a match the suffix must be '' - not an empty list, which
    # would end up in the output as "```[]".
    suffix = match.group(0)[1:] if match else ''

    with open(file_path, "r", encoding='utf-8') as f:
        head_line = f.readline()
        rest_line = f.read()

    write2md(head_line, head_line + rest_line, suffix, file_path, root_path)




def write2md(head, content, suffix, file_path, root_path):
    """Append one file's content as a fenced code section to ``NOTE.md``.

    The section consists of a level-1 heading with the file path, followed
    by a code fence tagged with ``suffix`` that wraps ``content``.

    :param head: first line of the file (currently not written)
    :param content: full text placed inside the code fence
    :param suffix: language tag written after the opening fence
    :param file_path: path shown in the section heading
    :param root_path: directory that holds ``NOTE.md``
    :return: None
    """
    section = "".join((
        f"# `{file_path}`\n\n",
        f"```{suffix}\n",
        content + "\n",
        "```\n",
    ))
    with open(root_path + '/NOTE.md', "a", encoding='utf-8') as f:
        f.write(section)


 


if __name__ == '__main__':
    root_path = get_dir_path_gui()

    # Remove the note produced by a previous run so the output starts empty.
    md_file = os.path.join(root_path, 'NOTE.md')
    if os.path.exists(md_file):
        os.remove(md_file)

    # Append every collected source file to the note, logging each path.
    for file_path in get_deep_dirs(root_path):
        print(file_path)
        readcode_writemd(file_path, root_path)
    print('!!!complete!!!')

C:\my_script\html2word\add_leading.py

from docx import Document
import easygui
from easygui import *

class User(EgStore):
    """Settings object with a single ``path`` field, backed by easygui's EgStore."""

    def __init__(self, filename):
        # Value used for ``path`` until something else is assigned or restored.
        self.path = ''
        super().__init__(filename)


def get_dir_path_gui():
    """Let the user pick a file and remember the choice for the next run.

    Despite the name, a *file* dialog is shown. The dialog starts at the
    path stored in ``settings.txt``.

    :return: the selected file path, or None when the dialog is cancelled
    """
    # Load the location stored by a previous run.
    user = User("settings.txt")
    user.restore()
    file_path = easygui.fileopenbox(default=user.path)
    if file_path:
        # Only persist a real selection; a cancelled dialog must not wipe
        # the previously remembered path.
        user.path = file_path
        user.store()
    return file_path



def read_doc(file_path):
    """Turn markdown-style ``# `` paragraphs of a Word file into headings.

    Every paragraph whose text starts with ``"# "`` is printed and given
    the ``Heading 1`` style; the document is then saved over the original.

    :param file_path: path of the .docx file to modify in place
    :return: None
    """
    document = Document(file_path)
    for paragraph in document.paragraphs:
        text = paragraph.text
        if not text.startswith("# "):
            continue
        print(text)
        paragraph.style = document.styles["Heading 1"]
    document.save(file_path)


if __name__ == '__main__':
    # Pick a document through the GUI and restyle its "# " paragraphs.
    read_doc(get_dir_path_gui())


C:\my_script\video2img\main.py

import os
import re

import cv2

import shutil

import easygui
from PIL import Image
import datetime
from subprocess import run

from easygui import *


class User(EgStore):
    """EgStore-based settings holder exposing one field, ``path``."""

    def __init__(self, filename):
        # Initial value of ``path`` before anything is restored or assigned.
        self.path = ''
        super().__init__(filename)








def getVideoPath():
    """Ask the user for video files, either directly or by scanning a folder.

    A text prompt selects the mode: ``'2'`` opens a directory dialog and
    collects every file below the chosen folder that has one of the known
    video extensions; any other answer opens a multi-select file dialog.
    The chosen location is saved in ``settings.txt`` and used as the
    default of the next dialog.

    :return: list of file paths; empty when the dialog is cancelled or
             nothing matches
    """
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    # Load the location stored by a previous run.
    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        chosen_dir = easygui.diropenbox(default=user.path)
        if not chosen_dir:
            # Dialog cancelled: keep the stored path and report no files.
            return []
        user.path = chosen_dir
        user.store()

        wanted = re.compile(r"\.(flv|mp4|mkv|avi|wmv|mpeg|f4v|rmvb|rm|mov)$")
        files = []
        for root, _dirs, names in os.walk(chosen_dir):
            for name in names:
                if wanted.search(name):
                    # Join with the directory actually being walked so that
                    # files in sub-folders get their real path.
                    files.append(os.path.join(root, name))
        return files

    chosen_files = easygui.fileopenbox(multiple=True, default=user.path)
    if not chosen_files:
        # Dialog cancelled: keep the stored path and report no files.
        return []
    user.path = chosen_files[0]
    user.store()
    return chosen_files


def setDir(filepath):
    """Create ``filepath`` as a directory, replacing whatever was there.

    An already existing path is deleted beforehand; errors during that
    deletion are ignored.

    :param filepath: path of the directory to create
    :return: None
    """
    already_present = os.path.exists(filepath)
    if already_present:
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)


# 将视频拆分为图片
# Split a video into images
def splitFrames(video_full_path, time_frency=30):
    """Save every ``time_frency``-th frame of a video as a PNG file.

    The images go into a folder next to the video, named after the video
    file name up to its first dot; that folder is recreated empty on each
    call. Images are named ``<video file name>_<frame index>.png``.
    Start time, end time and duration are echoed through ``run_cmd``.

    :param video_full_path: path of the video file
    :param time_frency: frame interval between saved images
    :return: None
    """
    start = datetime.datetime.now()
    run_cmd(f'echo 开始时间:{start}', 0)

    path, file = os.path.split(video_full_path)
    img_dir = os.path.join(path, file.split('.')[0])
    # Folder that receives the extracted images (emptied if it exists).
    setDir(img_dir)

    cap = cv2.VideoCapture(video_full_path)
    num = 0
    try:
        while True:
            ret, data = cap.read()
            if not ret:
                break
            # Only every time_frency-th frame is converted and stored.
            if num % time_frency == 0:
                # OpenCV delivers frames in BGR order, PIL expects RGB;
                # without the conversion red and blue are swapped.
                img = Image.fromarray(cv2.cvtColor(data, cv2.COLOR_BGR2RGB))
                name = file + "_" + str(num) + ".png"
                img.save(os.path.join(img_dir, name))
            num = num + 1
    finally:
        # Release the capture even if reading or saving fails.
        cap.release()

    end = datetime.datetime.now()
    run_cmd(f'echo 视频拆分结束!:{end}', 0)
    # total_seconds() covers the whole duration; .seconds wraps after a day.
    run_cmd(f'echo 用时:{int((end - start).total_seconds())}s', 0)
    run_cmd(f'echo -----------------------------', 0)


def run_cmd(cmd_str='', echo_print=1):
    """Run a command line through the shell.

    When ``echo_print`` equals 1, the command is printed before it runs.

    :param cmd_str: command line to execute
    :param echo_print: pass 1 to print the command first, anything else to
                       stay silent
    :return: None
    """
    announce = echo_print == 1
    if announce:
        message = '\n执行cmd指令="{}"'.format(cmd_str)
        print(message)
    run(cmd_str, shell=True)


if __name__ == '__main__':
    files = getVideoPath()
    answer = easygui.enterbox(msg='Input time_frency(default=30):', strip=True)

    # enterbox returns None when the dialog is cancelled; treat that like an
    # empty answer and fall back to splitFrames' default interval.
    answer = (answer or '').strip()
    # Parse once, before any video is processed, so a non-numeric answer
    # fails immediately.
    time_frency = int(answer) if answer else None

    total = len(files)
    for index, file in enumerate(files):
        run_cmd(f'echo {index}/{total}--{file}', 0)
        if time_frency is None:
            splitFrames(file)
        else:
            splitFrames(file, time_frency)

posted on 2022-11-19 22:18  lazycookie  阅读(42)  评论(0)  编辑  收藏  举报