日常生活的交流与学习

首页 新随笔 联系 管理

D:\code_gitee\python_cnblog2anki_and_weibo2anki\main.py

import os
import re
import shutil

import easygui
from bs4 import BeautifulSoup
from easygui import *


class User(EgStore):
    """Settings object stored through easygui's ``EgStore``.

    It carries a single attribute, ``path``, which defaults to an empty
    string until a value is assigned.
    """

    def __init__(self, filename):
        # Set the default for ``path`` first, then let the EgStore
        # initialiser receive the settings file name.
        self.path = ''
        super().__init__(filename)


def get_file_path():
    """Ask the user, through easygui dialogs, which files to process.

    The user first types ``2`` to pick a directory; any other answer
    (conventionally ``1``) opens a multi-file picker.  The chosen location
    is saved in ``settings.txt`` and offered as the default next time.

    :return: list of file paths.  In directory mode: every ``.html``,
        ``.mhtml``, ``.htm`` or ``.txt`` file found under the chosen
        directory, including sub-directories.  An empty list when the
        selection dialog is cancelled.
    """
    mode = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    # Settings store holding the previously used location.
    user = User("settings.txt")
    user.restore()

    if mode == '2':
        chosen_dir = easygui.diropenbox(default=user.path)
        if not chosen_dir:
            # Dialog cancelled: nothing to remember and nothing to process.
            return []
        user.path = chosen_dir
        user.store()

        wanted_suffixes = ('.html', '.mhtml', '.htm', '.txt')
        files = []
        for root, _dirs, names in os.walk(chosen_dir):
            for name in names:
                if name.endswith(wanted_suffixes):
                    # Join with the directory currently being walked so that
                    # files in sub-directories get their real path.
                    files.append(os.path.join(root, name))
        return files

    chosen_files = easygui.fileopenbox(multiple=True, default=user.path)
    if not chosen_files:
        # Dialog cancelled.
        return []
    user.path = chosen_files[0]
    user.store()
    return chosen_files


def setDir(filepath):
    '''
    Make sure ``filepath`` is a freshly created directory.

    An existing path is removed first (removal errors are ignored), then
    the directory is created with ``os.mkdir``.

    :param filepath: path of the directory to (re)create
    :return: None
    '''
    already_present = os.path.exists(filepath)
    if already_present:
        # Best-effort wipe of whatever is there before re-creating it.
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)


def cnblog2anki(file):
    """Extract keyword-matching posts from a saved HTML page.

    Every ``.vue-recycle-scroller__item-view`` element is treated as one
    post.  A post is kept when its text matches the keyword pattern and it
    has a time/link anchor.

    :param file: path of the saved page (read as UTF-8)
    :return: list of ``(text, publish_time, url)`` tuples; always a list,
        empty when nothing matches
    """
    # NOTE(review): matching is case-sensitive and the ``.`` inside
    # ``three(.|)js`` is unescaped (matches any character) -- confirm intended.
    content_reg = r'(anki)|(vue)|(javascript)|(typescript)|(three(.|)js)|(js)|(ts)'
    pattern = re.compile(content_reg)

    with open(file, "r", encoding='utf-8') as f:
        data = f.read()
    soup = BeautifulSoup(data, 'html.parser')

    res = []
    # ``select`` gives an empty result (not None) when nothing matches, so the
    # loop simply does not run and the function still returns a list.
    for card in soup.select(".vue-recycle-scroller__item-view"):
        text_ele = card.select_one(".detail_wbtext_4CRf9")
        if text_ele is None:
            continue
        pub_text = text_ele.text
        # Keep only posts whose text contains one of the keywords.
        if not pattern.search(pub_text):
            continue
        # Remove this fixed label from the post text.
        pub_text = pub_text.replace('诺亚方卓的微博视频', '')
        # The anchor carries the publish time (title) and the post link (href).
        pub_ele = card.select_one("a.head-info_time_6sFQg")
        if pub_ele is None:
            continue
        # Missing attributes become '' so callers can concatenate safely.
        pub_time = pub_ele.get('title') or ''
        pub_url = pub_ele.get('href') or ''
        res.append((pub_text, pub_time, pub_url))
    return res


def write2txt(msg, path=None):
    """Append ``msg`` to the UTF-8 file ``<path>.csv``.

    :param msg: text to append; a string, or an iterable of strings
    :param path: base path of the output file.  When omitted, the
        module-level name ``file`` (bound by the main loop) is used, which
        is the original behaviour.
    :return: None
    """
    base = file if path is None else path
    with open(base + '.csv', "a", encoding='utf-8') as f:
        if isinstance(msg, str):
            # One write instead of writelines() iterating character by character.
            f.write(msg)
        else:
            f.writelines(msg)


if __name__ == '__main__':
    selected_files = get_file_path()
    # The loop variable must stay named ``file``: write2txt() reads that
    # module-level name to decide which .csv file to append to.
    for file in selected_files:
        for text, pub_time, url in cnblog2anki(file):
            label = text + pub_time
            # One tab-separated row: plain label, then the label as a link.
            write2txt(f'{label}\t<a href={url}>{label}</a>\n')

D:\code_gitee\python_cnblog2anki_and_weibo2anki\main_cnblog2anki.py

from base64 import encode
import os
import re
import shutil
import easygui
from subprocess import run
from easygui import *
from bs4 import BeautifulSoup


class User(EgStore):
    """Settings object stored through easygui's ``EgStore``.

    It carries a single attribute, ``path``, which defaults to an empty
    string until a value is assigned.
    """

    def __init__(self, filename):
        # Set the default for ``path`` first, then let the EgStore
        # initialiser receive the settings file name.
        self.path = ''
        super().__init__(filename)


def get_file_path():
    """Ask the user, through easygui dialogs, which files to process.

    The user first types ``2`` to pick a directory; any other answer
    (conventionally ``1``) opens a multi-file picker.  The chosen location
    is saved in ``settings.txt`` and offered as the default next time.

    :return: list of file paths.  In directory mode: every ``.html``,
        ``.mhtml``, ``.htm`` or ``.txt`` file found under the chosen
        directory, including sub-directories.  An empty list when the
        selection dialog is cancelled.
    """
    mode = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    # Settings store holding the previously used location.
    user = User("settings.txt")
    user.restore()

    if mode == '2':
        chosen_dir = easygui.diropenbox(default=user.path)
        if not chosen_dir:
            # Dialog cancelled: nothing to remember and nothing to process.
            return []
        user.path = chosen_dir
        user.store()

        wanted_suffixes = ('.html', '.mhtml', '.htm', '.txt')
        files = []
        for root, _dirs, names in os.walk(chosen_dir):
            for name in names:
                if name.endswith(wanted_suffixes):
                    # Join with the directory currently being walked so that
                    # files in sub-directories get their real path.
                    files.append(os.path.join(root, name))
        return files

    chosen_files = easygui.fileopenbox(multiple=True, default=user.path)
    if not chosen_files:
        # Dialog cancelled.
        return []
    user.path = chosen_files[0]
    user.store()
    return chosen_files


def setDir(filepath):
    '''
    Make sure ``filepath`` is a freshly created directory.

    An existing path is removed first (removal errors are ignored), then
    the directory is created with ``os.mkdir``.

    :param filepath: path of the directory to (re)create
    :return: None
    '''
    already_present = os.path.exists(filepath)
    if already_present:
        # Best-effort wipe of whatever is there before re-creating it.
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)


def cnblog2anki(file):
    """Extract link titles and URLs from the first table of a saved page.

    The first ``<tbody>`` in the page is scanned; for every row, the link
    that is a direct child of the first cell supplies the title and URL.
    (Presumably the page is a cnblogs post list, judging by the function
    name -- verify against the input files.)

    :param file: path of the saved HTML page (read as UTF-8)
    :return: list of ``(title, url)`` tuples; empty when the page has no
        ``<tbody>``.  Rows without a usable link are skipped.
    """
    with open(file, "r", encoding='utf-8') as f:
        data = f.read()
    soup = BeautifulSoup(data, 'html.parser')

    tbody = soup.select_one("tbody")
    if tbody is None:
        return []

    res = []
    for tr_ele in tbody.select('tr'):
        # Look the link up once per row; rows without one are skipped
        # instead of raising IndexError.
        link = tr_ele.select_one('td:nth-child(1)>a')
        href = link.get('href') if link is not None else None
        if not href:
            continue
        if href.startswith(('http://', 'https://')):
            # Already absolute: do not prepend a second scheme.
            url = href
        else:
            # Original behaviour: prefix the scheme, e.g. for '//host/path'.
            url = 'http:' + href
        res.append((link.text, url))
    return res


def write2txt(msg, path=None):
    """Append ``msg`` to the UTF-8 file ``<path>.csv``.

    :param msg: text to append; a string, or an iterable of strings
    :param path: base path of the output file.  When omitted, the
        module-level name ``file`` (bound by the main loop) is used, which
        is the original behaviour.
    :return: None
    """
    base = file if path is None else path
    with open(base + '.csv', "a", encoding='utf-8') as f:
        if isinstance(msg, str):
            # One write instead of writelines() iterating character by character.
            f.write(msg)
        else:
            f.writelines(msg)


if __name__ == '__main__':
    selected_files = get_file_path()
    # The loop variable must stay named ``file``: write2txt() reads that
    # module-level name to decide which .csv file to append to.
    for file in selected_files:
        for entry in cnblog2anki(file):
            print(entry)
            title, url = entry
            # One tab-separated row: plain title, then the title as a link.
            write2txt(f'{title}\t<a href={url}>{title}</a>\n')

D:\code_gitee\python_cnblog2anki_and_weibo2anki\main_weibo2anki.py

import os
import re
import shutil

import easygui
from bs4 import BeautifulSoup
from easygui import *


class User(EgStore):
    """Settings object stored through easygui's ``EgStore``.

    It carries a single attribute, ``path``, which defaults to an empty
    string until a value is assigned.
    """

    def __init__(self, filename):
        # Set the default for ``path`` first, then let the EgStore
        # initialiser receive the settings file name.
        self.path = ''
        super().__init__(filename)


def get_file_path():
    """Ask the user, through easygui dialogs, which files to process.

    The user first types ``2`` to pick a directory; any other answer
    (conventionally ``1``) opens a multi-file picker.  The chosen location
    is saved in ``settings.txt`` and offered as the default next time.

    :return: list of file paths.  In directory mode: every ``.html``,
        ``.mhtml``, ``.htm`` or ``.txt`` file found under the chosen
        directory, including sub-directories.  An empty list when the
        selection dialog is cancelled.
    """
    mode = easygui.enterbox(msg='file(1) or dir(2):', strip=True)

    # Settings store holding the previously used location.
    user = User("settings.txt")
    user.restore()

    if mode == '2':
        chosen_dir = easygui.diropenbox(default=user.path)
        if not chosen_dir:
            # Dialog cancelled: nothing to remember and nothing to process.
            return []
        user.path = chosen_dir
        user.store()

        wanted_suffixes = ('.html', '.mhtml', '.htm', '.txt')
        files = []
        for root, _dirs, names in os.walk(chosen_dir):
            for name in names:
                if name.endswith(wanted_suffixes):
                    # Join with the directory currently being walked so that
                    # files in sub-directories get their real path.
                    files.append(os.path.join(root, name))
        return files

    chosen_files = easygui.fileopenbox(multiple=True, default=user.path)
    if not chosen_files:
        # Dialog cancelled.
        return []
    user.path = chosen_files[0]
    user.store()
    return chosen_files


def setDir(filepath):
    '''
    Make sure ``filepath`` is a freshly created directory.

    An existing path is removed first (removal errors are ignored), then
    the directory is created with ``os.mkdir``.

    :param filepath: path of the directory to (re)create
    :return: None
    '''
    already_present = os.path.exists(filepath)
    if already_present:
        # Best-effort wipe of whatever is there before re-creating it.
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)


def cnblog2anki(file):
    """Extract keyword-matching posts from a saved HTML page.

    Every ``.vue-recycle-scroller__item-view`` element is treated as one
    post.  A post is kept when its text matches the keyword pattern and it
    has a time/link anchor.

    :param file: path of the saved page (read as UTF-8)
    :return: list of ``(text, publish_time, url)`` tuples; always a list,
        empty when nothing matches
    """
    # NOTE(review): matching is case-sensitive and the ``.`` inside
    # ``three(.|)js`` is unescaped (matches any character) -- confirm intended.
    content_reg = r'(anki)|(vue)|(javascript)|(typescript)|(three(.|)js)|(js)|(ts)'
    pattern = re.compile(content_reg)

    with open(file, "r", encoding='utf-8') as f:
        data = f.read()
    soup = BeautifulSoup(data, 'html.parser')

    res = []
    # ``select`` gives an empty result (not None) when nothing matches, so the
    # loop simply does not run and the function still returns a list.
    for card in soup.select(".vue-recycle-scroller__item-view"):
        text_ele = card.select_one(".detail_wbtext_4CRf9")
        if text_ele is None:
            continue
        pub_text = text_ele.text
        # Keep only posts whose text contains one of the keywords.
        if not pattern.search(pub_text):
            continue
        # Remove this fixed label from the post text.
        pub_text = pub_text.replace('诺亚方卓的微博视频', '')
        # The anchor carries the publish time (title) and the post link (href).
        pub_ele = card.select_one("a.head-info_time_6sFQg")
        if pub_ele is None:
            continue
        # Missing attributes become '' so callers can concatenate safely.
        pub_time = pub_ele.get('title') or ''
        pub_url = pub_ele.get('href') or ''
        res.append((pub_text, pub_time, pub_url))
    return res


def write2txt(msg, path=None):
    """Append ``msg`` to the UTF-8 file ``<path>.csv``.

    :param msg: text to append; a string, or an iterable of strings
    :param path: base path of the output file.  When omitted, the
        module-level name ``file`` (bound by the main loop) is used, which
        is the original behaviour.
    :return: None
    """
    base = file if path is None else path
    with open(base + '.csv', "a", encoding='utf-8') as f:
        if isinstance(msg, str):
            # One write instead of writelines() iterating character by character.
            f.write(msg)
        else:
            f.writelines(msg)


if __name__ == '__main__':
    selected_files = get_file_path()
    # The loop variable must stay named ``file``: write2txt() reads that
    # module-level name to decide which .csv file to append to.
    for file in selected_files:
        for text, pub_time, url in cnblog2anki(file):
            label = text + pub_time
            # One tab-separated row: plain label, then the label as a link.
            write2txt(f'{label}\t<a href={url}>{label}</a>\n')

D:\code_gitee\python_cnblog2anki_and_weibo2anki\learn\le01.py

import datetime


def run_cmd(cmd_str='', echo_print=1):
    """
    Run a command line through the shell with ``subprocess.run``.

    The original author's note says this is meant to avoid the console
    window popping up while the command runs (not verifiable from this
    code alone).

    :param cmd_str: command line to execute
    :param echo_print: when equal to 1, print the command before running it
    :return: None
    """
    import subprocess

    if echo_print == 1:
        print(f'\n执行cmd指令="{cmd_str}"')
    subprocess.run(cmd_str, shell=True)

if __name__ == '__main__':
    # Echo one line per item, with the command echo of run_cmd switched off.
    letters = ['a','b','c']
    for index, letter in enumerate(letters):
        run_cmd(f'echo 开始时间:{index}--{letter}', 0)

D:\code_gitee\python_cnblog2anki_and_weibo2anki\learn\le02.py

import os

if __name__ == '__main__':
    letters = ['a', 'b', 'c']
    for index, letter in enumerate(letters):
        # Alternative kept from the original: os.system(f'echo ...').
        # os.popen returns a pipe object; close it right away so the handle
        # is not leaked and the command is waited for before the next
        # iteration.  The command's output is not read here (unchanged).
        os.popen(f'echo 开始时间:{index}--{letter}').close()
posted on 2022-11-16 23:17  lazycookie  阅读(83)  评论(0)  编辑  收藏  举报