D:\code_gitee\python_cnblog2anki_and_weibo2anki\main.py
import os
import re
import shutil
import easygui
from bs4 import BeautifulSoup
from easygui import *
class User(EgStore):
    """Settings store built on easygui's ``EgStore``.

    Holds a single ``path`` attribute, initialised to an empty string
    before the base class receives the backing file name.
    """

    def __init__(self, filename):
        # Default value used until something else is assigned to it.
        self.path = ''
        super().__init__(filename)
def get_file_path():
    """Ask the user for input files through easygui dialogs.

    The user first types ``2`` to choose a directory; any other answer
    opens a multi-file chooser.  The chosen location is written to
    ``settings.txt`` through ``User`` and passed as the dialog default on
    the next call.

    :return: list of file paths.  For a directory this is every file below
        it whose name ends in ``.html``, ``.mhtml``, ``.htm`` or ``.txt``.
        An empty list is returned when the dialog is cancelled.
    """
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)
    # Settings object backed by a file; restore() reloads what was stored.
    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        chosen_dir = easygui.diropenbox(default=user.path)
        if not chosen_dir:
            # Dialog cancelled: nothing to walk, and the previously stored
            # path must not be overwritten with None.
            return []
        user.path = chosen_dir
        user.store()

        wanted_suffixes = ('.html', '.mhtml', '.htm', '.txt')
        files = []
        for dirpath, _dirnames, filenames in os.walk(chosen_dir):
            for name in filenames:
                if name.endswith(wanted_suffixes):
                    # Join with the directory currently being walked so
                    # files inside sub-directories get a valid path.
                    files.append(os.path.join(dirpath, name))
        return files

    selected = easygui.fileopenbox(multiple=True, default=user.path)
    if not selected:
        # Dialog cancelled or nothing selected.
        return []
    # Remember the first selected file as the default for the next run.
    user.path = selected[0]
    user.store()
    return selected
def setDir(filepath):
    """Make sure ``filepath`` is a freshly created directory.

    When the path already exists it is removed first (removal errors are
    ignored), then the directory is created.

    :param filepath: path of the directory to create
    :return: None
    """
    already_present = os.path.exists(filepath)
    if already_present:
        # Best-effort wipe of the old content before recreating.
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)
def cnblog2anki(file):
    """Collect keyword-matching posts from a saved HTML page.

    Every ``.vue-recycle-scroller__item-view`` element is treated as one
    post (presumably a saved Weibo timeline -- verify against the input).
    A post is kept when its text matches the keyword pattern and it has a
    timestamp link.

    :param file: path of a UTF-8 encoded HTML file.
    :return: list of ``(text, time, url)`` tuples; always a list, empty
        when nothing matches.
    """
    # Only posts whose text contains one of these keywords are kept.
    content_reg = r'(anki)|(vue)|(javascript)|(typescript)|(three(.|)js)|(js)|(ts)'
    pattern = re.compile(content_reg)

    # Read everything up front so the file is closed before parsing.
    with open(file, "r", encoding='utf-8') as f:
        data = f.read()
    soup = BeautifulSoup(data, 'html.parser')

    res = []
    # select() returns a (possibly empty) list, never None.
    for card in soup.select(".vue-recycle-scroller__item-view"):
        text_ele = card.select(".detail_wbtext_4CRf9")
        if not text_ele:
            continue
        pub_text = text_ele[0].text
        # Skip posts that mention none of the keywords.
        if not pattern.search(pub_text):
            continue
        # Strip the fixed video-caption phrase from the text.
        pub_text = pub_text.replace('诺亚方卓的微博视频', '')
        # The timestamp link carries the publish time (title) and post URL.
        pub_ele = card.select("a.head-info_time_6sFQg")
        if not pub_ele:
            continue
        # Default to '' so callers can concatenate text and time even when
        # the link has no title attribute.
        pub_time = pub_ele[0].get('title', '')
        pub_url = pub_ele[0].get('href')
        res.append((pub_text, pub_time, pub_url))
    return res
def write2txt(msg):
    """Append ``msg`` to the CSV file that belongs to the current input.

    The target name is the module-level ``file`` variable plus ``.csv``,
    so ``file`` must be set by the caller before this is invoked.

    :param msg: text to append
    """
    target = file + '.csv'
    with open(target, "a", encoding='utf-8') as out:
        out.writelines(msg)
if __name__ == '__main__':
    # NOTE: the loop variable has to stay named ``file`` -- write2txt()
    # reads it as a module-level global to build the output file name.
    chosen_files = get_file_path()
    for file in chosen_files:
        cards = cnblog2anki(file)
        for content in cards:
            # One tab-separated row: plain text, then the same text as a link.
            write2txt(f'{content[0]+content[1]}\t<a href={content[2]}>{content[0]+content[1]}</a>\n')
D:\code_gitee\python_cnblog2anki_and_weibo2anki\main_cnblog2anki.py
from base64 import encode
import os
import re
import shutil
import easygui
from subprocess import run
from easygui import *
from bs4 import BeautifulSoup
class User(EgStore):
    """Settings store built on easygui's ``EgStore``.

    Holds a single ``path`` attribute, initialised to an empty string
    before the base class receives the backing file name.
    """

    def __init__(self, filename):
        # Default value used until something else is assigned to it.
        self.path = ''
        super().__init__(filename)
def get_file_path():
    """Ask the user for input files through easygui dialogs.

    The user first types ``2`` to choose a directory; any other answer
    opens a multi-file chooser.  The chosen location is written to
    ``settings.txt`` through ``User`` and passed as the dialog default on
    the next call.

    :return: list of file paths.  For a directory this is every file below
        it whose name ends in ``.html``, ``.mhtml``, ``.htm`` or ``.txt``.
        An empty list is returned when the dialog is cancelled.
    """
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)
    # Settings object backed by a file; restore() reloads what was stored.
    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        chosen_dir = easygui.diropenbox(default=user.path)
        if not chosen_dir:
            # Dialog cancelled: nothing to walk, and the previously stored
            # path must not be overwritten with None.
            return []
        user.path = chosen_dir
        user.store()

        wanted_suffixes = ('.html', '.mhtml', '.htm', '.txt')
        files = []
        for dirpath, _dirnames, filenames in os.walk(chosen_dir):
            for name in filenames:
                if name.endswith(wanted_suffixes):
                    # Join with the directory currently being walked so
                    # files inside sub-directories get a valid path.
                    files.append(os.path.join(dirpath, name))
        return files

    selected = easygui.fileopenbox(multiple=True, default=user.path)
    if not selected:
        # Dialog cancelled or nothing selected.
        return []
    # Remember the first selected file as the default for the next run.
    user.path = selected[0]
    user.store()
    return selected
def setDir(filepath):
    """Make sure ``filepath`` is a freshly created directory.

    When the path already exists it is removed first (removal errors are
    ignored), then the directory is created.

    :param filepath: path of the directory to create
    :return: None
    """
    already_present = os.path.exists(filepath)
    if already_present:
        # Best-effort wipe of the old content before recreating.
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)
def cnblog2anki(file):
    """Extract link titles and URLs from the first table of an HTML page.

    For every ``<tr>`` inside the first ``<tbody>`` the link in the first
    cell is read.  Rows without such a link, or whose link has no ``href``,
    are skipped.

    :param file: path of a UTF-8 encoded HTML file.
    :return: list of ``(title, url)`` tuples; empty when the page has no
        ``<tbody>``.
    """
    # Read everything up front so the file is closed before parsing.
    with open(file, "r", encoding='utf-8') as f:
        data = f.read()
    soup = BeautifulSoup(data, 'html.parser')

    tbody = soup.select_one("tbody")
    if tbody is None:
        # Not the expected page layout: nothing to extract.
        return []

    res = []
    for tr_ele in tbody.select('tr'):
        link = tr_ele.select_one('td:nth-child(1)>a')
        if link is None:
            continue
        href = link.get('href')
        if not href:
            continue
        # The scheme is prepended as-is; presumably the hrefs are
        # protocol-relative ("//host/...") -- verify against the input.
        res.append((link.text, 'http:' + href))
    return res
def write2txt(msg):
    """Append ``msg`` to the CSV file that belongs to the current input.

    The target name is the module-level ``file`` variable plus ``.csv``,
    so ``file`` must be set by the caller before this is invoked.

    :param msg: text to append
    """
    target = file + '.csv'
    with open(target, "a", encoding='utf-8') as out:
        out.writelines(msg)
if __name__ == '__main__':
    # NOTE: the loop variable has to stay named ``file`` -- write2txt()
    # reads it as a module-level global to build the output file name.
    chosen_files = get_file_path()
    for file in chosen_files:
        entries = cnblog2anki(file)
        for content in entries:
            print(content)
            # One tab-separated row: the title, then the title as a link.
            write2txt(f'{content[0]}\t<a href={content[1]}>{content[0]}</a>\n')
D:\code_gitee\python_cnblog2anki_and_weibo2anki\main_weibo2anki.py
import os
import re
import shutil
import easygui
from bs4 import BeautifulSoup
from easygui import *
class User(EgStore):
    """Settings store built on easygui's ``EgStore``.

    Holds a single ``path`` attribute, initialised to an empty string
    before the base class receives the backing file name.
    """

    def __init__(self, filename):
        # Default value used until something else is assigned to it.
        self.path = ''
        super().__init__(filename)
def get_file_path():
    """Ask the user for input files through easygui dialogs.

    The user first types ``2`` to choose a directory; any other answer
    opens a multi-file chooser.  The chosen location is written to
    ``settings.txt`` through ``User`` and passed as the dialog default on
    the next call.

    :return: list of file paths.  For a directory this is every file below
        it whose name ends in ``.html``, ``.mhtml``, ``.htm`` or ``.txt``.
        An empty list is returned when the dialog is cancelled.
    """
    file_dir_flag = easygui.enterbox(msg='file(1) or dir(2):', strip=True)
    # Settings object backed by a file; restore() reloads what was stored.
    user = User("settings.txt")
    user.restore()

    if file_dir_flag == '2':
        chosen_dir = easygui.diropenbox(default=user.path)
        if not chosen_dir:
            # Dialog cancelled: nothing to walk, and the previously stored
            # path must not be overwritten with None.
            return []
        user.path = chosen_dir
        user.store()

        wanted_suffixes = ('.html', '.mhtml', '.htm', '.txt')
        files = []
        for dirpath, _dirnames, filenames in os.walk(chosen_dir):
            for name in filenames:
                if name.endswith(wanted_suffixes):
                    # Join with the directory currently being walked so
                    # files inside sub-directories get a valid path.
                    files.append(os.path.join(dirpath, name))
        return files

    selected = easygui.fileopenbox(multiple=True, default=user.path)
    if not selected:
        # Dialog cancelled or nothing selected.
        return []
    # Remember the first selected file as the default for the next run.
    user.path = selected[0]
    user.store()
    return selected
def setDir(filepath):
    """Make sure ``filepath`` is a freshly created directory.

    When the path already exists it is removed first (removal errors are
    ignored), then the directory is created.

    :param filepath: path of the directory to create
    :return: None
    """
    already_present = os.path.exists(filepath)
    if already_present:
        # Best-effort wipe of the old content before recreating.
        shutil.rmtree(filepath, ignore_errors=True)
    os.mkdir(filepath)
def cnblog2anki(file):
    """Collect keyword-matching posts from a saved HTML page.

    Every ``.vue-recycle-scroller__item-view`` element is treated as one
    post (presumably a saved Weibo timeline -- verify against the input).
    A post is kept when its text matches the keyword pattern and it has a
    timestamp link.

    :param file: path of a UTF-8 encoded HTML file.
    :return: list of ``(text, time, url)`` tuples; always a list, empty
        when nothing matches.
    """
    # Only posts whose text contains one of these keywords are kept.
    content_reg = r'(anki)|(vue)|(javascript)|(typescript)|(three(.|)js)|(js)|(ts)'
    pattern = re.compile(content_reg)

    # Read everything up front so the file is closed before parsing.
    with open(file, "r", encoding='utf-8') as f:
        data = f.read()
    soup = BeautifulSoup(data, 'html.parser')

    res = []
    # select() returns a (possibly empty) list, never None.
    for card in soup.select(".vue-recycle-scroller__item-view"):
        text_ele = card.select(".detail_wbtext_4CRf9")
        if not text_ele:
            continue
        pub_text = text_ele[0].text
        # Skip posts that mention none of the keywords.
        if not pattern.search(pub_text):
            continue
        # Strip the fixed video-caption phrase from the text.
        pub_text = pub_text.replace('诺亚方卓的微博视频', '')
        # The timestamp link carries the publish time (title) and post URL.
        pub_ele = card.select("a.head-info_time_6sFQg")
        if not pub_ele:
            continue
        # Default to '' so callers can concatenate text and time even when
        # the link has no title attribute.
        pub_time = pub_ele[0].get('title', '')
        pub_url = pub_ele[0].get('href')
        res.append((pub_text, pub_time, pub_url))
    return res
def write2txt(msg):
    """Append ``msg`` to the CSV file that belongs to the current input.

    The target name is the module-level ``file`` variable plus ``.csv``,
    so ``file`` must be set by the caller before this is invoked.

    :param msg: text to append
    """
    target = file + '.csv'
    with open(target, "a", encoding='utf-8') as out:
        out.writelines(msg)
if __name__ == '__main__':
    # NOTE: the loop variable has to stay named ``file`` -- write2txt()
    # reads it as a module-level global to build the output file name.
    chosen_files = get_file_path()
    for file in chosen_files:
        cards = cnblog2anki(file)
        for content in cards:
            # One tab-separated row: plain text, then the same text as a link.
            write2txt(f'{content[0]+content[1]}\t<a href={content[2]}>{content[0]+content[1]}</a>\n')
D:\code_gitee\python_cnblog2anki_and_weibo2anki\learn\le01.py
import datetime
def run_cmd(cmd_str='', echo_print=1):
    """Run a command line through the shell with ``subprocess.run``.

    Original author's note: the command's output shows up in the Python
    console, which is meant to avoid a separate cmd window popping up.

    :param cmd_str: command line to execute
    :param echo_print: when equal to ``1`` the command is printed first
    :return: None
    """
    import subprocess

    should_echo = echo_print == 1
    if should_echo:
        print('\n执行cmd指令="{}"'.format(cmd_str))
    subprocess.run(cmd_str, shell=True)
if __name__ == '__main__':
    # Named ``items`` rather than ``list`` so the builtin is not shadowed.
    items = ['a', 'b', 'c']
    for i, j in enumerate(items):
        # Echo index and value through the shell without printing the command.
        run_cmd(f'echo 开始时间:{i}--{j}', 0)
D:\code_gitee\python_cnblog2anki_and_weibo2anki\learn\le02.py
import os
if __name__ == '__main__':
    # Named ``items`` rather than ``list`` so the builtin is not shadowed.
    items = ['a', 'b', 'c']
    for i, j in enumerate(items):
        # Alternative kept for reference: os.system(f'echo 开始时间:{i}--{j}')
        # os.popen returns a pipe object; the ``with`` block closes it so the
        # handle is not leaked and the child process is waited for.
        with os.popen(f'echo 开始时间:{i}--{j}') as pipe:
            # Drain the output (it is not displayed, as before).
            pipe.read()