自动移除Typora多余图片

  1 # -*- coding: utf-8 -*-
  2 # @Time    : 8/25/2020 7:40 PM
  3 # @Author  : OhiyoX
  4 # @FileName: MDImgScripts.py
  5 # @Software: PyCharm
  6 # @Blog    :https://blog.ohiyox.in/
  7 # @Desc    : 1、用于自动清除typora写作后多余的图片,简洁文件夹
  8 #            2、也可以自动上传图片到图床,同时修改文档中的图片链接
  9 #            3、若图片为网络图片,将自动转移到图床,同时修改文档中的图片链接
 10 #            4、若图片在图床,但存放位置不规范,也会自动转移位置,同时修改文档中的图片链接
 11 
 12 
 13 import os
 14 import re
 15 import json
 16 import shutil
 17 import sys
 18 import time
 19 from urllib.parse import quote, unquote
 20 
 21 from datetime import datetime
 22 
 23 
 24 
 25 # 配置
 26 # {
 27 #     "test_mode": True,  # 调试模式
 28 #     "date": {"year": "2019",  # 设置文件夹日期,留空默认为本年
 29 #              "month": "1" # 设置文件夹日期,留空默认为本月
 30 #              },
 31 #     "clean_local_assets": True,
 32 #     "main_oss_folder_ref": "blogimg",  # 设置图床文件夹
 33 #     "style": "!xwbp",  # 设置默认上传的图片规则,以后缀!标识
 34 #     "relocate_oss_existing_file": True,  # 对于已在图床中的图片,设置是否需要重新移动整理图片
 35 #     "delete": True  # 对于移动图片,是否删除原位置
 36 # }
 37 
 38 with open('config.json') as cfig:
 39     config = json.load(cfig)
 40 
 41 
 42 class ImgMD:
 43     """MD图片处理类"""
 44 
 45     def __init__(self):
 46         # 与MD文档有关的
 47         self.article_filepath = None
 48         self.assets_dirpath = None
 49         self.content = ""
 50         # 文档中的图片信息
 51         self.imgs_list = []
 52         self.imgs_url_list = []
 53         # 图片在本地中的存储信息
 54         self.assets_list = []
 55 
 56         self.get_doc_imgs_list()
 57         self.get_assets_list()
 58 
 59         self.temp_dir = 'temp_img'
 60 
 61     def get_content(self, article_filepath="", force=False):
 62         """获得文本内容"""
 63         if self.content and not force:
 64             return self.content
 65         else:
 66             foo = False
 67             count = 0
 68             while not foo:
 69                 if count <= 5:
 70                     try:
 71                         if not config['test_mode']:
 72                             if not article_filepath:
 73                                 self.article_filepath = input("Input the article path: ").strip('\"')
 74                                 #print(self.article_filepath)
 75                                 
 76                                 if "\"" in self.article_filepath:
 77                                     self.article_filepath = re.search("\"(.*)\"", self.article_filepath).group(1)
 78                                 article_filepath = self.article_filepath
 79                         else:
 80                             article_filepath = self.article_filepath
 81                         with open(article_filepath, 'r', encoding="UTF-8") as f:
 82                             self.content = f.read()
 83                         foo = True
 84                         return self.content
 85                     except:
 86                         print("Notice! File is not found or an error occurs, retry.")
 87                         count += 1
 88                 else:
 89                     exit('failed getting article file path.')
 90 
 91     def get_doc_imgs_list(self, url=False, force=False):
 92         """获得文档中的图片列表"""
 93         if not force:
 94             if self.imgs_list and not url:
 95                 return self.imgs_list
 96             if self.imgs_url_list and url:
 97                 return self.imgs_url_list
 98 
 99         content = self.get_content(force=force)
100         
101         #self.imgs_url_list = re.findall('!\[.*?]\((.*?)\)', content, re.S)
102         self.imgs_url_list = re.findall('image-[0-9]{4,18}.png', content, re.S)
103         imgs_list = []
104         for img_url in self.imgs_url_list:
105             img_full_name = self.get_doc_imgs_list(img_url)  # 不支持绝对路径
106             imgs_list.append(img_full_name)
107         if not url:
108             print("Done, %s img(s) found in article." % len(imgs_list))
109             #for i, img in enumerate(imgs_list):
110             #   print(f"{i}: {img}")
111 
112             self.imgs_list = imgs_list
113             return imgs_list
114         else:
115             return self.imgs_url_list
116 
117 
118     def get_assets_list(self):
119         """获得图片文件夹中的图片列表"""
120         bar = False
121         count = 0
122         while not bar and count <= 5:
123             try:
124                 if not config['test_mode']:
125                     self.assets_dirpath = input("input the imgs path (Leave it empty to use default folder): ")
126                     if self.assets_dirpath == '':
127                         self.assets_dirpath = self.article_filepath.replace('.md', '.assets')
128                 for a, b, c in os.walk(self.assets_dirpath):
129                     assets_list = c
130                     print("Done, %s img(s) found in assets." % len(assets_list))
131                     print('As below:')
132                     for i, a in enumerate(assets_list):
133                         print(f"{i}: {a}")
134                     self.assets_list = assets_list
135                     return assets_list
136                 bar = True
137             except:
138                 print(Exception)
139                 count += 1
140                 if count > 5:
141                     exit(-1)
142 
143     def clear_local_imgs(self):
144         """用于清除文件夹中无用的图片"""
145         flag = False
146         redundant_list = []
147         print("------------------OUTPUT assets_list 1111---------------- \n")
148         print(self.assets_list)
149         print("------------------OUTPUT imgs_list 222---------------- \n")
150         print(self.imgs_list)
151         print("------------------OUTPUT list end---------------- \n")
152         for file in self.assets_list:
153             print(file)
154             u_file = quote(file)  # typora中使用了unicode-escape
155             #if str(u_file) not in str(self.imgs_list):
156             if str(self.imgs_list).find(str(u_file)) == -1 :
157                 redundant_list.append(self.assets_dirpath + '\\' + file)
158                 flag = True
159         if flag:
160 
161             for x in redundant_list:
162                 os.remove(x)
163                 print(x + " is removed.")
164         else:
165             print("Scan finished, no redundant img is found in assets.")
166 
167     def img_relocate(self, remote_img_ref, new_remote_img_ref, delete=True):
168         """将图床图片移动到合适位置"""
169         print('relocate_oss_existing_file process:', end='')
170 
171         def delete_(_remote_img_ref):
172             # delete 指示是否删除旧位置的文件
173             if delete:
174                 if bucket.delete_object(_remote_img_ref):
175                     print('deleted "' + self.get_filename_from_url(_remote_img_ref) + '" in original loc.')
176                     return True
177                 return False
178             p_dir = re.search('(.*)/', _remote_img_ref, re.S).group(1)
179             if bucket.delete_object(p_dir):
180                 # 清理掉旧的文件夹
181                 print('deleted empty loc.')
182             return True
183 
184         bucket = oss2.Bucket(self.auth, self.endpoint, self.oss_info['Bucket'])
185         exist = bucket.object_exists(new_remote_img_ref)
186         if exist:
187             return delete_(remote_img_ref)
188         else:
189             if bucket.copy_object(self.oss_info['Bucket'], remote_img_ref, new_remote_img_ref):
190                 return delete_(remote_img_ref)
191             else:
192                 return False
193 
if __name__ == '__main__':
    # Script entry point: build the helper (this reads the article and lists
    # the assets folder), then remove unused local images if the
    # configuration asks for it.
    processor = ImgMD()
    cleanup_requested = config['clean_local_assets']
    if cleanup_requested:
        processor.clear_local_imgs()

   以上代码是根据文章开头注明的出处修改而来。请将以上代码(如有行号,需先去掉每行开头的行号)保存为 .py 文件,并在运行目录下准备好 config.json 配置文件,然后运行即可。

posted @ 2024-02-29 15:16  xiaokangkp  阅读(184)  评论(0编辑  收藏  举报