# 功能:将md文件中的网络图片下载到本地的 ./typora-img/{filename} 目录,并将md文件中的图片地址替换为对应的本地地址
# 代码参考:https://blog.csdn.net/weixin_34090643/article/details/91435765
import requests
import re
import os
from itertools import chain
import fnmatch
def get_md_files(path="./"):
    """Collect every Markdown file located under ``path``.

    The directory tree rooted at ``path`` is walked recursively. Each file
    whose name matches ``*.md`` is returned as a path relative to ``path``,
    using forward slashes as separators.

    Args:
        path: Root directory to scan. Defaults to the current directory.

    Returns:
        A list of relative Markdown file paths, in the order ``os.walk``
        yields them.
    """
    found = []
    for current_dir, _subdirs, names in os.walk(path):
        for name in fnmatch.filter(names, "*.md"):
            full_path = os.path.join(current_dir, name)
            relative = os.path.relpath(full_path, path)
            # Normalise Windows separators so callers can split on "/".
            found.append(relative.replace("\\", "/"))
    return found
# 替换md文件中的url
def replace_md_url(md_file, local_img_path):
    """Download the images referenced by a Markdown file and relink them.

    Every image URL found in ``md_file`` (Markdown ``![](url)`` syntax or an
    HTML ``<img src="...">`` tag) is handed to ``download_image``. For each
    URL that is downloaded successfully, all occurrences of that URL in the
    document are replaced by the link ``download_image`` returned. The file
    is rewritten in place only when at least one URL was replaced.

    Args:
        md_file: Path of the Markdown file to process (read and written as
            UTF-8).
        local_img_path: Directory passed to ``download_image`` as the place
            to store the downloaded images.

    Returns:
        None. A short summary is printed instead.
    """
    # Markdown image syntax ![](url) or HTML <img src='' />
    img_patten = r'!\[.*?\]\((.*?)\)|<img.*?src=[\'\"](.*?)[\'\"].*?>'

    with open(md_file, 'r', encoding='utf-8') as f:
        post = f.read()

    # findall yields (markdown_url, html_url) tuples with one side empty.
    # Flatten them, drop the blanks and de-duplicate while keeping order so
    # an image used several times is downloaded (and counted) only once.
    urls = dict.fromkeys(
        url for url in chain.from_iterable(re.findall(img_patten, post)) if url
    )

    cnt_replace = 0  # number of distinct URLs that were rewritten
    for url in urls:
        new_url = download_image(url, local_img_path)
        # Treat any falsy result (False, None, "") as "not downloaded";
        # passing such a value to str.replace would raise TypeError.
        if not new_url:
            continue
        post = post.replace(url, new_url)
        cnt_replace += 1

    if cnt_replace:
        # Overwrite the Markdown file; the context manager guarantees the
        # handle is flushed and closed.
        with open(md_file, 'w', encoding='utf-8') as f:
            f.write(post)
        print(f"已更新{cnt_replace}个url")
    else:
        print('{}中没有需要替换的URL'.format(os.path.basename(md_file)))
# 获取到图片的保存的相对路径
def get_img_local_path(md_file, path):
    """Resolve the image directory ``path`` for the Markdown file ``md_file``.

    Args:
        md_file: Path of the Markdown file the images belong to.
        path: Image directory in one of these forms:
            * ``/a/b/c``        - returned unchanged;
            * ``file:///a/b/c`` - the ``file:///`` prefix is removed and
              ``%20`` is decoded to a space;
            * anything else (``./a/b``, ``a/b``) - taken relative to the
              directory that contains ``md_file``.

    Returns:
        The resolved directory as a string with ``/`` as the joining
        separator.
    """
    # /a/b/c
    if path.startswith('/'):
        return path

    # file:///a/b/c
    if path.startswith('file:///'):
        # NOTE(review): dropping all 8 prefix characters also removes the
        # leading slash, which suits "file:///C:/..." but turns a POSIX
        # "file:///a/b" into the relative "a/b" - confirm this is intended.
        return path[8:].replace('%20', ' ')

    # ./a/b/c or a/b/c: relative to the Markdown file's own directory.
    md_dir = os.path.dirname(md_file)
    if not md_dir:
        # md_file sits in the working directory. Prefixing an empty
        # directory would produce "/a/b/c", i.e. a path at the filesystem
        # root, so return the relative path as it is.
        return path
    return '{0}/{1}'.format(md_dir, path)
# 下载图片
def download_image(image_url, save_dir, link_dir=None):
    """Download one remote image into ``save_dir``.

    Args:
        image_url: Address of the image. Only ``http://`` and ``https://``
            URLs are downloaded; anything else is rejected.
        save_dir: Directory the image is written to. It is created on demand,
            and only when there is actually something to save.
        link_dir: Directory to use in the returned link. When omitted, the
            module-level name ``path`` is used if it exists (the script
            entry point defines it), otherwise ``save_dir``.

    Returns:
        The link ``./{link_dir}/{file_name}`` on success, or ``False`` when
        the URL is not remote, has no file name, the server does not answer
        with status 200, or a network / file-system error occurs.
    """
    from urllib.parse import urlsplit

    # Accept both schemes; str.startswith takes a tuple of prefixes.
    if not image_url.startswith(('http://', 'https://')):
        return False

    # Take the name from the URL *path* only, so a query string or fragment
    # (which may itself contain "/") never ends up in the file name.
    file_name = urlsplit(image_url).path.rsplit("/", 1)[-1]
    if not file_name:
        print(f"{image_url} --->图片爬取失败,!!!!!!!!")
        return False

    try:
        # A timeout keeps an unresponsive server from blocking forever; the
        # context manager releases the streamed connection.
        with requests.get(image_url, stream=True, timeout=30) as response:
            if response.status_code != 200:
                print(f"{image_url} --->图片爬取失败,!!!!!!!!")
                return False
            os.makedirs(save_dir, exist_ok=True)
            file_path = os.path.join(save_dir, file_name)
            with open(file_path, "wb") as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
    except (requests.RequestException, OSError):
        print(f"{image_url} --->图片爬取失败,!!!!!!!!")
        return False

    print(f"{image_url} --->图片爬取成功,已保存到{save_dir}")
    if link_dir is None:
        # Legacy behaviour: the link was built from the module-level `path`
        # set by the script entry point. Look it up defensively so the
        # function also works when the module is imported.
        link_dir = globals().get("path", save_dir)
    return f"./{link_dir}/{file_name}"
if __name__ == '__main__':
    # Script entry point: process every Markdown file below the current
    # directory (a different root can be passed to get_md_files).
    md_files = get_md_files()
    for md_file in md_files:
        # Markdown file name without its extension. splitext removes only
        # the final suffix, so "v1.0.md" becomes "v1.0" rather than "v1" and
        # files whose names contain dots do not share one image folder.
        md_name = os.path.splitext(os.path.basename(md_file))[0]
        # NOTE: `path` is deliberately a module-level name; download_image
        # reads it to build the link written back into the document.
        path = f'typora-img/{md_name}'
        # Directory in which the images of this file are stored.
        local_img_path = get_img_local_path(md_file, path)
        replace_md_url(md_file, local_img_path)