实现将md文件中的网络图片下载下来,保存到本地./typora-img/{filename}目录,并且会将md文件中的图片地址替换为本地地址
import requests
import re
import os
from itertools import chain
import fnmatch
def get_md_files(path="./"):
    """Collect every Markdown file found beneath ``path``.

    The directory tree is walked recursively with ``os.walk``. Each file whose
    name matches the pattern ``*.md`` is reported as a path relative to
    ``path``, with backslashes replaced by forward slashes.

    Args:
        path: Directory to search. Defaults to the current directory.

    Returns:
        list: Relative, ``/``-separated file paths, in ``os.walk`` order.
    """
    found = []
    for folder, _subdirs, names in os.walk(path):
        for name in fnmatch.filter(names, "*.md"):
            relative = os.path.relpath(os.path.join(folder, name), path)
            found.append(relative.replace("\\", "/"))
    return found
def replace_md_url(md_file, local_img_path):
    """Download the images referenced by a Markdown file and relink them.

    Image targets are taken from Markdown image syntax (``![alt](target)``)
    and from the ``src`` attribute of ``<img>`` tags. Every distinct target is
    handed to ``download_image``; when that returns a new link, all
    occurrences of the target in the text are replaced by it. The file is
    rewritten only if at least one target was replaced.

    Args:
        md_file: Path of the Markdown file to process (read and written as
            UTF-8).
        local_img_path: Directory passed to ``download_image`` as the place
            to store downloaded images.

    Returns:
        None. A summary line is printed instead.
    """
    # Group 1 captures the Markdown image target, group 2 the <img> src.
    img_patten = r'!\[.*?\]\((.*?)\)|<img.*?src=[\'\"](.*?)[\'\"].*?>'

    with open(md_file, 'r', encoding='utf-8') as f:
        post = f.read()

    # findall() returns one (markdown_target, img_src) tuple per hit with the
    # unused group left empty. Flatten, drop the empties, and de-duplicate
    # (keeping first-seen order) so a URL used several times is fetched and
    # counted once; str.replace() below rewrites every occurrence anyway.
    targets = dict.fromkeys(
        url for url in chain.from_iterable(re.findall(img_patten, post)) if url
    )

    cnt_replace = 0
    for match in targets:
        new_url = download_image(match, local_img_path)
        # Treat any falsy result (False, but also None) as "not downloaded";
        # comparing with `!= False` would let None through and make
        # str.replace() raise TypeError.
        if not new_url:
            continue
        post = post.replace(match, new_url)
        cnt_replace += 1

    if cnt_replace > 0:
        # Context manager guarantees the rewritten file is flushed and closed.
        with open(md_file, 'w', encoding='utf-8') as f:
            f.write(post)
        print(f"已更新{cnt_replace}个url")
    else:
        print('{}中没有需要替换的URL'.format(os.path.basename(md_file)))
def get_img_local_path(md_file, path):
    """Resolve an image directory reference relative to a Markdown file.

    Args:
        md_file: Path of the Markdown file the reference belongs to.
        path: Image directory reference. Three forms are distinguished:

            * starts with ``/`` - returned unchanged;
            * starts with ``file:///`` - the 8-character prefix is removed
              and every ``%20`` is turned back into a space;
            * anything else (including ``./...``) - joined onto the directory
              of ``md_file`` with a ``/``.

    Returns:
        str: The resolved path.
    """
    if path.startswith('/'):
        return path
    if path.startswith('file:///'):
        return path[8:].replace('%20', ' ')

    base_dir = os.path.dirname(md_file)
    if not base_dir:
        # md_file sits in the current directory. Formatting '' + '/' + path
        # would produce a path starting with '/', i.e. one anchored at the
        # filesystem root, so return the reference as-is instead.
        return path
    return '{0}/{1}'.format(base_dir, path)
def download_image(image_url, save_dir, link_dir=None):
    """Download one remote image and return the link that should replace it.

    Args:
        image_url: Image reference taken from a Markdown file. Only
            ``http://`` and ``https://`` URLs are downloaded.
        save_dir: Directory the image is written to. It is created on demand,
            and only once a download has actually succeeded in connecting.
        link_dir: Directory used in the returned link. When omitted, the
            module-level name ``path`` (set by the script entry point) is
            used if it exists, otherwise ``save_dir``.

    Returns:
        str | bool: ``"./<link_dir>/<file name>"`` on success; ``False`` when
        the reference is not an HTTP(S) URL, the server does not answer with
        status 200, or the request / file write fails.
    """
    from urllib.parse import urlsplit

    # A tuple makes this a single "neither prefix" test. The former
    # `not a or b` form was parsed as `(not a) or b` and therefore rejected
    # every https:// URL.
    if not image_url.startswith(('http://', 'https://')):
        return False

    # Use only the path component so "?query" / "#fragment" suffixes do not
    # end up in the file name.
    file_name = os.path.basename(urlsplit(image_url).path) or "image"

    if link_dir is None:
        # Previously the global `path` was read directly, which raised
        # NameError whenever this function ran outside the script entry point.
        link_dir = globals().get("path", save_dir)

    try:
        # The context manager releases the streamed connection; the timeout
        # keeps an unresponsive server from blocking the run indefinitely.
        with requests.get(image_url, stream=True, timeout=30) as response:
            if response.status_code != 200:
                print(f"{image_url} --->图片爬取失败,!!!!!!!!")
                return False
            os.makedirs(save_dir, exist_ok=True)
            file_path = os.path.join(save_dir, file_name)
            with open(file_path, "wb") as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
    except (requests.RequestException, OSError):
        print(f"{image_url} --->图片爬取失败,!!!!!!!!")
        return False

    print(f"{image_url} --->图片爬取成功,已保存到{save_dir}")
    return f"./{link_dir}/{file_name}"
if __name__ == '__main__':
    # Process every Markdown file below the current working directory.
    md_files = get_md_files()
    for md_file in md_files:
        # splitext() strips only the final extension, so "a.b.md" becomes
        # "a.b". Cutting at the first dot mapped "a.b.md" and "a.c.md" to the
        # same image folder "a".
        md_name = os.path.splitext(os.path.basename(md_file))[0]
        # NOTE: `path` must remain a module-level name here; download_image
        # builds the rewritten image link from it.
        path = f'typora-img/{md_name}'
        local_img_path = get_img_local_path(md_file, path)
        replace_md_url(md_file, local_img_path)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 分享4款.NET开源、免费、实用的商城系统
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
· 上周热点回顾(2.24-3.2)