# Python脚本:将md文件中的图片下载并替换为本地存储

# 代码参考:https://blog.csdn.net/weixin_34090643/article/details/91435765
import requests
import re
import os
from itertools import chain
import fnmatch


def get_md_files(path="./"):
    """Collect the Markdown files found anywhere beneath ``path``.

    The directory tree is walked recursively. Every file whose name matches
    ``*.md`` is reported as a path relative to ``path``, with backslashes
    replaced by forward slashes.

    Args:
        path: Directory to scan; defaults to the current directory.

    Returns:
        A list of relative, forward-slash separated paths to the ``.md``
        files, in the order ``os.walk`` yields them.
    """
    found = []
    for current_dir, _subdirs, names in os.walk(path):
        for name in names:
            if not fnmatch.fnmatch(name, "*.md"):
                continue
            full_path = os.path.join(current_dir, name)
            relative = os.path.relpath(full_path, path)
            # Normalise Windows separators so callers can split on "/".
            found.append(relative.replace("\\", "/"))
    return found


# 替换md文件中的url
def replace_md_url(md_file, local_img_path):
    """Download the images referenced by a Markdown file and relink them.

    The file is scanned for Markdown image syntax (``![](url)``) and HTML
    ``<img src="...">`` tags. Every distinct reference is handed to
    ``download_image``; when that returns a new link, all occurrences of the
    old reference in the text are replaced by it. The file is rewritten once,
    and only if at least one reference was replaced.

    Args:
        md_file: Path of the Markdown file to process (read and written as
            UTF-8).
        local_img_path: Directory passed to ``download_image`` as the place
            to store the downloaded images.

    Returns:
        None. Progress is reported on stdout.
    """
    # Markdown image syntax ![](url) or HTML <img src='' />
    img_patten = r'!\[.*?\]\((.*?)\)|<img.*?src=[\'\"](.*?)[\'\"].*?>'

    with open(md_file, 'r', encoding='utf-8') as f:
        post = f.read()

    # findall yields one (markdown_url, html_src) tuple per match with the
    # unused group empty. Flatten, drop the empties and de-duplicate while
    # keeping first-seen order so each image is downloaded only once.
    found = chain.from_iterable(re.findall(img_patten, post))
    urls = list(dict.fromkeys(url for url in found if url))

    cnt_replace = 0
    for url in urls:
        new_url = download_image(url, local_img_path)
        # Any falsy result (False, or None from an older download_image on a
        # non-200 response) means "not downloaded": leave the reference alone.
        if not new_url:
            continue
        post = post.replace(url, new_url)
        cnt_replace += 1

    if cnt_replace == 0:
        print('{}中没有需要替换的URL'.format(os.path.basename(md_file)))
        return

    # Write back once, after the read handle is closed and all replacements
    # have been applied.
    with open(md_file, 'w', encoding='utf-8') as f:
        f.write(post)
    print(f"已更新{cnt_replace}个url")


# 获取到图片的保存的相对路径
def get_img_local_path(md_file, path):
    """Resolve the directory in which a Markdown file's images are stored.

    Args:
        md_file: Path of the Markdown file; its directory is the base for
            relative image directories.
        path: Image directory in one of three forms: an absolute path
            (``/a/b/c``), a ``file:///`` URL, or a path relative to the
            Markdown file (``./a/b/c`` or ``a/b/c``).

    Returns:
        The image directory as a string. Absolute paths are returned
        unchanged. ``file:///`` URLs lose their 8-character prefix and have
        ``%20`` decoded to spaces. Relative paths are prefixed with the
        Markdown file's directory.
    """
    # /a/b/c
    if path.startswith('/'):
        return path

    # file:///a/b/c
    if path.startswith('file:///'):
        return path[8:].replace('%20', ' ')

    # ./a/b/c or a/b/c -- relative to the directory holding the Markdown file.
    md_dir = os.path.dirname(md_file)
    if not md_dir:
        # The Markdown file is in the current directory. Joining with an empty
        # dirname would yield "/a/b/c", i.e. a path at the filesystem root.
        return path
    return '{0}/{1}'.format(md_dir, path)


# 下载图片
def download_image(image_url, save_dir, link_dir=None):
    """Download a remote image into ``save_dir`` and return its new link.

    Args:
        image_url: Image reference taken from the Markdown file. Only
            ``http://`` and ``https://`` URLs are downloaded; anything else
            is skipped.
        save_dir: Directory the image is written to. It is created only once
            a download has actually been accepted.
        link_dir: Directory used in the returned link. When omitted, the
            module-level ``path`` set by the script's main loop is used if it
            exists; otherwise the saved file's own path is returned.

    Returns:
        The link to put into the Markdown file (``./<link_dir>/<file_name>``)
        on success, or ``False`` when the URL is skipped or the download
        fails.
    """
    # Function-scope import so this block carries its own dependency.
    from urllib.parse import urlsplit

    # str.startswith accepts a tuple; the previous `not a or b` form rejected
    # every https:// URL because `not` bound only to the first test.
    if not image_url.startswith(('http://', 'https://')):
        return False

    failure_msg = f"{image_url}  --->图片爬取失败,!!!!!!!!"

    # Take the name from the URL path only, so a query string or fragment
    # never ends up in the file name.
    file_name = urlsplit(image_url).path.split("/")[-1]
    if not file_name:
        print(failure_msg)
        return False

    file_path = os.path.join(save_dir, file_name)
    try:
        # The timeout keeps one unresponsive host from hanging the whole run;
        # the context manager releases the streamed connection.
        with requests.get(image_url, stream=True, timeout=30) as response:
            if response.status_code != 200:
                print(failure_msg)
                return False
            os.makedirs(save_dir, exist_ok=True)
            with open(file_path, "wb") as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
    except (requests.RequestException, OSError):
        print(failure_msg)
        return False

    print(f"{image_url}  --->图片爬取成功,已保存到{save_dir}")

    if link_dir is None:
        # Legacy behaviour: when run as a script, the main loop publishes the
        # Markdown-relative image directory as the module global `path`.
        link_dir = globals().get("path")
    if link_dir is None:
        # No link directory known (e.g. imported as a module): fall back to
        # the location the file was actually saved to.
        return file_path.replace("\\", "/")
    return f"./{link_dir}/{file_name}"


if __name__ == '__main__':
    # A directory can be passed to get_md_files() to scan somewhere else.
    # NOTE(review): the returned paths are relative to the scanned directory,
    # so a non-default directory only works when it is also the current
    # working directory.
    md_files = get_md_files()

    for md_file in md_files:
        # File name without its final extension. splitext keeps dotted names
        # intact ("notes.v2.md" -> "notes.v2"); splitting on the first "."
        # would give "notes" and let different files share one image directory.
        md_name = os.path.splitext(os.path.basename(md_file))[0]
        # Must remain a module-level variable named `path`: download_image
        # reads it when building the link written into the Markdown file.
        path = f'typora-img/{md_name}'

        # Directory the images are saved to, resolved against the md file.
        local_img_path = get_img_local_path(md_file, path)
        replace_md_url(md_file, local_img_path)
# posted @ 2023-07-01 12:50  charonlight  阅读(100)  评论(0)  编辑  收藏  举报