博客传送机

博客传送机

https://clovershrub.github.io/
先将博客园的内容备份下来,它会生成一个 xml 文件(下面的脚本从 cnblogs.txt 读取备份内容)。
警告:以下脚本仅适用于我自己,不保证其他人也可用,仅作为思路参考。
迁移所用脚本:

import requests
import os

def extract_title(line):
    """Return the post title found on a ``<title type="text">`` line.

    The title is the text after the first ``>``, cut at the closing
    ``</title>`` tag or at the first ``-`` whose preceding character is
    neither ``F`` nor ``程`` (presumably the `` - <blog name>`` suffix, with
    the exceptions keeping titles such as ``CTF-...`` intact -- confirm
    against your own backup).  Whitespace before that ``-`` is kept, so the
    result may end with a space.
    """
    line = line.partition('</title>')[0]
    collected = []
    inside = False
    previous = ''
    for ch in line:
        if ch == '-' and previous not in ('F', '程'):
            break
        if inside:
            collected.append(ch)
        if ch == '>':
            inside = True
        previous = ch
    return ''.join(collected)


def extract_date(line):
    """Return the date part (text before ``T``) of a ``<published>`` line.

    The result carries no trailing newline; the caller adds it.
    """
    value = line.partition('>')[2]
    value = value.partition('<')[0]
    return value.partition('T')[0].strip()


def convert_line(line, title, sub_title, num):
    """Convert one line of a post body.

    Args:
        line: the raw body line.
        title: post title, used to build image paths.
        sub_title: text of the most recent ``##`` heading seen so far.
        num: running image number under the current heading.

    Returns:
        ``(text, sub_title, image)`` where ``text`` is what should be written
        to the Markdown file, ``sub_title`` is the possibly updated heading
        text, and ``image`` is ``None`` or a ``(url, local_path)`` pair that
        the caller must download.

    Behavior:
        * ``![](url)`` (empty alt text only) is rewritten to point at
          ``/img/<title>/<sub_title><num>.png`` with all spaces removed; the
          rest of the line after the image is dropped and a newline is
          emitted instead.
        * After ``##`` the first non-``#`` character starts a heading: an
          extra space is written in front of it and the characters up to the
          end of the line become the new ``sub_title``.
    """
    out = []
    prev1 = ''
    prev2 = ''
    in_heading = False
    for index, ch in enumerate(line):
        # Image: "[](" seen. Take the URL from THIS parenthesis up to the
        # first ')' after it, so other parentheses on the line cannot leak
        # into the URL or trigger a second download.
        if prev2 == '[' and prev1 == ']' and ch == '(':
            close = line.find(')', index + 1)
            if close != -1:
                url = line[index + 1:close]
                name = (title + '/' + sub_title + str(num) + '.png').replace(' ', '')
                out.append('(/img/' + name + ')\n')
                return ''.join(out), sub_title, (url, 'blogs/' + name)
            # No closing parenthesis: treat '(' as ordinary text.

        # Heading tracking.
        if ch == '\n':
            in_heading = False
        if in_heading:
            sub_title += ch
        if ch != '#' and prev1 == '#' and prev2 == '#':
            in_heading = True
            sub_title = ch
            out.append(' ' + ch)
        else:
            out.append(ch)
        prev2 = prev1
        prev1 = ch
    return ''.join(out), sub_title, None


def save_image(url, path):
    """Download ``url`` and store the response body at ``path``."""
    # A timeout keeps one dead image host from hanging the whole migration.
    pic = requests.get(url, timeout=30)
    # Image paths have spaces stripped, so their directory can differ from
    # the 'blogs/<title>' directory created earlier; make sure it exists.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as p:
        p.write(pic.content)


def write_post(lines, first_line, title, date):
    """Write one post to ``blogs/<title>.md``.

    Args:
        lines: iterator over the remaining lines of the backup; it is
            consumed up to and including the line holding ``</content>``.
        first_line: the line containing ``<content type="text">``.
        title: post title (used for the file name and front matter).
        date: publication date for the front matter.
    """
    with open('blogs/' + title + '.md', 'w', encoding='utf8') as f:
        # Front matter.
        f.write('---\n')
        f.write('title: ' + title + '\n')
        f.write('cover: /img/' + title + '/cover.jpg\n')
        f.write('date: ' + date + '\n')
        f.write('---\n')

        # Text after the opening tag is copied unprocessed.
        body, closing, _ = first_line.partition('>')[2].partition('</content>')
        f.write(body)
        if closing:
            # Single-line post: do not read into the following entries.
            return

        sub_title = ''
        last_sub_title = ''
        num = 1
        # Iterating (instead of readline() in `while True`) ends cleanly at
        # end of file even when '</content>' is missing.
        for line in lines:
            # Image numbering restarts under every new heading.
            if last_sub_title != sub_title:
                num = 1
                last_sub_title = sub_title
            # Keep whatever precedes the closing tag on the last line.
            line, closing, _ = line.partition('</content>')
            text, sub_title, image = convert_line(line, title, sub_title, num)
            if image is not None:
                save_image(image[0], image[1])
                num += 1
            f.write(text)
            if closing:
                break


def main():
    """Convert the backup in ``cnblogs.txt`` into Markdown posts under ``blogs/``."""
    with open('cnblogs.txt', 'r', encoding='utf8') as blogs:
        title = ''
        date = ''
        for context in blogs:
            # Post title.
            if '<title type="text">' in context:
                title = extract_title(context)

            # Create the image directory for the current title.
            if title != '' and not os.path.exists('blogs/' + title):
                os.makedirs('blogs/' + title)

            # Publication date.
            if '<published>' in context:
                date = extract_date(context)

            # Post body; this consumes lines from `blogs` up to '</content>'.
            if '<content type="text">' in context:
                write_post(blogs, context, title, date)


if __name__ == '__main__':
    main()

posted @ 2023-09-19 10:59  Clovershrub  阅读(13)  评论(0)  编辑  收藏  举报