博客传送机
博客传送机
https://clovershrub.github.io/
先将博客园的内容备份下来,它会生成一个 xml 文件(下面的脚本以 cnblogs.txt 这个文件名读取该备份)。
警告:以下脚本仅适用于我自己。不保证其余人也可用。仅作为思路参考。
迁移所用脚本:
import requests
import os
# Convert a cnblogs backup (read line by line from 'cnblogs.txt') into one
# Markdown file per post under 'blogs/', with front matter (title, cover,
# date), and download images written as "[](url)" into 'blogs/<title>/'.
with open('cnblogs.txt', 'r', encoding='utf8') as blogs:
    title = ''
    time = ''
    while True:
        context = blogs.readline()
        if context == '':
            # readline() returns '' only at end of file.
            break

        # Post title: collect everything after the first '>' up to the first
        # '-'. A '-' directly preceded by 'F' or '程' does not end the title.
        # NOTE(review): presumably the title line looks like
        # "<title ...>Post title - Author</title>"; without a terminating '-'
        # the closing tag and newline would end up in the title.
        if '<title type="text">' in context:
            is_title = 0
            last = ''
            title = ''
            for c in context:
                if c == '-':
                    if last != 'F' and last != '程':
                        break
                if is_title == 1:
                    title += c
                if c == '>':
                    is_title = 1
                last = c

        # Create the image gallery directory for the current post.
        if title != '' and not os.path.exists('blogs/' + title):
            os.makedirs('blogs/' + title)

        # Publication date: the text between the first '>' and the 'T' that
        # starts the time of day. The '>' of the closing tag switches capture
        # back on, so the line's trailing newline is captured as well; that
        # newline is what terminates the "date:" front-matter line below.
        if '<published>' in context:
            is_time = 0
            time = ''
            for c in context:
                if c == 'T':
                    is_time = 0
                if is_time == 1:
                    time += c
                if c == '>':
                    is_time = 1

        # Post body.
        if '<content type="text">' in context:
            with open('blogs/' + title + '.md', 'w', encoding='utf8') as f:
                f.write('---\n')
                f.write('title: ' + title + '\n')
                f.write('cover: /img/' + title + '/cover.jpg\n')
                f.write('date: ' + time)
                f.write('---\n')

                # The rest of the opening line, after its first '>', is
                # already article text.
                f.write(context.partition('>')[2])

                sub_title = ''
                l_sub_title = ''
                num = 1
                while True:
                    tmp = blogs.readline()
                    # Stop at the closing tag, and also at end of file so a
                    # truncated backup cannot loop forever on ''.
                    if tmp == '' or '</content>' in tmp:
                        break
                    # Image numbering restarts whenever the sub-heading changed.
                    if l_sub_title != sub_title:
                        num = 1
                        l_sub_title = sub_title
                    l1 = ''  # previous character
                    l2 = ''  # character before the previous one
                    is_sub = 0
                    for i, c in enumerate(tmp):
                        # Image: "[](" has just been seen. The "[]" was already
                        # copied to the output; replace "(url)" with the local
                        # path and drop the remainder of the line.
                        if l2 == '[' and l1 == ']' and c == '(':
                            close = tmp.find(')', i + 1)
                            if close != -1:
                                # Take the URL from this '(' to the next ')',
                                # not from the first parentheses on the line.
                                url = tmp[i + 1:close]
                                image_path = ('blogs/' + title + '/' + sub_title + str(num) + '.png').replace(' ', '')
                                # Spaces are stripped from the image path but
                                # not from the directory created above, so make
                                # sure the target directory really exists.
                                os.makedirs(os.path.dirname(image_path), exist_ok=True)
                                # A timeout keeps a stalled server from
                                # hanging the whole migration.
                                pic = requests.get(url, timeout=30)
                                with open(image_path, 'wb') as p:
                                    p.write(pic.content)
                                f.write(('(/img/' + title + '/' + sub_title + str(num) + '.png)').replace(' ', '') + '\n')
                                num += 1
                                break

                        # Sub-heading: the text following a run of two or more
                        # '#' names the images of that section.
                        if c == '\n':
                            is_sub = 0
                        if is_sub == 1:
                            sub_title += c
                        if c != '#' and l1 == '#' and l2 == '#':
                            is_sub = 1
                            sub_title = c
                            f.write(' ' + c)
                        else:
                            f.write(c)
                        l2 = l1
                        l1 = c