博客园博客内容转换为 Markdown 格式
一、转换代码(不支持xml格式)
#!/bin/bash
#
# Convert a saved cnblogs post (HTML) into Markdown.
#
# Usage: bash <this-script> FILE
#
# FILE itself is never modified: it is copied to "FILE_mark" and every
# substitution is applied to that copy in place.
# Requires GNU sed (-r, -i, \w in patterns, \n in replacements).
#
# `set -e` is deliberately not used: each step reports failure through its
# return status, and main's status becomes the script's exit status.

# Download directory referenced only by the disabled re-hosting step in
# main(); kept so that step can be re-enabled without further changes.
# shellcheck disable=SC2034
save_img_path=/tmp/img

# Print a short usage message on stderr.
usage() {
  printf 'Usage: %s FILE\n' "${0##*/}" >&2
}

#######################################
# List the https image URLs of a document.
# Arguments: $1 - HTML file to scan
# Outputs:   one URL per line on stdout, duplicates removed
#######################################
list_image_urls() {
  # [^"]+ stops at the closing quote of src, so other attributes on the same
  # tag (or other tags on the same line) never end up in the URL list.
  grep -oE '<img src="https://[^"]+"' -- "$1" \
    | sed -e 's#^<img src="##' -e 's#"$##' \
    | sort -u
}

#######################################
# Escape a string so sed matches it literally inside an extended regular
# expression that uses '#' as the s-command delimiter.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout
#######################################
escape_for_sed() {
  printf '%s\n' "$1" | sed -e 's/[][\.*^$+?(){}|#]/\\&/g'
}

#######################################
# Rewrite the HTML markup of a file into Markdown, in place.
# Image tags are reduced to their bare URL here; main() wraps them later.
# Arguments: $1 - file to rewrite
# Returns:   non-zero as soon as one sed pass fails
#######################################
html_to_markdown() {
  local out=$1
  local hashes=''
  local level

  # <pre ...> and </pre> become fence markers on their own lines.
  sed -ri 's#</?pre[^>]*>#\n```\n#g' -- "$out" || return

  # <img src="https://..." ...> is replaced by the URL alone, whatever the
  # file extension, without touching the rest of the line.
  sed -ri 's#<img src="(https://[^"]*)"[^>]*>#\1#g' -- "$out" || return

  # <strong> ... </strong>  ->  ** ... **
  sed -ri 's#</?strong[^>]*>#**#g' -- "$out" || return

  # <hN ...>  ->  N hashes and a space; </hN> is simply dropped.
  for level in 1 2 3 4 5; do
    hashes+='#'
    sed -ri "s=<h${level}[^>]*>=${hashes} =g;s=</h${level}>==g" -- "$out" \
      || return
  done

  # Every tag that is still present (p, span, div, a, ...) is removed whole.
  sed -ri 's#</?\w+[^>]*>##g' -- "$out" || return

  # NOTE(review): the published script matched a bare space here, which put
  # every word (and the "# " heading markers produced above) on its own
  # line. It is assumed to have been an &nbsp; entity that was decoded when
  # the script was published -- confirm.
  sed -ri 's#&nbsp;#\n#g' -- "$out" || return

  # Drop angle brackets that are left over once the tags are gone.
  sed -ri 's#<##g' -- "$out" || return
  sed -ri 's#>##g' -- "$out" || return
}

#######################################
# Turn every occurrence of a bare image URL into a Markdown image: ![](URL)
# Arguments: $1 - file to rewrite in place
#            $2 - image URL, matched literally
#######################################
wrap_image_url() {
  local out=$1
  local pattern
  pattern=$(escape_for_sed "$2") || return
  # Single quotes around the fixed parts keep '!' away from history expansion.
  sed -ri 's#'"${pattern}"'#![](&)#g' -- "$out"
}

#######################################
# Entry point.
# Arguments: $1 - HTML file to convert (further arguments are ignored)
# Returns:   0 on success, 2 on usage error, 1 on any other failure
#######################################
main() {
  if (( $# < 1 )); then
    usage
    return 2
  fi

  local file=$1
  local file_swp=${file}_mark
  local -a img_list=()
  local img_url

  if [[ ! -f "$file" ]]; then
    printf '%s: no such file: %s\n' "${0##*/}" "$file" >&2
    return 1
  fi

  cp -- "$file" "$file_swp" || return 1

  # Collect the URLs before the markup is rewritten.
  while IFS= read -r img_url; do
    img_list+=("$img_url")
  done < <(list_image_urls "$file_swp")

  html_to_markdown "$file_swp" || return 1

  for img_url in "${img_list[@]}"; do
    # Disabled re-hosting step: download the image with a cnblogs referer,
    # upload it to the juejin CDN and extract the returned URL.
    #jpg_name=${save_img_path}/`echo $img_url|awk -F/ '{print $NF}'`
    #curl -o ${jpg_name} -H 'User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36' -H 'referer: https://www.cnblogs.com/' $img_url
    #img_path=`curl -X POST -F "file=@${jpg_name}" https://cdn-ms.juejin.im/v1/upload?bucket=gold-user-assets|egrep -o "\"https://user-gold-cdn.xitu.io.*\"}"|sed "s#[}\"]##g"`
    wrap_image_url "$file_swp" "$img_url" || return 1
  done
}

main "$@"