博客园博客内容转换为 Markdown 格式

一、转换代码(不支持xml格式)

#!/bin/bash
#
# Convert the HTML of a cnblogs post into Markdown.
#
# Usage: <script> <html-file>
#
# The input file is left untouched; the converted text is written next to it
# as "<html-file>_mark". Requires GNU sed ("\n" is used in a replacement).
#
# Note: an earlier revision carried a disabled step that downloaded every
# image and re-uploaded it to another host. That step is not implemented
# here, so image links keep pointing at their original URLs.

# -e is deliberately not enabled: every fallible step below is checked
# explicitly and reports its own error.
set -uo pipefail

#######################################
# Convert one HTML file to Markdown.
# Arguments: $1 - path to the HTML file
# Outputs:   writes "${1}_mark"; diagnostics go to stderr
# Returns:   0 on success, non-zero on error
#######################################
html_to_markdown() {
  local src=$1
  local out="${src}_mark"

  if [[ ! -f "$src" || ! -r "$src" ]]; then
    printf 'error: cannot read input file: %s\n' "$src" >&2
    return 1
  fi

  # <pre ...> / </pre> become fenced code blocks. This runs as its own sed
  # pass so the inserted newlines are real line boundaries for the second
  # pass (tag patterns can then never span across a fence).
  local fence_rule='s@</?pre([[:space:]][^>]*)?>@\n```\n@g'

  # <hN ...> becomes N hashes plus a space; the closing tag is dropped.
  # Only the tags themselves are rewritten, so '#' characters elsewhere on a
  # line (e.g. shell comments inside code) are never touched.
  local level hashes='' heading_rules=''
  for level in 1 2 3 4 5 6; do
    hashes+='#'
    heading_rules+="s@<h${level}([[:space:]][^>]*)?>@${hashes} @g;"
    heading_rules+="s@</h${level}[[:space:]]*>@@g;"
  done

  local tag_rules=''
  # Images of any file type: keep only the URL, as a Markdown image link.
  tag_rules+='s@<img[^>]*[[:space:]]src="(https?://[^"]+)"[^>]*>@![](\1)@g;'
  tag_rules+='s@</?strong([[:space:]][^>]*)?>@**@g;'
  tag_rules+=$heading_rules
  # Drop every tag that is still left.
  tag_rules+='s@</?[[:alnum:]_]+[^>]*>@@g;'
  # Entities are handled after tag stripping so a decoded '<' is never
  # mistaken for markup. &nbsp; turns into a line break (behaviour kept from
  # the original script); &amp; is decoded last so "&amp;lt;" yields the
  # literal text "&lt;".
  tag_rules+='s@&nbsp;@\n@g;'
  tag_rules+='s@&lt;@<@g;'
  tag_rules+='s@&gt;@>@g;'
  tag_rules+='s@&quot;@"@g;'
  tag_rules+='s@&amp;@\&@g'

  if ! sed -E -e "$fence_rule" -- "$src" \
    | sed -E -e "$tag_rules" >"$out"; then
    printf 'error: conversion failed for: %s\n' "$src" >&2
    return 1
  fi
}

#######################################
# Entry point: validates the command line and converts the given file.
# Arguments: $1 - path to the HTML file (further arguments are ignored)
# Returns:   2 on usage error, otherwise the status of the conversion
#######################################
main() {
  if (( $# < 1 )); then
    printf 'Usage: %s <html-file>\n' "${0##*/}" >&2
    return 2
  fi
  html_to_markdown "$1"
}

main "$@"

 

 

 

posted @ 2019-08-27 09:25  巽逸  阅读(4)  评论(0)  编辑  收藏  举报