博客园博客PDF生成器
周末写了一个博客园博客PDF生成器,由于博客园文件上传大小的限制,我把源代码放在CSDN上了(相信大家都有帐号哈),如果没有帐号的请留下邮箱,我会尽快发给你,当然如果哪位朋友能帮忙把源代码上传到博客园上更好:博客园博客PDF生成器
废话不多说,直接看生成后的PDF效果哈:
博客中图片效果:
代码比较简单,这里先简单说一下思路,先通过博客地址取得该博客的RSS信息,这是一个XML文件,把源码存在本地,然后解析这个XML文件,从中取出需要的信息,再用iTextSharp这个DLL来操作PDF,从而生成PDF文档。
下面只贴出几个主要的类,大家有兴趣可以下载源代码看:
实体类channel,类属性是从XML文件中取得的:

using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace BlogsConvert
{
/// <summary>
/// Feed-level information of a blog, filled from the child elements of the
/// RSS <c>channel</c> element.
/// </summary>
public class channel
{
    /// <summary>Text of the <c>title</c> element.</summary>
    public string Title { get; set; }

    /// <summary>Text of the <c>link</c> element.</summary>
    public string Link { get; set; }

    /// <summary>Text of the <c>description</c> element.</summary>
    public string Description { get; set; }

    /// <summary>Text of the <c>language</c> element.</summary>
    public string Language { get; set; }

    /// <summary>Parsed value of the <c>lastBuildDate</c> element.</summary>
    public DateTime LastBuildDate { get; set; }

    /// <summary>Parsed value of the <c>pubDate</c> element.</summary>
    public DateTime PubDate { get; set; }

    /// <summary>Parsed value of the <c>ttl</c> element.</summary>
    public int Ttl { get; set; }
}
}
实体类item(属性来自XML文件):

using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace BlogsConvert
{
/// <summary>
/// One article of a blog, filled from the child elements of an RSS
/// <c>item</c> element.
/// </summary>
public class item
{
    /// <summary>Text of the <c>title</c> element.</summary>
    public string Title { get; set; }

    /// <summary>Text of the <c>link</c> element.</summary>
    public string Link { get; set; }

    /// <summary>Text of the <c>dc:creator</c> element.</summary>
    public string Dc_creator { get; set; }

    /// <summary>Text of the <c>author</c> element.</summary>
    public string Author { get; set; }

    /// <summary>Parsed value of the <c>pubDate</c> element.</summary>
    public DateTime PubDate { get; set; }

    /// <summary>Text of the <c>guid</c> element.</summary>
    public string Guid { get; set; }

    /// <summary>Text of the <c>description</c> element (the article body as HTML).</summary>
    public string Description { get; set; }
}
}
从XML文件中提取博客信息类:

using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using System.Xml;
namespace BlogsConvert
{
public class BlogsInfo
{
/// <summary>
/// Reads the feed-level information from the <c>channel</c> element of a saved RSS file.
/// </summary>
/// <param name="xmlPath">Path of the XML file to load.</param>
/// <returns>
/// The populated <see cref="channel"/>, or <c>null</c> when the document has no
/// <c>channel</c> element or the channel has no non-blank title.
/// </returns>
public channel GetChannel(string xmlPath)
{
    XmlDocument myXml = new XmlDocument();
    myXml.Load(xmlPath);

    // Look the element up by name rather than assuming it is the very first
    // child node, and bail out cleanly when it is absent.
    XmlNode node = myXml.DocumentElement["channel"];
    if (node == null)
    {
        return null;
    }

    channel cha = new channel();
    foreach (XmlNode chanode in node.ChildNodes)
    {
        // Stop at the first article: only the elements before it are read.
        if (chanode.Name == "item")
        {
            break;
        }

        switch (chanode.Name)
        {
            case "title":
                cha.Title = chanode.InnerText;
                break;
            case "link":
                cha.Link = chanode.InnerText;
                break;
            case "description":
                cha.Description = chanode.InnerText;
                break;
            case "language":
                cha.Language = chanode.InnerText;
                break;
            case "lastBuildDate":
                cha.LastBuildDate = DateTime.Parse(chanode.InnerText);
                break;
            case "pubDate":
                cha.PubDate = DateTime.Parse(chanode.InnerText);
                break;
            case "ttl":
                cha.Ttl = int.Parse(chanode.InnerText);
                break;
        }
    }

    // Title stays null when the feed has no <title>; test for that before
    // trimming so a malformed feed yields null instead of an exception.
    if (cha.Title == null || cha.Title.Trim() == "")
    {
        return null;
    }

    return cha;
}
/// <summary>
/// Reads every article from a saved RSS file, without any title filter.
/// </summary>
/// <param name="xmlPath">Path of the XML file to load.</param>
/// <returns>Whatever the keyword overload returns for an empty keyword.</returns>
public IList<item> GetItems(string xmlPath)
{
    // An empty keyword disables the title filter in the two-argument overload.
    string noKeyword = "";
    return GetItems(xmlPath, noKeyword);
}
/// <summary>
/// Reads the articles from a saved RSS file, optionally keeping only those
/// whose title contains a keyword.
/// </summary>
/// <param name="xmlPath">Path of the XML file to load.</param>
/// <param name="keyWord">
/// Text that an article title must contain; <c>null</c> or blank keeps every article.
/// </param>
/// <returns>
/// The articles that have a link and pass the filter, or <c>null</c> when there are none.
/// </returns>
public IList<item> GetItems(string xmlPath,string keyWord)
{
    IList<item> itemList = new List<item>();
    bool filterByTitle = keyWord != null && keyWord.Trim() != "";

    XmlDocument myXml = new XmlDocument();
    myXml.Load(xmlPath);

    // Find the channel by name so a feed without one yields "no items"
    // instead of a NullReferenceException.
    XmlNode node = myXml.DocumentElement["channel"];
    if (node != null)
    {
        foreach (XmlNode statusnode in node.ChildNodes)
        {
            if (statusnode.Name != "item")
            {
                continue;
            }

            item temp = new item();
            bool matches = true;
            foreach (XmlNode o in statusnode.ChildNodes)
            {
                // Once the title has failed the filter the rest of the
                // article is irrelevant, so stop reading it.
                if (!matches)
                {
                    break;
                }

                switch (o.Name)
                {
                    case "title":
                        if (filterByTitle && !o.InnerText.Contains(keyWord))
                        {
                            matches = false;
                        }
                        temp.Title = o.InnerText;
                        break;
                    case "link":
                        temp.Link = o.InnerText;
                        break;
                    case "dc:creator":
                        temp.Dc_creator = o.InnerText;
                        break;
                    case "author":
                        temp.Author = o.InnerText;
                        break;
                    case "pubDate":
                        temp.PubDate = DateTime.Parse(o.InnerText);
                        break;
                    case "guid":
                        temp.Guid = o.InnerText;
                        break;
                    case "description":
                        temp.Description = o.InnerText;
                        break;
                }
            }

            // Check the filter result explicitly: relying on Link being unset
            // only works when <title> happens to precede <link> in the feed.
            if (matches && temp.Link != null)
            {
                itemList.Add(temp);
            }
        }
    }

    if (itemList.Count > 0)
    {
        return itemList;
    }

    return null;
}
}
}
PDF文件生成类,也是本软件中最重要的一个类,其实就是iTextSharp的运用(这个DLL文件在源代码中有):

using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text;
using iTextSharp.text.pdf;
using System.IO;
using System.Text.RegularExpressions;
namespace BlogsConvert
{
public class ToPdf:IConvert
{
#region IConvert 成员
/// <summary>
/// Writes a blog to a PDF file: a header, a clickable table of contents,
/// then every article with its text and images.
/// </summary>
/// <param name="commonInfo">Feed-level information (title and description are printed).</param>
/// <param name="itemList">Articles to include, in output order.</param>
/// <param name="path">Path of the PDF file to create or overwrite.</param>
/// <remarks>Nothing is written when <paramref name="commonInfo"/> or <paramref name="itemList"/> is null.</remarks>
public void Convert(channel commonInfo, IList<item> itemList,string path)
{
    if (commonInfo == null || itemList == null)
    {
        return;
    }

    Rectangle pageSize = PageSize.A4;

    // The using block releases the output file even when generation fails
    // part-way (missing font, unreachable image, ...).
    using (FileStream output = new FileStream(path, FileMode.Create))
    {
        Document document = new Document(pageSize);
        PdfWriter.GetInstance(document, output);
        document.Open();

        // One base font serves both sizes.
        BaseFont baseFont = BaseFont.CreateFont(@"Fonts\SIMHEI.TTF", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
        Font font = new Font(baseFont, 12);
        Font fontBlod = new Font(baseFont, 15);

        // Generation notice.
        Paragraph pToop = new Paragraph(new Chunk("本文档由程序整理生成(生成时间:"+DateTime.Now+")", fontBlod));
        pToop.Alignment = Element.ALIGN_CENTER;
        pToop.SpacingAfter = 20;
        document.Add(pToop);

        // Blog title.
        Paragraph pTitle = new Paragraph(new Phrase(commonInfo.Title, fontBlod));
        pTitle.Alignment = Element.ALIGN_CENTER;
        pTitle.SpacingAfter = 20;
        document.Add(pTitle);

        // Blog sub-title, double line spacing.
        Paragraph pDescription = new Paragraph(commonInfo.Description, font);
        pDescription.Alignment = Element.ALIGN_LEFT;
        pDescription.MultipliedLeading = 2;
        pDescription.SpacingAfter = 20;
        document.Add(pDescription);

        // Table-of-contents heading.
        Paragraph allGuid = new Paragraph("目 录", fontBlod);
        allGuid.Alignment = Element.ALIGN_CENTER;
        allGuid.SpacingBefore = 10;
        document.Add(allGuid);

        // Table-of-contents entries. Anchor names are derived from the article
        // position, which is unique; publication dates can repeat and their
        // string form depends on the current culture.
        Paragraph guid = new Paragraph(" ");
        guid.MultipliedLeading = 1;
        for (int i = 0; i < itemList.Count; i++)
        {
            Anchor aTitle = new Anchor("第"+(i+1)+"篇: "+itemList[i].Title, font);
            aTitle.Reference = "#link" + (i + 1);
            document.Add(aTitle);
            document.Add(guid);
        }
        document.Add(guid);
        document.Add(guid);
        document.Add(guid);

        // Separator printed after each article.
        Paragraph hr = new Paragraph("--------------------------------------------------------------------------------------------------------");
        hr.Alignment = Element.ALIGN_CENTER;
        hr.SpacingAfter = 20;
        hr.SpacingBefore = 20;

        // Finds image URLs in <img src=...> and background=... attributes.
        Regex reg = new Regex(@"(?is)(?:<img[^>]*?src|\bbackground)=(?:(['""])(?<img>[^'"">]+)\1|(?<img>[^'""\s>]+))");

        // Images wider than the printable area are shrunk to fit it.
        float maxImageWidth = document.PageSize.Width - document.LeftMargin - document.RightMargin;

        for (int index = 1; index <= itemList.Count; index++)
        {
            item o = itemList[index - 1];

            Anchor lTitle = new Anchor("第"+index+"篇:", font);
            lTitle.Name = "link" + index;
            document.Add(lTitle);

            Paragraph blogTitle = new Paragraph(o.Title, fontBlod);
            blogTitle.Alignment = Element.ALIGN_CENTER;
            blogTitle.MultipliedLeading = 1;
            document.Add(blogTitle);

            // '卍' marks a paragraph boundary; it is inserted before the tags
            // are stripped so the breaks survive NoHTML.
            string Content = o.Description ?? "";
            Content = Content.Replace("<p>", "卍");
            Content = Content.Replace("<br />", "卍");
            Content = Content.Replace("<br/>", "卍");
            Content = Content.Replace("<br>", "卍");

            // Wrap each distinct image URL in Pic...ciP markers on its own
            // pseudo-paragraph. A URL is processed once only: Replace already
            // rewrites every occurrence, and a second pass would wrap the
            // markers themselves.
            List<string> picList = new List<string>();
            foreach (Match m in reg.Matches(Content))
            {
                string url = m.Groups["img"].Value;
                if (url.Contains("OutliningIndicators") || picList.Contains(url))
                {
                    continue;
                }
                picList.Add(url);
                Content = Content.Replace(url, "\" />卍Pic" + url + "ciP卍<img src=\"");
            }

            Content = NoHTML(Content);

            foreach (string part in Content.Split('卍'))
            {
                string picUrl = null;
                foreach (string candidate in picList)
                {
                    if (part == "Pic" + candidate + "ciP")
                    {
                        picUrl = candidate;
                        break;
                    }
                }

                if (picUrl != null)
                {
                    Image jpeg = Image.GetInstance(picUrl);
                    if (jpeg.Width > maxImageWidth)
                    {
                        // Scale height by the same factor as width to keep
                        // the aspect ratio.
                        jpeg.ScaleAbsolute(maxImageWidth, jpeg.Height * maxImageWidth / jpeg.Width);
                    }
                    jpeg.Alignment = Image.MIDDLE_ALIGN;
                    document.Add(jpeg);
                }
                else
                {
                    Paragraph blogContent = new Paragraph(part, font);
                    blogContent.Alignment = Element.ALIGN_LEFT;
                    blogContent.MultipliedLeading = 2;
                    blogContent.SpacingAfter = 10;
                    document.Add(blogContent);
                }
            }

            document.Add(hr);
        }

        // Credits.
        Paragraph drj = new Paragraph(new Chunk("本程序由博客园——天行健(http://home.cnblogs.com/u/durongjian/)制作,如有建议请发邮件至drjchina@163.com", font));
        drj.Alignment = Element.ALIGN_CENTER;
        drj.SpacingAfter = 20;
        drj.SpacingBefore = 20;
        document.Add(drj);

        document.Close();
    }
}
/// <summary>
/// Removes HTML markup from a fragment and decodes a small set of character entities.
/// </summary>
/// <param name="Htmlstring">Text that may contain HTML tags and entities; may be null.</param>
/// <returns>
/// The plain text with leading and trailing whitespace trimmed, or an empty
/// string for null or empty input.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Singleline lets ".*?" span line breaks so multi-line scripts are
    // removed together with their body.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Drop markup residue. Strings are immutable, so the result of Replace
    // has to be assigned back. This runs before entity decoding so that
    // brackets the author escaped on purpose (&lt; / &gt;) are kept.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // Decode all entities in one pass; decoding them one after another would
    // turn "&amp;lt;" into "<" instead of "&lt;".
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);", new MatchEvaluator(DecodeEntity), RegexOptions.IgnoreCase);

    return Htmlstring.Trim();
}

/// <summary>
/// Maps one matched entity to its character; numeric entities that are not
/// listed are dropped.
/// </summary>
private static string DecodeEntity(Match entity)
{
    switch (entity.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00a1";
        case "cent":
        case "#162":
            return "\u00a2";
        case "pound":
        case "#163":
            return "\u00a3";
        case "copy":
        case "#169":
            return "\u00a9";
        default:
            return "";
    }
}
#endregion
}
}
最后就是调用类了,先看一下软件界面吧:
后台代码:

using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using BlogsConvert;
using System.Net;
using System.IO;
namespace CnBlogsHelper
{
public partial class BlogToPdf : Form
{
/// <summary>Feed-level information of the blog; replaced each time blog data is extracted.</summary>
public channel commonInfo = new channel();

/// <summary>Articles backing the list box; replaced each time blog data is extracted.</summary>
public IList<item> blogInfos = new List<item>();

/// <summary>Creates the form and runs <c>InitializeComponent</c>.</summary>
public BlogToPdf()
{
    this.InitializeComponent();
}

/// <summary>Load handler of the form; currently empty.</summary>
private void BlogToPdf_Load(object sender, EventArgs e)
{
}
/// <summary>
/// Downloads the RSS document at the given address and saves it as
/// <c>Blogs.xml</c> in the application start-up folder, overwriting any
/// previous copy.
/// </summary>
/// <param name="PageUrl">Address of the RSS feed to download.</param>
/// <exception cref="Exception">The downloaded document is empty or blank.</exception>
public void GetXML(string PageUrl)
{
    string Content;

    // Dispose the response, its stream and the reader even when the
    // download fails half-way.
    WebRequest request = WebRequest.Create(PageUrl);
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    // NOTE(review): the body is always decoded as GB2312, whatever encoding
    // the server declares - confirm this matches the feed.
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.GetEncoding("GB2312")))
    {
        Content = sr.ReadToEnd();
    }

    // StreamWriter with append=false creates the file when it is missing and
    // truncates it otherwise, so no separate existence check is needed.
    string xmlPath = Application.StartupPath + @"\Blogs.xml";
    using (StreamWriter writer = new StreamWriter(xmlPath, false, System.Text.Encoding.GetEncoding("UTF-8")))
    {
        writer.Write(Content);
    }

    if (Content.Trim() == "")
    {
        throw new Exception("用户名有误,请检查后重新输入!");
    }
}
/// <summary>
/// Generates a PDF file on the current user's desktop.
/// </summary>
/// <param name="saveName">File name of the PDF, without the <c>.pdf</c> extension.</param>
/// <param name="cha">Feed-level information of the blog.</param>
/// <param name="itemList">Articles to include.</param>
public void CreatePDF(string saveName,channel cha,IList<item> itemList)
{
    string dir = Environment.GetFolderPath(Environment.SpecialFolder.DesktopDirectory);

    // Path.Combine inserts the directory separator only when it is needed.
    string pdfPath = Path.Combine(dir, saveName + ".pdf");

    IConvert con = new ToPdf();
    con.Convert(cha, itemList, pdfPath);
}
// "Generate" button: writes the extracted articles to a PDF on the desktop.
private void btnCreate_Click(object sender, EventArgs e)
{
    if (!CheckForm())
    {
        return;
    }

    // Either field can be null after an extraction that found nothing;
    // treat that the same as "nothing extracted yet".
    if (commonInfo == null || blogInfos == null || blogInfos.Count == 0)
    {
        MessageBox.Show("博客数为0,请先提取博客信息!");
        return;
    }

    string fileName = txtFileName.Text.Trim();
    string error = null;
    Wait f = null;
    try
    {
        f = new Wait();
        f.Show();
        Application.DoEvents();
        CreatePDF(fileName, commonInfo, blogInfos);
    }
    catch (Exception ex)
    {
        error = ex.Message;
    }
    finally
    {
        // Close the wait window on failure too, and before any message box
        // is shown, so it never stays on screen.
        if (f != null)
        {
            f.Close();
        }
    }

    if (error == null)
    {
        MessageBox.Show("PDF文档“" + fileName + ".pdf”生成成功,文档在桌面!");
    }
    else
    {
        MessageBox.Show("异常信息:"+error);
    }
}
// "Extract" button: downloads the blog's RSS feed and lists the matching article titles.
private void btnFind_Click(object sender, EventArgs e)
{
    if (!CheckForm())
    {
        return;
    }

    // Reset the list box and its backing list together so they cannot get
    // out of step when the download below fails.
    libBlog.Items.Clear();
    blogInfos = new List<item>();

    string pageUrl = txtBlogUrl.Text.Trim();
    if (!pageUrl.EndsWith("/", StringComparison.Ordinal))
    {
        pageUrl = pageUrl + @"/";
    }
    pageUrl = pageUrl + "rss";

    // The placeholder hint in the keyword box means "no filter".
    string keyWord = txtKeyWord.Text.Trim();
    if (keyWord == "请输入标题中的关键字")
    {
        keyWord = "";
    }

    string error = null;
    Wait f = null;
    try
    {
        f = new Wait();
        f.Show();
        Application.DoEvents();

        GetXML(pageUrl);
        string path = Application.StartupPath + @"\Blogs.xml";
        BlogsInfo blogInfo = new BlogsInfo();
        commonInfo = blogInfo.GetChannel(path);

        // GetItems returns null when no article qualifies.
        blogInfos = blogInfo.GetItems(path, keyWord) ?? new List<item>();
        foreach (item o in blogInfos)
        {
            libBlog.Items.Add(o.Title);
        }
    }
    catch (Exception ex)
    {
        error = ex.Message;
    }
    finally
    {
        // Close the wait window on failure too, before any message box.
        if (f != null)
        {
            f.Close();
        }
    }

    if (error != null)
    {
        MessageBox.Show("异常信息:" + error);
    }
    else if (blogInfos.Count == 0)
    {
        MessageBox.Show("没有找到符合条件的博客!");
    }
}
// "Clear all" button: empties the list box and the article list behind it.
private void btnClearAll_Click(object sender, EventArgs e)
{
    libBlog.Items.Clear();

    // blogInfos is null after an extraction that returned no articles.
    if (blogInfos != null)
    {
        blogInfos.Clear();
    }
}
// "Remove selected" button: removes the selected article from the list box and the article list.
private void btnClearCurrent_Click(object sender, EventArgs e)
{
    int index = libBlog.SelectedIndex;

    // SelectedIndex is -1 when nothing is selected.
    if (index < 0)
    {
        return;
    }

    // Remove by position: removing by value would delete the first entry
    // with the same title, which may not be the selected row.
    libBlog.Items.RemoveAt(index);
    if (blogInfos != null && index < blogInfos.Count)
    {
        blogInfos.RemoveAt(index);
    }
}
// Click on the keyword box: blanks it when it still shows the placeholder hint.
private void txtKeyWord_Click(object sender, EventArgs e)
{
    string current = txtKeyWord.Text;
    string next = current;
    if (current.Trim() == "请输入标题中的关键字")
    {
        next = "";
    }
    txtKeyWord.Text = next;
}
/// <summary>
/// Checks that both the blog address and the output file name are filled in.
/// </summary>
/// <returns>
/// <c>true</c> when both fields contain text; otherwise <c>false</c>, after
/// warning the user and filling each empty field with its default value.
/// </returns>
private bool CheckForm()
{
    bool urlMissing = txtBlogUrl.Text.Trim() == "";
    bool nameMissing = txtFileName.Text.Trim() == "";
    if (!urlMissing && !nameMissing)
    {
        return true;
    }

    MessageBox.Show("博客地址和保存文件名不能为空!");

    // Fill in only what is missing, so a value the user already typed is
    // not thrown away because the other field was empty.
    if (urlMissing)
    {
        txtBlogUrl.Text = "http://www.cnblogs.com/";
    }
    if (nameMissing)
    {
        txtFileName.Text = "我的博客";
    }
    return false;
}
}
}
其中调用了一个等待窗体Wait,非常简单,这里就不说了,大家可以看源代码。
博客园中高手如云,本人只能算个菜,只是把自己写的一点小东西拿出来跟大家分享,希望能帮到大家,欢迎各位朋友批评指正,如果使用过程中有错误请留言哦。
本软件目的是服务博客园的朋友们,源代码完全开源,但转载或二次开发请注明出处。

【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?
· 展开说说关于C#中ORM框架的用法!