.Net/C#: 利用反射编写通用的 rss 2.0 的 reader

/*
.Net/C#: 利用反射编写通用的 rss 2.0 的 reader

最近在写一个 Simple Rss Reader
网上找到现成代码两种:
1.代码简单的,但不够通用 (如: 本站的一些专用 rss reader)
2.代码复杂的,但没有足够时间去消化 (如: rssbandit)

遂自己动手:
由于 rss 的基本属性大家都有!
但一些特殊不通用属性,如:
slash:comments
wfw:comment
wfw:commentRss
trackbackping
不一定存在!
如何处理???
我想到了 Reflection,就此提出以下解决方案:
1. Class RssHeader 用于表示 Rss 的头信息
 你可以为其添加新属性,原则是:
 成员变量 (Field) 的名称为 rss 的 XML 源对应的属性名称前加下划线,XML 属性名称含有 ":" 将其滤掉!
 如: <dc:language>zh-CHS</dc:language>
 将其映射为:
  private string _dclanguage
  public string DcLanguage
  {
   get
   {
    return this._dclanguage;
   }
  }

2. Class RssItem 用于表示 Rss 的 Item
 添加新属性的原则同 RssHeader!

3. 获取 rss 的 XML 源后通过递归遍历节点 (class SimpleRssReader)
 根据实际存在的 rss 属性,通过反射,"构造实例化" RssHeader 和 RssItem!
 请仔细参阅 class SimpleRssReader 的 Travel 方法!

4. 数据库 (本文使用了 Microsoft Data Access Application Block 3.1)
 表:
 Channels (主表)
 ChannelsDetails (细表)
 字段名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!
 存储过程:
 SP_AddChannel
 SP_AddChannelsDetails
 参数名称及其数据类型严格按照 rss 的 XML 源对应的属性名称,XML 属性名称含有 ":" 将其滤掉!


 命令行编译:
csc SimpleRssReader.cs /r:C:\WINDOWS\Microsoft.NET\Framework\v1.1.4322\System.Data.OracleClient.dll


全部代码 SimpleRssReader.cs 在此下载
https://files.cnblogs.com/Microshaoft/SimpleRssReader.rar

*/
namespace Microshaoft
{
 using System;
 using System.Xml;
 using System.Text;
 using System.Reflection;
 using System.Collections;
 using System.Text.RegularExpressions;

 public class RssHeader
 {
  //feed URL
  public RssHeader(string URL)
  {
   this._URL = URL;
  }

  public string Title
  {
   get
   {
    return this._title;
   }
  }

  public string Description
  {
   get
   {
    return this._description;
   }
  }

  public string Link
  {
   get
   {
    return this._link;
   }
  }

  public string Language
  {
   get
   {
    return this._language;
   }
  }

  public string Generator
  {
   get
   {
    return this._generator;
   }
  }

  public string Ttl
  {
   get
   {
    return this._ttl;
   }
  }

  public string Copyright
  {
   get
   {
    return this._copyright;
   }
  }

  public DateTime PubDate
  {
   get
   {
    return Util.ParseDateTime(this._pubDate);
   }
  }

  public string Category
  {
   get
   {
    return this._category;
   }
  }

  public DateTime LastBuildDate
  {
   get
   {
    return Util.ParseDateTime(this._lastBuildDate);
   }
  }
  public string ManagingEditor
  {
   get
   {
    return this._managingEditor;
   }
  }

  public string URL
  {
   get
   {
    return this._URL;
   }
  }

  public string DcLanguage
  {
   get
   {
    return this._dclanguage;
   }
  }

  //下面私有 Field 的值将 class SimpleRssReader 中通过反射赋值
  private string _dclanguage; //dc:language
  private string _URL;
  private string _managingEditor;
  private string _lastBuildDate;
  private string _title;
  private string _description;
  private string _link;
  private string _language;
  private string _generator;
  private string _ttl;
  private string _copyright;
  private string _pubDate;
  private string _category;
  

 }
 public class RssItem
 {
  private RssHeader _Header;

  public RssHeader Header
  {
   get
   {
    return this._Header;
   }
  }

  //下面私有 Field 的值将 class SimpleRssReader 中通过反射赋值
  private string _title;
  private string _link;
  private string _description;
  private string _category;
  private string _author;
  private string _pubDate;
  private string _comments;
  private string _guid;
  private string _slashcomments;
  private string _wfwcomment;
  private string _wfwcommentRss;
  private string _trackbackping;

  public string TrackbackPing
  {
   get
   {
    return this._trackbackping;
   }
  }

  public string WfwCommentRss
  {
   get
   {
    return this._wfwcommentRss;
   }
  }

  public string WfwComment
  {
   get
   {
    return this._wfwcomment;
   }
  }
  

  public string SlashComments
  {
   get
   {
    return this._slashcomments;
   }
  }
  public string Title
  {
   get
   {
    return this._title;
   }
  }

  public string Link
  {
   get
   {
    return this._link;
   }
  }

  public string Description
  {
   get
   {
    return this._description;
   }
  }

  public string Category
  {
   get
   {
    return this._category;
   }
  }

  public string Author
  {
   get
   {
    return this._author;
   }
  }

  public DateTime PubDate
  {
   get
   {
    return Util.ParseDateTime(this._pubDate);
   }
  }

  public string Comments
  {
   get
   {
    return this._comments;
   }
  }

  public string Guid
  {
   get
   {
    return this._guid;
   }
  }
 }
 public class SimpleRssReader
 {
  //RssHeader header 解析处理完毕事件
  public delegate void RssHeaderReceiveEventHandler(SimpleRssReader Sender, RssHeader Header);
  public event RssHeaderReceiveEventHandler RssHeaderReceive;

  //某一个 RssItem 解析处理完毕事件
  public delegate void RssItemReceiveEventHandler(SimpleRssReader Sender, RssItem Item);
  public event RssItemReceiveEventHandler RssItemReceive;

  private Type _TRS; //typeof(RssHeader)
  private Type _tri; //typeof(RssItem)

  private ArrayList _RssItemsAL;

  private RssHeader _rs;
  public RssHeader RssHeader
  {
   get
   {
    return this._rs;
   }
  }

  //用于存储所有的 RssItem
  private RssItem[] _RssItems;

  public RssItem[] RssItems
  {
   get
   {
    return this._RssItems;
   }
  }

  public void Rss(string URL)
  {
   XmlDocument xd = new XmlDocument();
   //如果效率不高可采用 WebRequest 替代
   xd.Load(URL);
   XmlNodeList xnl = xd.SelectNodes("/rss/channel");

   this._rs = new RssHeader(URL);

   this._TRS = typeof(RssHeader);
   this._tri = typeof(RssItem);

   this._RssItemsAL = new ArrayList();

   foreach (XmlNode xn in xnl)
   {
    //递归遍历
    this.Travel(xn, 0);
   }

   if (this._RssItemsAL.Count > 0)
   {
    this._RssItems = new RssItem[this._RssItemsAL.Count];
    int i = 0;
    foreach (object o in this._RssItemsAL)
    {
     this._RssItems[i++] = (RssItem) o;
    }
   }
  }

  /// <Header>
  /// 递归遍历
  /// </Header>
  /// <param name="xn">节点</param>
  /// <param name="i">项目数</param>
  private void Travel(XmlNode xn, int i)
  {
   if (xn.HasChildNodes)
   {
    foreach (XmlNode x in xn.ChildNodes)
    {
     if (x.ParentNode != null)
     {
      if (x.ParentNode.Name == "channel")
      {
       if (x.Name == "item")
       {
        i ++;
        if (i >= 1)
        {
         XmlNode node = null;
         bool b = false; //是否是 Rss Item
         RssItem ri = null;
         if (i == 1) //Header
         {
          node = xn;
          b = false;
         }
         else if (i > 1) //Item
         {
          node = x;
          b = true;
          ri = new RssItem();
         }

         foreach (XmlNode n in node.ChildNodes)
         {
          if (n.Name != "item")
          {
           if (!b) //Rss Header Header
           {
            //根据 XML 实际存在的属性,利用反射为 RssHeader 实例的私有成员赋值
            FieldInfo fi = this._TRS.GetField("_" + n.Name.Replace(":","") ,BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public);
            if (fi != null)
            {
             fi.SetValue(this._rs,n.InnerText);
            }
           }
           else //Rss Item
           {
            //根据 XML 实际存在的属性,利用反射为 RssItem 实例的私有成员赋值
            FieldInfo fi = this._tri.GetField("_" + n.Name.Replace(":",""),BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public);
            if (fi != null)
            {
             fi.SetValue(ri,n.InnerText);
            }
           }

          }
         }
         if (!b)
         {
          //触发 RssHeaderReceive 事件
          if (this.RssHeaderReceive != null)
          {
           this.RssHeaderReceive(this,this._rs);
          }
         }
         else
         {
          //制定 RssItem 实例的 Header/Header
          FieldInfo fi = this._tri.GetField("_Header",BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Public);
          if (fi != null)
          {
           fi.SetValue(ri,this._rs);
          }

          //触发 RssItemReceive 事件
          if (this.RssItemReceive != null)
          {
           this.RssItemReceive(this,ri);
          }
          this._RssItemsAL.Add(ri);
         }
        }
       }
      }
     }
     if (!x.HasChildNodes)
     {
      this.Travel(x, i);
     }
    }
   }
  }
 }

 public class Util
 {
  public static DateTime ParseDateTime(string s)
  {
   DateTime dt;
   if (s == null || s.ToString().Length <= 0)
   {
    dt = DateTime.Now;
   }
   else
   {
    try
    {
     dt = DateTime.Parse(s);
    }
    catch
    {
     dt = DateTime.Now;
    }
   }
   return dt;
  }
  /// <Header>
  /// 去除 HTML tag
  /// </Header>
  /// <param name="HTML">源</param>
  /// <returns>结果</returns>
  public static string StripHTML(string HTML) //google "StripHTML" 得到
  {
   string[] Regexs =
        {
         @"<script[^>]*?>.*?</script>",
         @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
         @"([\r\n])[\s]+",
         @"&(quot|#34);",
         @"&(amp|#38);",
         @"&(lt|#60);",
         @"&(gt|#62);",
         @"&(nbsp|#160);",
         @"&(iexcl|#161);",
         @"&(cent|#162);",
         @"&(pound|#163);",
         @"&(copy|#169);",
         @"&#(\d+);",
         @"-->",
         @"<!--.*\n"
        };

   string[] Replaces =
        {
         "",
         "",
         "",
         "\"",
         "&",
         "<",
         ">",
         " ",
         "\xa1", //chr(161),
         "\xa2", //chr(162),
         "\xa3", //chr(163),
         "\xa9", //chr(169),
         "",
         "\r\n",
         ""
        };

   string s = HTML;
   for (int i = 0; i < Regexs.Length; i++)
   {
    s = new Regex(Regexs[i], RegexOptions.Multiline | RegexOptions.IgnoreCase).Replace(s, Replaces[i]);
   }
   s.Replace("<", "");
   s.Replace(">", "");
   s.Replace("\r\n", "");
   return s;
  }
 }
}

//测试程序
namespace Test
{
 using System;
 using System.Data;
 using System.Reflection;
 using System.Data.SqlClient;

 using Microshaoft;
 using Microshaoft.Data;

 class ConsoleApplication
 {
  private SqlConnection _Connection;
  public string _Channel;

  public SqlConnection Connection
  {
   set
   {
    this._Connection = value;
   }
   get
   {
    return this._Connection;
   }
  }

  static void Main()
  {
   
   string s = "http://www.ccw.com.cn/rss/news2/1.xml";
   s = "http://dzh.mop.com/topic/rss.jsp?type=28";
   s = "http://www.ccw.com.cn/rss/news2/15.xml";
   s = "http://www.cnblogs.com/rss.aspx?id=-1";
   s = "http://localhost/rss.xml";
   //s = "http://weblog.siliconvalley.com/column/dangillmor/index.xml";
   //s= "http://www.skyone.com.cn/sub/rss/list_jjsc.xml";

   ConsoleApplication a = new ConsoleApplication();

   a.Connection = new SqlConnection("server=SERVER\\PSQLKE;user id=sa;password=;database=rss");
   a.Connection.Open();

   SimpleRssReader srr = new SimpleRssReader();

   srr.RssHeaderReceive += new Microshaoft.SimpleRssReader.RssHeaderReceiveEventHandler(a.srr_RssHeaderReceive);
   srr.RssItemReceive +=new Microshaoft.SimpleRssReader.RssItemReceiveEventHandler(a.srr_RssItemReceive);

   System.Console.WriteLine("waiting ....");
   srr.Rss(s); //以后改成多线程或异步

   System.Console.WriteLine("print all rss Header and items ....");
   System.Console.ReadLine();
   System.Console.WriteLine("Header: "+ srr.RssHeader.Title);
   foreach (RssItem ri in srr.RssItems)
   {
    System.Console.WriteLine("item: " + ri.Title);
   }
   System.Console.ReadLine();

  }

  private void srr_RssHeaderReceive(SimpleRssReader Sender, RssHeader Header)
  {
   System.Console.WriteLine("Header:" + Header.Link);
   System.Console.WriteLine("Header:" + Header.Title);

   this.SaveToDataBase("SP_AddChannel",typeof(RssHeader),Header);

  }

  private void srr_RssItemReceive(SimpleRssReader Sender, RssItem Item)
  {
   System.Console.WriteLine("Item: " + Item.Title);
   System.Console.WriteLine("Item: " + Item.Link);
   System.Console.WriteLine("Item: " + Util.StripHTML(Item.Description));

   this.SaveToDataBase("SP_AddChannelsDetails",typeof(RssItem),Item);

  }
  private void SaveToDataBase(string sp, Type t,object instance)
  {
   //获取 sp 所有参数
   SqlParameter[] spa = SqlHelperParameterCache.GetSpParameterSet(this.Connection, sp);
   System.Collections.Hashtable ht = new System.Collections.Hashtable();
   
   for (int i = 0; i < spa.Length; i++)
   {
    //保存 参数名称与其位置(次序) 的关系
    ht.Add(spa[i].ParameterName.ToLower().Replace("@", ""), i);

    //相当于为存储过程的所有参数赋初值
    spa[i].Value = null;
   }

   //得到所有的属性
   PropertyInfo[] pi = t.GetProperties();
   foreach (PropertyInfo x in pi)
   {
    if (ht.ContainsKey( x.Name.ToLower()))
    {
     //根据参数(属性)名称得到参数的次序!
     int i = (int) ht[x.Name.ToLower()];
     if (spa[i].Direction == System.Data.ParameterDirection.Input || spa[i].Direction == System.Data.ParameterDirection.InputOutput)
     {
      object o;
      if (x.PropertyType.Name == "String")
      {
       o = x.GetValue(instance,null);
       if (o != null)
       {
        string s = Util.StripHTML((string) o);
        o = s;
       }
      }
      else
      {
       o = x.GetValue(instance,null);
      }
      
      spa[i].Value = o;
     }
    }
    
   }

   if (t == typeof(RssItem))
   {
    spa[0].Value = ((RssItem) instance).Header.URL;
   }

   SqlHelper.ExecuteNonQuery(this.Connection, CommandType.StoredProcedure, sp, spa);
   if (spa[spa.Length - 1].Value != System.DBNull.Value)
   {
    System.Console.WriteLine("Save to ID: {0} successful!", spa[spa.Length - 1].Value);
   }
   else
   {
    System.Console.WriteLine("save failed! may be duplicate!");
   }
  }
 }
}

//==========================================================================================================
/*
--sql Script
if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannel]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannel]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[SP_AddChannelsDetails]') and OBJECTPROPERTY(id, N'IsProcedure') = 1)
drop procedure [dbo].[SP_AddChannelsDetails]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[Channels]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[Channels]
GO

if exists (select * from dbo.sysobjects where id = object_id(N'[dbo].[ChannelsDetails]') and OBJECTPROPERTY(id, N'IsUserTable') = 1)
drop table [dbo].[ChannelsDetails]
GO

CREATE TABLE [dbo].[Channels] (
 [ID] [int] IDENTITY (1, 1) NOT NULL ,
 [URL] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
 [Channel] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
 [Title] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
 [Description] [varchar] (1000) COLLATE Chinese_PRC_CI_AS NULL ,
 [link] [varchar] (500) COLLATE Chinese_PRC_CI_AS NULL ,
 [language] [varchar] (10) COLLATE Chinese_PRC_CI_AS NULL ,
 [generator] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
 [ttl] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
 [copyright] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
 [pubDate] [datetime] NULL ,
 [category] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL ,
 [dclanguage] [varchar] (100) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO

CREATE TABLE [dbo].[ChannelsDetails] (
 [ID] [int] IDENTITY (1, 1) NOT NULL ,
 [ChannelID] [int] NULL ,
 [title] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [link] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [description] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [category] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [author] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [pubDate] [datetime] NULL ,
 [comments] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [guid] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL ,
 [trackbackping] [varchar] (8000) COLLATE Chinese_PRC_CI_AS NULL
) ON [PRIMARY]
GO

SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO


CREATE   proc SP_AddChannel
@URL varchar(8000)
,@link varchar(8000)
,@Channel varchar(8000)
,@Title varchar(8000)
,@Image varchar(8000)
,@Description varchar(7999)
,@language varchar(8000)
,@generator varchar(8000)
,@ttl varchar(8000)
,@copyright varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Docs varchar(8000)
,@ManagingEditor varchar(8000)
,@dclanguage varchar(8000)
,@ int out
as
set @ = 0
insert into Channels ([URL],[Channel],[Title],[Description],[link],[language],[generator],[ttl],[copyright],[pubDate],[category],[dclanguage])
select @URL,@Channel,@Title,@Description,@link,@language,@generator,@ttl,@copyright,@pubDate,@category,@dclanguage
where not exists(select 1 from Channels where [URL] = @URL)
select @ = SCOPE_IDENTITY()
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO

SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO

CREATE     proc SP_AddChannelsDetails
@URL varchar(8000)
,@Title varchar(8000)
,@Description varchar(7000)
,@link varchar(8000)
,@pubDate datetime
,@category varchar(8000)
,@Comments varchar(8000)
,@Guid varchar(8000)
,@trackbackping varchar(8000)
,@ int out
as
set @ = 0
insert into ChannelsDetails ([ChannelID],[Title],[Description],[link],[pubDate],[category],[comments],[guid],[trackbackping])
select id,@Title,@Description,@link,@pubDate,@category,@comments,isnull(@guid,@link),@trackbackping
from Channels
where not exists (select 1 from ChannelsDetails where guid = isnull(@guid,@link)) and URL = @URL
select @ = SCOPE_IDENTITY()
GO
SET QUOTED_IDENTIFIER OFF
GO
SET ANSI_NULLS ON
GO
*/

posted @ 2005-03-27 15:33  于斯人也  阅读(1489)  评论(0)  编辑  收藏  举报