首页  :: 新随笔  :: 联系 :: 订阅  :: 管理

正则匹配 获取QQ空间日志

Posted on 2010-05-13 14:28  达奇  阅读(873)  评论(2)  编辑  收藏  举报
using System;   
using System.Collections.Generic;   
using System.Net;   
using System.Text.RegularExpressions;   
namespace QQ   
{   
    /// <summary>
    /// Downloads the blog list and the individual blog pages of one QQ account
    /// from the <c>b.qzone.qq.com</c> blog CGI endpoints and extracts the fields
    /// of each entry with regular expressions. Results are fetched lazily on
    /// first request and cached in the instance afterwards.
    /// </summary>
    class QZone
    {
        // URL templates: {0} is the QQ number, {1} (detail page only) is the blog ID.
        private const string BlogListUrlFormat = @"http://b.qzone.qq.com/cgi-bin/blognew/blog_output_toppage?uin={0}&property=GoRE&numperpage=1000&maxlen=1000&direct=1";
        private const string BlogDataUrlFormat = @"http://b.qzone.qq.com/cgi-bin/blognew/blog_output_data?uin={0}&blogid={1}&numperpage=1000&property=GoRE";

        // Cached list of blog IDs; null until GetBlogList has completed successfully.
        private List<long> BlogList = null;

        /// <summary>QQ number whose blog is scraped.</summary>
        public long QNumber;

        /// <summary>
        /// Cached blog entries keyed by blog ID; null until
        /// <see cref="GetBlogDataDictionary"/> has completed successfully.
        /// </summary>
        public Dictionary<long, QBlogData> BlogDataDictionary = null;

        #region Regular expressions
        // The patterns are constant, so they are compiled once and shared by all instances.

        // One list item (<p class="list_tit">...</p>) on the blog list page.
        private static readonly Regex RgxItem = new Regex("<p class=\"list_tit\">.*?</p>", RegexOptions.Singleline | RegexOptions.Compiled);
        // Text between "Blog(" and the next ")" inside a list item; parsed as the blog ID.
        private static readonly Regex RgxID = new Regex("(?<=Blog\\().*?(?=\\))", RegexOptions.Singleline | RegexOptions.Compiled);
        // Text of the first "<span >...</span>" on a blog page; used as the title.
        private static readonly Regex RgxTitle = new Regex("(?<=<span >).*?(?=</span>)", RegexOptions.Singleline | RegexOptions.Compiled);
        // NOTE(review): this only matches the literal category name "个人日记"; every other
        // category yields an empty string. A ".*?" capture may have been intended - confirm
        // before changing.
        private static readonly Regex RgxCategory = new Regex("(?<=Category\\(')个人日记(?='\\))", RegexOptions.Singleline | RegexOptions.Compiled);
        // Text from the opening tag of <div id="blogDetailDiv" ...> up to the first </div>.
        private static readonly Regex RgxContext = new Regex("(?<=<div id=\"blogDetailDiv\".*?>).*?(?=</div>)", RegexOptions.Singleline | RegexOptions.Compiled);
        // Text after "发表于" / "转载于" up to the next '<'; parsed as the entry's timestamp.
        private static readonly Regex RgxTime = new Regex("(?<=(发表|转载)于).*?(?=<)", RegexOptions.Singleline | RegexOptions.Compiled);
        #endregion

        /// <summary>Creates a scraper for the given QQ number. No network access happens here.</summary>
        /// <param name="QNumber">QQ number whose blog will be read.</param>
        public QZone(long QNumber)
        {
            this.QNumber = QNumber;
        }

        /// <summary>
        /// Returns the IDs of the blog entries found on the blog list page,
        /// downloading and parsing the page on the first call.
        /// </summary>
        /// <returns>The cached list of blog IDs (possibly empty).</returns>
        /// <exception cref="WebException">The list page could not be downloaded.</exception>
        public List<long> GetBlogList()
        {
            if (BlogList == null)
            {
                string listPage;
                using (WebClient wc = new WebClient())
                {
                    listPage = wc.DownloadString(string.Format(BlogListUrlFormat, QNumber));
                }

                // Build into a local list and publish it only when complete, so that a
                // failure part-way through never leaves a truncated list cached.
                List<long> ids = new List<long>();
                foreach (Match match in RgxItem.Matches(listPage))
                {
                    long id;
                    // Items without a parsable "Blog(<id>)" are skipped instead of
                    // aborting the whole scrape with a FormatException.
                    if (long.TryParse(RgxID.Match(match.Value).Value, out id))
                    {
                        ids.Add(id);
                    }
                }
                BlogList = ids;
            }
            return BlogList;
        }

        /// <summary>
        /// Returns all blog entries keyed by blog ID, downloading and parsing one
        /// page per entry on the first call.
        /// </summary>
        /// <remarks>
        /// Fields whose pattern does not match are left as empty strings. An entry
        /// whose timestamp cannot be parsed keeps <c>default(DateTime)</c> in
        /// <see cref="QBlogData.Time"/>.
        /// </remarks>
        /// <returns>The cached dictionary of blog entries (possibly empty).</returns>
        /// <exception cref="WebException">A page could not be downloaded.</exception>
        public Dictionary<long, QBlogData> GetBlogDataDictionary()
        {
            List<long> ids = GetBlogList();
            if (BlogDataDictionary == null)
            {
                // Filled locally and published at the end: if a download throws, the
                // cache stays null and the next call retries instead of returning a
                // partial dictionary.
                Dictionary<long, QBlogData> entries = new Dictionary<long, QBlogData>(ids.Count);
                using (WebClient wc = new WebClient())
                {
                    foreach (long BlogID in ids)
                    {
                        // A repeated ID would make Dictionary.Add throw; skip it before
                        // spending a download on it.
                        if (entries.ContainsKey(BlogID))
                        {
                            continue;
                        }

                        string RecData = wc.DownloadString(string.Format(BlogDataUrlFormat, QNumber, BlogID));
                        QBlogData data = new QBlogData();
                        data.BlogID = BlogID;
                        data.Title = RgxTitle.Match(RecData).Value;
                        data.Context = RgxContext.Match(RecData).Value;
                        data.Category = RgxCategory.Match(RecData).Value;

                        DateTime time;
                        if (DateTime.TryParse(RgxTime.Match(RecData).Value, out time))
                        {
                            data.Time = time;
                        }

                        entries.Add(BlogID, data);
                    }
                }
                BlogDataDictionary = entries;
            }
            return BlogDataDictionary;
        }

        /// <summary>
        /// Returns a single blog entry, loading all entries first if that has not
        /// happened yet.
        /// </summary>
        /// <param name="BlogID">ID of the blog entry to return.</param>
        /// <returns>The entry stored under <paramref name="BlogID"/>.</returns>
        /// <exception cref="KeyNotFoundException">No entry with that ID was loaded.</exception>
        public QBlogData GetBlogData(long BlogID)
        {
            if (BlogDataDictionary == null)
            {
                GetBlogDataDictionary();
            }
            return BlogDataDictionary[BlogID];
        }

        /// <summary>
        /// Entry point: reads the QQ number from the first command-line argument
        /// and downloads all blog entries for it.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the QQ number.</param>
        static void Main(string[] args)
        {
            long qNumber;
            if (args == null || args.Length < 1 || !long.TryParse(args[0], out qNumber))
            {
                Console.Error.WriteLine("Usage: pass the QQ number as the first command-line argument.");
                return;
            }

            QZone qz = new QZone(qNumber);
            qz.GetBlogDataDictionary();
        }

    }
    /// <summary>
    /// Plain value-type record for one scraped blog entry; the fields are
    /// populated by <c>QZone.GetBlogDataDictionary</c>.
    /// </summary>
    internal struct QBlogData
    {
        /// <summary>Numeric ID of the blog entry.</summary>
        public long BlogID;

        /// <summary>Title text extracted from the blog page.</summary>
        public string Title;

        /// <summary>Content extracted from the page's <c>blogDetailDiv</c> element.</summary>
        public string Context;

        /// <summary>Category text extracted from the blog page.</summary>
        public string Category;

        /// <summary>Timestamp parsed from the blog page.</summary>
        public DateTime Time;
    }

}