Asp.Net MVC页面静态化功能实现二:用递归算法来实现

上一篇提到采用IHttpModule,在用户访问网站时通过重新定义Response.Filter将返回给客户端的html代码保存下来,以便用户下一次访问时直接访问静态页面。

Asp.Net MVC页面静态化功能实现一:利用IHttpModule,摒弃ResultFilter

后来想到可以通过WebRequest获取html代码,然后采用递归算法来实现。基本实现思路如下:

通过WebRequest获取超链接地址返回的html代码,并保存;然后用正则表达式匹配html代码中所有超链接href=""里面的地址信息;循环这些超链接地址,再递归地通过WebRequest获取html代码。

实现代码如下:

/// <summary>
/// Generates static HTML copies of a site by fetching a start page over HTTP, saving it to disk,
/// and recursively following the links found in each saved page.
/// </summary>
public class HtmlPageHelper
{
    // Matches href="..." or href='...' (case-insensitive) and captures the quoted value in group 1.
    // The value may not contain '.', '#' or ':', so links with a file extension, a fragment,
    // or a scheme/port never match and are therefore not followed.
    private static readonly Regex HrefRegex = new Regex(
        "href\\s*=\\s*(?:[\"'](?<1>[^\"'.#:]*)[\"'])",
        RegexOptions.IgnoreCase);

    // URLs that have already been fetched and written; guards against infinite recursion on link cycles.
    private readonly System.Collections.Generic.HashSet<string> htmlCreatedList =
        new System.Collections.Generic.HashSet<string>();

    /// <summary>
    /// Recursively generates static pages: saves the HTML of <paramref name="urlString"/> and then
    /// does the same for every link found in that HTML.
    /// </summary>
    /// <param name="urlString">Absolute URL of the page to fetch.</param>
    public void SaveHtmlCode(string urlString)
    {
        if (htmlCreatedList.Contains(urlString))
        {
            return;
        }

        string htmlCode = GetHtmlCodeFromUrl(urlString);
        string htmlPath = GetHtmlPathFromUrl(urlString);

        // CreateDirectory is a no-op when the directory already exists.
        Directory.CreateDirectory(Path.GetDirectoryName(htmlPath));
        File.WriteAllText(htmlPath, htmlCode);

        // Record the URL before recursing so pages that link back here are skipped.
        htmlCreatedList.Add(urlString);

        foreach (string link in GetUrlLinkFromHtmlCode(htmlCode))
        {
            // Links are site-relative; normalise backslashes and prepend the configured prefix.
            string absoluteUrl = WebConfigInfo.UrlPrefix + link.Replace("\\", "/");
            SaveHtmlCode(absoluteUrl);
        }
    }

    /// <summary>
    /// Downloads the HTML of a page with an HTTP GET request.
    /// </summary>
    /// <param name="urlString">URL of the page.</param>
    /// <returns>The response body decoded as UTF-8.</returns>
    private string GetHtmlCodeFromUrl(string urlString)
    {
        HttpWebRequest hwRequest = (HttpWebRequest)WebRequest.Create(urlString);
        // NOTE(review): this value embeds a "User-Agent:" prefix and lacks a closing ')';
        // kept unchanged to preserve what the server sees — confirm whether it should be cleaned up.
        hwRequest.UserAgent = "User-Agent:Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705";
        hwRequest.Accept = "*/*";
        hwRequest.KeepAlive = true;
        hwRequest.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

        // using blocks release the connection, stream and reader even if reading throws.
        using (HttpWebResponse hwResponse = (HttpWebResponse)hwRequest.GetResponse())
        using (Stream streamResponse = hwResponse.GetResponseStream())
        using (StreamReader readerOfStream = new StreamReader(streamResponse, System.Text.Encoding.GetEncoding("utf-8")))
        {
            return readerOfStream.ReadToEnd();
        }
    }

    /// <summary>
    /// Maps a page URL to the physical path where its static copy is stored.
    /// </summary>
    /// <param name="urlString">URL of the page.</param>
    /// <returns>
    /// A path under the application's "Html" folder that mirrors the URL path. The file name is
    /// "Index.html" when the URL has no query string; otherwise it is the query keys and values
    /// concatenated, followed by ".html".
    /// </returns>
    private string GetHtmlPathFromUrl(string urlString)
    {
        Uri uri = new Uri(urlString);
        string[] querys = uri.Query.Split(new char[] { '?', '&', '=' }, StringSplitOptions.RemoveEmptyEntries);

        string fileName = querys.Length == 0
            ? "Index.html"
            : string.Concat(querys) + ".html";

        string filePath = HttpContext.Current.Request.PhysicalApplicationPath + "Html" + uri.AbsolutePath + "\\" + fileName;

        // Use Windows separators, then collapse the doubled separators produced by the concatenation above.
        filePath = filePath.Replace("/", "\\");
        filePath = filePath.Replace("\\\\", "\\");
        return filePath;
    }

    /// <summary>
    /// Extracts the distinct link targets from the href attributes in a piece of HTML.
    /// </summary>
    /// <param name="htmlCode">HTML to scan for links.</param>
    /// <returns>
    /// The distinct href values, in order of first appearance, without the surrounding
    /// <c>href=</c> and quotes, and with every "amp;" removed (so "&amp;amp;" becomes "&amp;").
    /// </returns>
    private ArrayList GetUrlLinkFromHtmlCode(string htmlCode)
    {
        ArrayList links = new ArrayList();
        foreach (Match match in HrefRegex.Matches(htmlCode))
        {
            // Take the captured value rather than the whole match, so the attribute name's casing
            // and the quote style never leak into the URL.
            string link = match.Groups[1].Value.Replace("amp;", "");
            if (!links.Contains(link))
            {
                links.Add(link);
            }
        }
        return links;
    }
}

 

posted @ 2015-11-04 17:43  Tracine0513  阅读(438)  评论(0)  编辑  收藏  举报