Sitemap的作用很简单:向搜索引擎提供站点结构,便于搜索引擎规范、全面、高效地收录站点信息。制作Sitemap并提交给搜索引擎,可以使网站的内容被完整收录,包括那些搜索引擎不易收录的页面。

一般来说,如果没有规则指引,搜索引擎不会主动完整遍历站点的所有页面,特别是通过提交(postback)生成的页面。公司网站查询这一块,所有符合业务逻辑的查询条件所对应的页面,搜索引擎不一定会全部收录;把这些页面静态化,然后生成sitemap,是首先想到的方法。

页面静态化的做法以前已经介绍过。

生成sitemap流程:

1. 根据地址选择的JS文件获取所有查询条件;

2. 生成这些查询条件对应的postback页面的静态URL;

3. 分别请求每个URL,获取这些页面的关键字和描述;

4. 生成sitemap文件。

程序执行费时15秒,生成4K+记录。

代码如下:

using System;

using System.Data;

using System.Configuration;

using System.Collections;

using System.Web;

using System.Web.Security;

using System.Web.UI;

using System.Web.UI.WebControls;

using System.Web.UI.WebControls.WebParts;

using System.Web.UI.HtmlControls;

using System.Net;

using System.Collections.Generic;

using System.IO;

using System.Text;

namespace Ctrip.UI.PkgBkiSiteUI

{

    public partial class GenSiteMap : System.Web.UI.Page

    {

        /// <summary>
        /// URL of the JavaScript file that GetSiteMapNodeList downloads and scans for the
        /// "source.pkgChs" and "source.pkgEng" entry lists.
        /// </summary>
        string strDestinationUrl = "http://webresource.ctrip.com/code/js/resource/address_tuna/pkgDestination.js";

        /// <summary>

        /// get each search info page url by address select js page

        /// </summary>

        /// <returns></returns>

        List<UrlNode> GetSiteMapNodeList()

        {

            string strUrlModel = "http://vacations.ctrip.com/booking/showpkglist--startcity--{0}---searchvalue--{1}---searchtype--{2}.html";

            string[] arrDepartureCity = { "1,北京", "2,上海", "32,广州", "30,深圳", "17,杭州", "28,成都", "12,南京", "7,青岛", "25,厦门", "477,武汉", "451,沈阳", "58,香港", "144,济南" };

            List<UrlNode> list = new List<UrlNode>();

            Stream stream;

            StreamReader reader;

            WebClient web;

            try

            {

                web = new WebClient();

                stream = web.OpenRead(strDestinationUrl);

                reader = new StreamReader(stream, Encoding.UTF8);

                string js = reader.ReadToEnd();

                js = js.Remove(0, js.IndexOf("source.pkgChs"));

                js = js.Remove(0, js.IndexOf("\"") + 2);

                string[] arrPkgChs = js.Substring(0, js.IndexOf("\"")).Split('@');

                js = js.Remove(0, js.IndexOf("source.pkgEng"));

                js = js.Remove(0, js.IndexOf("\"") + 2);

                string[] arrPkgEng = js.Substring(0, js.IndexOf("\"")).Split('@');

                js = string.Empty;

                foreach (string str in arrPkgChs)//domestic product

                {

                    if (str.IndexOf('|') >= 0)

                    {

                        string[] pkg = str.Split('|');

                        foreach (string strDepartureCity in arrDepartureCity)

                        {

                            UrlNode node = new UrlNode();

                            node.Url = string.Format(strUrlModel, strDepartureCity.Split(',')[0], pkg[2], pkg[3]);

                            ReadPage(node.Url, ref node.Title, ref node.Description);

                            //node.Title = "SHANGHAI-" + pkg[0].Trim('@');

                            //node.Description = pkg[0].Trim('@');

                            list.Add(node);

                        }

                    }

                }

                foreach (string str in arrPkgEng)//overseas product

                {

                    if (str.IndexOf('|') >= 0)

                    {

                        string[] pkg = str.Split('|');

                        foreach (string strDepartureCity in arrDepartureCity)

                        {

                            UrlNode node = new UrlNode();

                            node.Url = string.Format(strUrlModel, strDepartureCity.Split(',')[0], pkg[2], pkg[3]);

                            ReadPage(node.Url, ref node.Title, ref node.Description);

                            //node.Title = "SHANGHAI-" + pkg[0].Trim('@');

                            //node.Description = pkg[0].Trim('@');

                            list.Add(node);

                        }

                    }

                }

            }

            catch (Exception ex)

            {

                Response.Write("读取JS页面出错:" + ex.ToString() + "<hr>");

            }

            finally

            {

                stream = null;

                reader = null;

                web = null;

            }

            return list;

        }

        /// <summary>

        /// get title&description info from product page

        /// </summary>

        /// <param name="webPath"></param>

        /// <param name="pageTitle"></param>

        /// <param name="pageDesc"></param>

        void ReadPage(string webPath, ref string pageTitle, ref string pageDesc)

        {

            Stream stream;

            StreamReader reader;

            WebClient web;

            string titleStart = "name=\"keywords\" content=\"";

            string descStart = "name=\"description\" content=\"";

            try

            {

                web = new WebClient();

                stream = web.OpenRead(webPath);

                reader = new StreamReader(stream, Encoding.GetEncoding("gb2312"));

                string page = reader.ReadToEnd();

                pageTitle = page.Remove(0, page.IndexOf(titleStart) + titleStart.Length);

                pageTitle = pageTitle.Substring(0, pageTitle.IndexOf('"'));

                pageDesc = page.Remove(0, page.IndexOf(descStart) + descStart.Length);

                pageDesc = pageDesc.Substring(0, pageDesc.IndexOf('"'));

            }

            catch (Exception ex)

            {

                Response.Write("读取度假产品页面出错:" + ex.ToString() + "<hr>");

            }

            finally

            {

                stream = null;

                reader = null;

                web = null;

            }

        }

        /// <summary>

        /// generate xml file

        /// </summary>

        /// <param name="nodeList"></param>

        void GenerateSiteMap(List<UrlNode> nodeList)

        {

            string strSitemapModelBegin = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>"

                + "<siteMap xmlns=\"http://schemas.microsoft.com/AspNet/SiteMap-File-1.0\" ><siteMapNode>";

            string strSitemapModelEnd = "</siteMapNode></siteMap>";

            string strSitemapNodeModel = "<siteMapNode url=\"{0}\" title=\"{1}\" description=\"{2}\"/>";

            string strSavePath = Server.MapPath("~/Web.sitemap");

            StreamWriter writer;

            try

            {

                if (File.Exists(strSavePath))

                {

                    File.Delete(strSavePath);

                }

                writer = new StreamWriter(strSavePath, false, Encoding.UTF8);

                writer.WriteLine(strSitemapModelBegin);

                nodeList.ForEach(delegate(UrlNode node)

                {

                    writer.WriteLine(strSitemapNodeModel, node.Url, node.Title, node.Description);

                });

                writer.WriteLine(strSitemapModelEnd);

                writer.WriteLine("<!--By " + Request.UserHostAddress + " At " + DateTime.Now + "-->");

                writer.Flush();

            }

            catch (Exception ex)

            {

                writer = null;

                Response.Write("SITEMAP出错:" + ex.ToString() + "<hr>");

            }

        }

        /// <summary>
        /// Page load handler. Currently performs no work.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)

        {

           

        }

        protected void btnGenSiteMap_Click(object sender, EventArgs e)

        {

            if (TextBox1.Text != "***")

            {

                ClientScript.RegisterStartupScript(typeof(string), "", "alert('You Do Not Have Permission!')", true);

            }

            else

            {

                try

                {

                    GenerateSiteMap(GetSiteMapNodeList());

                }

                catch (Exception ex)

                {

                    Response.Write(ex.ToString());

                }

               <S

posted on 2008-06-22 21:10  Caviare  阅读(690)  评论(0编辑  收藏  举报