Sitemap的作用很简单,为搜索引擎提供站点的结构,让搜索引擎更规范,更全面,更高效地收录你的站点信息。制作Sitemap,并提交给搜索引擎可以使网站的内容完全被收录,包括搜索引擎不易收录的页面。
一般如果没有规则,搜索引擎不会主动完全遍历你的站点的所有页面,特别是在提交后生成的页面。公司网站的查询这块所有符合业务逻辑的查询条件搜索引擎不一定能全部收录,把这些页面静态化,然后生成sitemap是首先想到的方法。
静态化以前有介绍过。
生成sitemap流程:
根据地址选择器的JS获取所有的查询条件;
生成这些查询条件postback后的页面静态URL;
分别请求每个URL,获取这些页面的关键字和描述;
写sitemap文件。
程序执行费时15秒,生成4K+条记录。
代码如下:
using System;
using System.Collections;
using System.Collections.Generic;
using System.Configuration;
using System.Data;
using System.IO;
using System.Net;
using System.Text;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
namespace Ctrip.UI.PkgBkiSiteUI
{
public partial class GenSiteMap : System.Web.UI.Page
{
// URL of the address-selector JS resource that carries the package/city tables.
// NOTE(review): the original literal was column-truncated in this source; the
// host is webresource.ctrip.com but the path is missing — restore the complete
// JS path before running.
string strDestinationUrl = "http://webresource.ctrip.com/"; // TODO: confirm full JS path
/// <summary>
/// Builds the sitemap node list: downloads the address-selector JS, extracts the
/// domestic (source.pkgChs) and overseas (source.pkgEng) package tables embedded
/// in it, and emits one UrlNode per (departure city, package) combination, with
/// title/description scraped from each generated page via ReadPage.
/// </summary>
/// <returns>The collected nodes; empty when the JS download or parsing fails.</returns>
List<UrlNode> GetSiteMapNodeList()
{
    // NOTE(review): this URL template was column-truncated in the original source.
    // string.Format below consumes three placeholders (departure-city id, pkg[2],
    // pkg[3]); restore the real vacations.ctrip.com template before running.
    string strUrlModel = "http://vacations.ctrip.com/{0}/{1}/{2}"; // TODO: confirm real template
    string[] arrDepartureCity = { "1,北京", "2,上海", "32,广州", "30,深圳", "17,杭州", "28,成都", "12,南京", "7,青岛", "25,厦门", "477,武汉", "451,沈阳", "58,香港", "144,济南" };
    List<UrlNode> list = new List<UrlNode>();
    try
    {
        string js;
        // Dispose the WebClient/stream/reader deterministically. The original
        // only nulled the references in a finally block, which releases nothing
        // and leaks the underlying connection and stream.
        using (WebClient web = new WebClient())
        using (Stream stream = web.OpenRead(strDestinationUrl))
        using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
        {
            js = reader.ReadToEnd();
        }
        string[] arrPkgChs = ExtractPackageTable(ref js, "source.pkgChs");
        string[] arrPkgEng = ExtractPackageTable(ref js, "source.pkgEng");
        AppendNodes(list, arrPkgChs, arrDepartureCity, strUrlModel); // domestic products
        AppendNodes(list, arrPkgEng, arrDepartureCity, strUrlModel); // overseas products
    }
    catch (Exception ex)
    {
        Response.Write("读取JS页面出错:" + ex.ToString() + "<hr>");
    }
    return list;
}
/// <summary>
/// Extracts the '@'-separated package table that follows <paramref name="marker"/>
/// in the JS text. Mirrors the original parsing exactly: skip to the marker, skip
/// past the opening quote plus one character, then take everything up to the next
/// quote. Mutates <paramref name="js"/> so the next marker can be searched from
/// the remaining tail.
/// </summary>
static string[] ExtractPackageTable(ref string js, string marker)
{
    js = js.Remove(0, js.IndexOf(marker));
    js = js.Remove(0, js.IndexOf("\"") + 2);
    return js.Substring(0, js.IndexOf("\"")).Split('@');
}
/// <summary>
/// For every '|'-delimited package entry, appends one UrlNode per departure city,
/// scraping title/description from the generated page. Entries without a '|'
/// (headers/empties) are skipped, as in the original loops.
/// </summary>
void AppendNodes(List<UrlNode> list, string[] packages, string[] departureCities, string urlModel)
{
    foreach (string str in packages)
    {
        if (str.IndexOf('|') < 0)
        {
            continue;
        }
        string[] pkg = str.Split('|');
        foreach (string strDepartureCity in departureCities)
        {
            UrlNode node = new UrlNode();
            node.Url = string.Format(urlModel, strDepartureCity.Split(',')[0], pkg[2], pkg[3]);
            ReadPage(node.Url, ref node.Title, ref node.Description);
            list.Add(node);
        }
    }
}
/// <summary>
/// Downloads a product page (served as gb2312) and scrapes the content of its
/// "keywords" and "description" meta tags into the ref parameters. On any
/// failure (network error, tag not found) the error is echoed to the response
/// and the ref parameters keep whatever value they had on entry.
/// </summary>
/// <param name="webPath">Absolute URL of the product page.</param>
/// <param name="pageTitle">Receives the "keywords" meta-tag content.</param>
/// <param name="pageDesc">Receives the "description" meta-tag content.</param>
void ReadPage(string webPath, ref string pageTitle, ref string pageDesc)
{
    const string titleStart = "name=\"keywords\" content=\"";
    const string descStart = "name=\"description\" content=\"";
    try
    {
        string page;
        // Dispose network resources deterministically. The original only nulled
        // the references in finally, leaking the connection/stream on every one
        // of the 4K+ calls this method receives per run.
        using (WebClient web = new WebClient())
        using (Stream stream = web.OpenRead(webPath))
        using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding("gb2312")))
        {
            page = reader.ReadToEnd();
        }
        // If a marker is missing, IndexOf returns -1 and the Remove/Substring
        // throws; the catch below reports it, matching the original behavior.
        pageTitle = page.Remove(0, page.IndexOf(titleStart) + titleStart.Length);
        pageTitle = pageTitle.Substring(0, pageTitle.IndexOf('"'));
        pageDesc = page.Remove(0, page.IndexOf(descStart) + descStart.Length);
        pageDesc = pageDesc.Substring(0, pageDesc.IndexOf('"'));
    }
    catch (Exception ex)
    {
        Response.Write("读取度假产品页面出错:" + ex.ToString() + "<hr>");
    }
}
/// <summary>
/// Serializes the collected nodes to ~/Web.sitemap in the ASP.NET site-map XML
/// format, overwriting any existing file, and appends a generated-by comment
/// with the caller's address and timestamp.
/// </summary>
/// <param name="nodeList">Nodes to write; must not be null.</param>
void GenerateSiteMap(List<UrlNode> nodeList)
{
    // NOTE(review): the namespace URI was column-truncated in the original
    // source; the standard ASP.NET site-map schema URI is restored here —
    // confirm against the site's existing Web.sitemap.
    string strSitemapModelBegin = "<?xml version=\"1.0\" encoding=\"utf-8\" ?>"
        + "<siteMap xmlns=\"http://schemas.microsoft.com/AspNet/SiteMap-File-1.0\"><siteMapNode>";
    string strSitemapModelEnd = "</siteMapNode></siteMap>";
    string strSitemapNodeModel = "<siteMapNode url=\"{0}\" title=\"{1}\" description=\"{2}\"/>";
    string strSavePath = Server.MapPath("~/Web.sitemap");
    try
    {
        // StreamWriter(path, append: false) truncates an existing file itself,
        // so the original's File.Exists/File.Delete pair was redundant.
        using (StreamWriter writer = new StreamWriter(strSavePath, false, Encoding.UTF8))
        {
            writer.WriteLine(strSitemapModelBegin);
            foreach (UrlNode node in nodeList)
            {
                // Encode scraped values: a quote, '&' or '<' in page metadata
                // would otherwise break the XML attributes (or inject markup).
                writer.WriteLine(strSitemapNodeModel,
                    HttpUtility.HtmlEncode(node.Url),
                    HttpUtility.HtmlEncode(node.Title),
                    HttpUtility.HtmlEncode(node.Description));
            }
            writer.WriteLine(strSitemapModelEnd);
            writer.WriteLine("<!--By " + Request.UserHostAddress + " At " + DateTime.Now + "-->");
        } // Dispose flushes and closes; the original never closed the writer,
          // leaking the file handle until GC.
    }
    catch (Exception ex)
    {
        Response.Write("写SITEMAP出错:" + ex.ToString() + "<hr>");
    }
}
/// <summary>
/// Intentionally empty: this page does nothing on load; sitemap generation is
/// triggered only by the btnGenSiteMap click handler.
/// </summary>
protected void Page_Load(object sender, EventArgs e)
{
}
protected void btnGenSiteMap_Click(object sender, EventArgs e)
{
if (TextBox1.Text != "***")
{
ClientScript.RegisterStartupScr
}
else
{
try
{
GenerateSiteMap(GetSiteMapNodeL
}
catch (Exception ex)
{
Response.Write(ex.ToString());
}
<S