// Code
/*
* Author:Sunjoy at CCNU
* 如果您改进了这个类请发一份代码给我(ccnusjy 在gmail.com)
*/
using System;
using System.Data;
using System.Configuration;
using System.Net;
using System.IO;
using System.Text;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Threading;
using System.Web;
/// <summary>
/// 网页类
/// </summary>
public class WebPage
{
#region 私有成员
private Uri m_uri; //网址
private List<Link> m_links; //此网页上的链接
private string m_title; //此网页的标题
private string m_html; //此网页的HTML代码
private string m_outstr; //此网页可输出的纯文本
private bool m_good; //此网页是否可用
private int m_pagesize; //此网页的大小
private static Dictionary<string, CookieContainer> webcookies = new Dictionary<string, CookieContainer>();//存放所有网页的Cookie
private string m_post; //此网页的登陆页需要的POST数据
private string m_loginurl; //此网页的登陆页
#endregion
#region 私有方法
/// <summary>
/// Parses the page HTML for anchor (href) and frame/iframe (src) targets and
/// caches them in m_links. Relative targets are resolved against m_uri.
/// Anchor links also carry their link text with tags, whitespace and a few
/// stray characters removed; frame links have empty text.
/// </summary>
/// <returns>The cached list of links (possibly empty).</returns>
private List<Link> getLinks()
{
    // A failed download can leave the fields unset; treat that as "no links".
    if (m_links == null)
    {
        m_links = new List<Link>();
    }
    if (m_links.Count != 0 || m_uri == null || string.IsNullOrEmpty(m_html))
    {
        return m_links;
    }

    Regex[] patterns = new Regex[2];
    patterns[0] = new Regex("(?m)<a[^><]+href=(\"|')?(?<url>([^>\"'\\s)])+)(\"|')?[^>]*>(?<text>(\\w|\\W)*?)</", RegexOptions.Multiline | RegexOptions.IgnoreCase);
    patterns[1] = new Regex("<[i]*frame[^><]+src=(\"|')?(?<url>([^>\"'\\s)])+)(\"|')?[^>]*>", RegexOptions.Multiline | RegexOptions.IgnoreCase);

    // Built once instead of once per matched link.
    // NOTE(review): the "( )" alternative looks like an HTML-decoded "&nbsp;" - confirm against the original source.
    Regex textCleaner = new Regex("(<[^>]+>)|(\\s)|( )|&|\"", RegexOptions.Multiline | RegexOptions.IgnoreCase);

    for (int kind = 0; kind < patterns.Length; kind++)
    {
        for (Match match = patterns[kind].Match(m_html); match.Success; match = match.NextMatch())
        {
            try
            {
                string url = new Uri(m_uri, match.Groups["url"].Value).AbsoluteUri;
                // Only the anchor pattern (index 0) captures link text.
                string text = kind == 0 ? textCleaner.Replace(match.Groups["text"].Value, "") : "";
                m_links.Add(new Link(url, text));
            }
            catch (Exception ex)
            {
                // Best effort: an unparsable link is reported and skipped.
                Console.WriteLine(ex.Message);
            }
        }
    }
    return m_links;
}
/// <summary>
/// Returns at most <paramref name="firstN"/> characters of the plain text
/// extracted from a piece of HTML.
/// </summary>
/// <remarks>
/// The text that includes link captions is cached in m_outstr, which the
/// Context property and getSpecialWords read. The variant without link
/// captions is computed on every call and never stored there, so the two
/// variants can no longer overwrite each other's result.
/// </remarks>
/// <param name="instr">HTML markup.</param>
/// <param name="firstN">Maximum number of characters to return.</param>
/// <param name="withLink">Whether text inside anchor elements is kept.</param>
/// <returns>The plain text, truncated to <paramref name="firstN"/> characters.</returns>
private string getFirstNchar(string instr, int firstN, bool withLink)
{
    string text;
    if (withLink)
    {
        if (string.IsNullOrEmpty(m_outstr))
        {
            m_outstr = extractPlainText(instr, true);
        }
        text = m_outstr;
    }
    else
    {
        text = extractPlainText(instr, false);
    }
    return text.Length > firstN ? text.Substring(0, firstN) : text;
}

/// <summary>
/// Strips script, style and select elements (optionally anchors too), then all
/// remaining tags, and collapses whitespace runs into single spaces.
/// </summary>
private static string extractPlainText(string html, bool withLink)
{
    if (string.IsNullOrEmpty(html))
    {
        return "";
    }
    RegexOptions options = RegexOptions.Multiline | RegexOptions.IgnoreCase;
    string text = html;
    text = new Regex(@"(?m)<script[^>]*>(\w|\W)*?</script[^>]*>", options).Replace(text, "");
    text = new Regex(@"(?m)<style[^>]*>(\w|\W)*?</style[^>]*>", options).Replace(text, "");
    text = new Regex(@"(?m)<select[^>]*>(\w|\W)*?</select[^>]*>", options).Replace(text, "");
    if (!withLink)
    {
        text = new Regex(@"(?m)<a[^>]*>(\w|\W)*?</a[^>]*>", options).Replace(text, "");
    }
    // NOTE(review): the alternative after '|' looks like an HTML-decoded "&nbsp;" - confirm against the original source.
    text = new Regex("(<[^>]+?>)| ", options).Replace(text, "");
    text = new Regex("(\\s)+", options).Replace(text, " ");
    return text;
}
/// <summary>
/// Packs the first four address bytes into an unsigned 32-bit integer,
/// most significant byte first (so 0.0.0.1 becomes 1).
/// </summary>
/// <param name="x">IP address; only its first four bytes are used.</param>
/// <returns>The address as an unsigned integer.</returns>
private uint getuintFromIP(IPAddress x)
{
    byte[] octets = x.GetAddressBytes();
    // Shift on uint: multiplying the byte as an int overflows for a first
    // octet >= 128 and throws when compiled with overflow checking.
    return ((uint)octets[0] << 24)
         | ((uint)octets[1] << 16)
         | ((uint)octets[2] << 8)
         | (uint)octets[3];
}
#endregion
#region 公有方法
/// <summary>
/// Returns the leading plain text of this page, link captions included.
/// </summary>
/// <param name="firstN">Maximum number of characters to return.</param>
/// <returns>Up to <paramref name="firstN"/> characters of plain text.</returns>
public string getContext(int firstN)
{
    const bool keepLinkText = true;
    string excerpt = getFirstNchar(m_html, firstN, keepLinkText);
    return excerpt;
}
/// <summary>
/// Returns the leading plain text of this page, asking for link captions to be left out.
/// </summary>
/// <param name="firstN">Maximum number of characters to return.</param>
/// <returns>Up to <paramref name="firstN"/> characters of plain text.</returns>
public string getContextWithOutLink(int firstN)
{
    const bool keepLinkText = false;
    string excerpt = getFirstNchar(m_html, firstN, keepLinkText);
    return excerpt;
}
/// <summary>
/// Returns up to <paramref name="count"/> links of this page whose URL
/// matches a regular expression (case-insensitive, multiline).
/// </summary>
/// <param name="pattern">Regular expression applied to each link URL.</param>
/// <param name="count">Maximum number of links to return.</param>
/// <returns>The matching links, in page order.</returns>
public List<Link> getSpecialLinksByUrl(string pattern, int count)
{
    List<Link> allLinks = getLinks();
    List<Link> matches = new List<Link>();
    if (count <= 0)
    {
        return matches;
    }

    // Compile the filter once rather than once per link.
    Regex filter = new Regex(pattern, RegexOptions.Multiline | RegexOptions.IgnoreCase);
    foreach (Link link in allLinks)
    {
        if (!filter.Match(link.url).Success)
        {
            continue;
        }
        matches.Add(link);
        if (matches.Count >= count)
        {
            break;
        }
    }
    return matches;
}
/// <summary>
/// Returns up to <paramref name="count"/> links of this page whose link text
/// matches a regular expression (case-insensitive, multiline).
/// </summary>
/// <param name="pattern">Regular expression applied to each link's text.</param>
/// <param name="count">Maximum number of links to return.</param>
/// <returns>The matching links, in page order.</returns>
public List<Link> getSpecialLinksByText(string pattern, int count)
{
    List<Link> allLinks = getLinks();
    List<Link> matches = new List<Link>();
    if (count <= 0)
    {
        return matches;
    }

    // Compile the filter once rather than once per link.
    Regex filter = new Regex(pattern, RegexOptions.Multiline | RegexOptions.IgnoreCase);
    foreach (Link link in allLinks)
    {
        if (!filter.Match(link.text).Success)
        {
            continue;
        }
        matches.Add(link);
        if (matches.Count >= count)
        {
            break;
        }
    }
    return matches;
}
/// <summary>
/// Returns the links of this page whose host resolves to an IPv4 address
/// inside the inclusive range [_ip_start, _ip_end].
/// </summary>
/// <remarks>
/// Performs one DNS lookup per link. Links whose host cannot be resolved, or
/// that have no IPv4 address, are skipped.
/// </remarks>
/// <param name="_ip_start">First address of the range.</param>
/// <param name="_ip_end">Last address of the range.</param>
/// <returns>The links whose resolved address lies in the range.</returns>
public List<Link> getSpecialLinksByIP(string _ip_start, string _ip_end)
{
    // The bounds do not change per link, so convert them once.
    uint rangeStart = getuintFromIP(IPAddress.Parse(_ip_start));
    uint rangeEnd = getuintFromIP(IPAddress.Parse(_ip_end));

    List<Link> matches = new List<Link>();
    foreach (Link link in getLinks())
    {
        IPAddress ipv4 = null;
        try
        {
            IPAddress[] resolved = Dns.GetHostEntry(new Uri(link.url).Host).AddressList;
            // The first entry may be IPv6; its leading bytes are meaningless
            // for a 32-bit range comparison, so pick the first IPv4 entry.
            foreach (IPAddress candidate in resolved)
            {
                if (candidate.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork)
                {
                    ipv4 = candidate;
                    break;
                }
            }
        }
        catch
        {
            // Best effort: unresolvable or malformed hosts are skipped.
            continue;
        }
        if (ipv4 == null)
        {
            continue;
        }

        uint value = getuintFromIP(ipv4);
        if (value >= rangeStart && value <= rangeEnd)
        {
            matches.Add(link);
        }
    }
    return matches;
}
/// <summary>
/// Searches the plain text of this page with a regular expression and returns
/// what its first capture group matched.
/// </summary>
/// <param name="pattern">Regular expression (case-insensitive, multiline); the result is taken from group 1.</param>
/// <returns>The value of group 1 of the first match, or an empty string when nothing matches.</returns>
public string getSpecialWords(string pattern)
{
    if (m_outstr == "")
    {
        getContext(Int16.MaxValue);
    }
    Match hit = Regex.Match(m_outstr, pattern, RegexOptions.Multiline | RegexOptions.IgnoreCase);
    return hit.Success ? hit.Groups[1].Value : string.Empty;
}
#endregion
#region 构造函数
/// <summary>
/// Downloads the page at <paramref name="_url"/> and fills in the HTML, size
/// and final (post-redirect) URI of this instance.
/// </summary>
/// <remarks>
/// The page is marked unusable (m_good = false) when the URL ends in .rar,
/// .dat or .msi, when the response is not a text/* type, when it is larger
/// than 1 &lt;&lt; 22 bytes, or when any exception occurs. Cookies are shared
/// per host through the static webcookies table.
/// </remarks>
/// <param name="_url">Absolute URL of the page to fetch.</param>
private void Init(string _url)
{
    // Initialise every field before parsing the URL so that a malformed URL
    // cannot leave the object with null members.
    m_links = new List<Link>();
    m_html = "";
    m_outstr = "";
    m_title = "";
    m_good = true;
    try
    {
        m_uri = new Uri(_url);
        if (_url.EndsWith(".rar") || _url.EndsWith(".dat") || _url.EndsWith(".msi"))
        {
            m_good = false;
            return;
        }

        HttpWebRequest rqst = (HttpWebRequest)WebRequest.Create(m_uri);
        rqst.AllowAutoRedirect = true;
        rqst.MaximumAutomaticRedirections = 3;
        rqst.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)";
        rqst.KeepAlive = true;
        rqst.Timeout = 30000;
        lock (WebPage.webcookies)
        {
            CookieContainer jar;
            if (!WebPage.webcookies.TryGetValue(m_uri.Host, out jar))
            {
                jar = new CookieContainer();
                WebPage.webcookies[m_uri.Host] = jar;
            }
            rqst.CookieContainer = jar;
        }

        // 'using' releases the connection on every exit path, including exceptions.
        using (HttpWebResponse rsps = (HttpWebResponse)rqst.GetResponse())
        using (Stream sm = rsps.GetResponseStream())
        {
            string contenttype = rsps.ContentType.ToLower();
            if (!contenttype.StartsWith("text/") || rsps.ContentLength > 1 << 22)
            {
                m_good = false;
                return;
            }

            Encoding fallback = Encoding.Default;
            int ix = contenttype.IndexOf("charset=");
            if (ix != -1)
            {
                // The Content-Type header names the charset.
                Encoding cding;
                try
                {
                    string charset = rsps.ContentType.Substring(ix + "charset=".Length);
                    // Drop trailing parameters and optional quotes, e.g. charset="utf-8"; q=1
                    int end = charset.IndexOf(';');
                    if (end != -1)
                    {
                        charset = charset.Substring(0, end);
                    }
                    cding = Encoding.GetEncoding(charset.Trim(' ', '"', '\''));
                }
                catch
                {
                    cding = fallback;
                }
                m_html = new StreamReader(sm, cding).ReadToEnd();
            }
            else
            {
                // No charset in the header: decode with the default encoding,
                // look for a charset declaration in the markup, then re-decode.
                m_html = new StreamReader(sm, fallback).ReadToEnd();
                Regex regex = new Regex("charset=(?<cding>[^=]+)?\"", RegexOptions.IgnoreCase);
                string strcding = regex.Match(m_html).Groups["cding"].Value;
                Encoding cding;
                try
                {
                    cding = Encoding.GetEncoding(strcding);
                }
                catch
                {
                    cding = fallback;
                }
                byte[] bytes = fallback.GetBytes(m_html.ToCharArray());
                m_html = cding.GetString(bytes);
                // Many '?' characters are taken as a sign the re-decoding
                // failed; in that case revert to the default encoding.
                if (m_html.Split('?').Length > 100)
                {
                    m_html = fallback.GetString(bytes);
                }
            }
            m_pagesize = m_html.Length;
            m_uri = rsps.ResponseUri;
        }
    }
    catch (Exception ex)
    {
        // m_uri is still null when the URL itself could not be parsed.
        Console.WriteLine(ex.Message + (m_uri != null ? m_uri.ToString() : _url));
        m_good = false;
    }
}
/// <summary>
/// Creates a page object for <paramref name="_url"/> and downloads it.
/// </summary>
/// <remarks>
/// The URL is first unescaped, then every run of characters above U+00FF is
/// percent-encoded as GB2312 before the download starts.
/// </remarks>
/// <param name="_url">URL of the page; may contain escaped or raw non-Latin characters.</param>
public WebPage(string _url)
{
    try
    {
        _url = Uri.UnescapeDataString(_url);
    }
    catch
    {
        // Keep the URL as given when it cannot be unescaped.
    }

    // Encode every non-Latin-1 run, not only the first one found.
    Encoding gb2312 = Encoding.GetEncoding("GB2312");
    _url = new Regex(@"[^\x00-\xff]+").Replace(_url, delegate(Match run)
    {
        return System.Web.HttpUtility.UrlEncode(run.Value, gb2312);
    });

    Init(_url);
}
/// <summary>
/// Creates a page object for a page that may require a form login first.
/// </summary>
/// <remarks>
/// When no login data is supplied, or a cookie container is already
/// registered for the target host, the page is downloaded directly.
/// Otherwise <paramref name="_post"/> is POSTed to
/// <paramref name="_loginurl"/>, the resulting cookies are registered for the
/// target host, and the page is then downloaded through Init so that it
/// follows the same download path as the single-argument constructor. If the
/// login request fails, the page is downloaded without it.
/// </remarks>
/// <param name="_url">URL of the page to fetch.</param>
/// <param name="_loginurl">URL the login form is posted to; empty to skip login.</param>
/// <param name="_post">Form-encoded login data; empty to skip login.</param>
public WebPage(string _url, string _loginurl, string _post)
{
    try
    {
        _url = Uri.UnescapeDataString(_url);
    }
    catch
    {
        // Keep the URL as given when it cannot be unescaped.
    }

    // Encode every non-Latin-1 run, not only the first one found.
    Encoding gb2312 = Encoding.GetEncoding("GB2312");
    _url = new Regex(@"[^\x00-\xff]+").Replace(_url, delegate(Match run)
    {
        return System.Web.HttpUtility.UrlEncode(run.Value, gb2312);
    });

    bool hasLoginData = _loginurl != null && _loginurl.Trim() != ""
                     && _post != null && _post.Trim() != "";
    Uri target;
    bool needLogin = hasLoginData && Uri.TryCreate(_url, UriKind.Absolute, out target);
    if (needLogin)
    {
        // The cookie table is shared between instances; read it under its lock.
        lock (WebPage.webcookies)
        {
            needLogin = !WebPage.webcookies.ContainsKey(target.Host);
        }
    }
    if (!needLogin)
    {
        // Also covers malformed URLs, which Init handles itself.
        Init(_url);
        return;
    }

    m_post = _post;
    m_loginurl = _loginurl;
    CookieContainer loginCookies = new CookieContainer();
    try
    {
        byte[] payload = Encoding.Default.GetBytes(_post);
        HttpWebRequest login = (HttpWebRequest)WebRequest.Create(_loginurl);
        login.ContentType = "application/x-www-form-urlencoded";
        login.AllowAutoRedirect = false;
        login.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)";
        login.Timeout = 60000;
        login.KeepAlive = true;
        login.ContentLength = payload.Length;
        login.Method = "POST";
        login.CookieContainer = loginCookies;
        using (Stream body = login.GetRequestStream())
        {
            body.Write(payload, 0, payload.Length);
        }
        using (HttpWebResponse reply = (HttpWebResponse)login.GetResponse())
        {
            foreach (Cookie ck in reply.Cookies)
            {
                loginCookies.Add(ck);
            }
        }
    }
    catch
    {
        // Login failed: fall back to an anonymous download.
        Init(_url);
        return;
    }

    // Register the session cookies so Init (and later pages on this host) reuse them.
    lock (WebPage.webcookies)
    {
        WebPage.webcookies[target.Host] = loginCookies;
    }
    Init(_url);
}
#endregion
#region 属性
/// <summary>
/// Gets the absolute URI string of this page (read-only).
/// </summary>
public string URL
{
    get { return m_uri.AbsoluteUri; }
}
/// <summary>
/// Gets the trimmed content of the page's title element (read-only).
/// The value is extracted from the HTML on first use and then cached;
/// it is an empty string when no title element is found.
/// </summary>
public string Title
{
    get
    {
        if (m_title != "")
        {
            return m_title;
        }
        Match found = Regex.Match(m_html, @"(?m)<title[^>]*>(?<title>(?:\w|\W)*?)</title[^>]*>", RegexOptions.Multiline | RegexOptions.IgnoreCase);
        if (found.Success)
        {
            m_title = found.Groups["title"].Value.Trim();
        }
        return m_title;
    }
}
/// <summary>
/// Gets all links found on this page (read-only); they are parsed on demand.
/// </summary>
public List<Link> Links
{
    get
    {
        bool notParsedYet = m_links.Count == 0;
        if (notParsedYet)
        {
            getLinks();
        }
        return m_links;
    }
}
/// <summary>
/// Gets the cached plain text of this page (read-only); it is extracted on
/// first use.
/// </summary>
public string Context
{
    get
    {
        bool notExtractedYet = m_outstr == "";
        if (notExtractedYet)
        {
            getContext(Int16.MaxValue);
        }
        return m_outstr;
    }
}
/// <summary>
/// Gets the recorded size of this page.
/// </summary>
public int PageSize
{
    get { return m_pagesize; }
}
/// <summary>
/// Gets the links of this page that point to the same host as the page
/// itself, over either http or https.
/// </summary>
public List<Link> InsiteLinks
{
    get
    {
        // Escape the host so that '.' is literal, and require the host to end
        // at a port, path, query, fragment or end of string so that
        // "host.other.com" is not treated as on-site.
        string sameHost = "^https?://" + Regex.Escape(m_uri.Host) + "(?=[:/?#]|$)";
        return getSpecialLinksByUrl(sameHost, Int16.MaxValue);
    }
}
/// <summary>
/// Gets whether this page is usable.
/// </summary>
public bool IsGood
{
    get { return m_good; }
}
/// <summary>
/// Gets the host name of the site this page belongs to.
/// </summary>
public string Host
{
    get { return m_uri.Host; }
}
/// <summary>
/// Gets the POST data used for this page's login page.
/// </summary>
public string PostStr
{
    get { return m_post; }
}
/// <summary>
/// Gets the URL of this page's login page.
/// </summary>
public string LoginURL
{
    get { return m_loginurl; }
}
#endregion
}
/// <summary>
/// A hyperlink: a target URL plus the text shown for it.
/// </summary>
public class Link
{
    /// <summary>Target URL of the link.</summary>
    public string url;

    /// <summary>Text of the link.</summary>
    public string text;

    /// <summary>
    /// Creates a link from its URL and text.
    /// </summary>
    /// <param name="_url">Target URL.</param>
    /// <param name="_text">Link text.</param>
    public Link(string _url, string _text)
    {
        this.url = _url;
        this.text = _text;
    }
}
/*
* Author:Sunjoy at CCNU
* 如果您改进了这个类请发一份代码给我(ccnusjy 在gmail.com)
*/
using System;
using System.Data;
using System.Configuration;
using System.Net;
using System.IO;
using System.Text;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Threading;
using System.Web;
/// <summary>
/// 网页类
/// </summary>
public class WebPage
{
#region 私有成员
private Uri m_uri; //网址
private List<Link> m_links; //此网页上的链接
private string m_title; //此网页的标题
private string m_html; //此网页的HTML代码
private string m_outstr; //此网页可输出的纯文本
private bool m_good; //此网页是否可用
private int m_pagesize; //此网页的大小
private static Dictionary<string, CookieContainer> webcookies = new Dictionary<string, CookieContainer>();//存放所有网页的Cookie
private string m_post; //此网页的登陆页需要的POST数据
private string m_loginurl; //此网页的登陆页
#endregion
#region 私有方法
/// <summary>
/// Parses the page HTML for anchor (href) and frame/iframe (src) targets and
/// caches them in m_links. Relative targets are resolved against m_uri.
/// Anchor links also carry their link text with tags, whitespace and a few
/// stray characters removed; frame links have empty text.
/// </summary>
/// <returns>The cached list of links (possibly empty).</returns>
private List<Link> getLinks()
{
    // A failed download can leave the fields unset; treat that as "no links".
    if (m_links == null)
    {
        m_links = new List<Link>();
    }
    if (m_links.Count != 0 || m_uri == null || string.IsNullOrEmpty(m_html))
    {
        return m_links;
    }

    Regex[] patterns = new Regex[2];
    patterns[0] = new Regex("(?m)<a[^><]+href=(\"|')?(?<url>([^>\"'\\s)])+)(\"|')?[^>]*>(?<text>(\\w|\\W)*?)</", RegexOptions.Multiline | RegexOptions.IgnoreCase);
    patterns[1] = new Regex("<[i]*frame[^><]+src=(\"|')?(?<url>([^>\"'\\s)])+)(\"|')?[^>]*>", RegexOptions.Multiline | RegexOptions.IgnoreCase);

    // Built once instead of once per matched link.
    // NOTE(review): the "( )" alternative looks like an HTML-decoded "&nbsp;" - confirm against the original source.
    Regex textCleaner = new Regex("(<[^>]+>)|(\\s)|( )|&|\"", RegexOptions.Multiline | RegexOptions.IgnoreCase);

    for (int kind = 0; kind < patterns.Length; kind++)
    {
        for (Match match = patterns[kind].Match(m_html); match.Success; match = match.NextMatch())
        {
            try
            {
                string url = new Uri(m_uri, match.Groups["url"].Value).AbsoluteUri;
                // Only the anchor pattern (index 0) captures link text.
                string text = kind == 0 ? textCleaner.Replace(match.Groups["text"].Value, "") : "";
                m_links.Add(new Link(url, text));
            }
            catch (Exception ex)
            {
                // Best effort: an unparsable link is reported and skipped.
                Console.WriteLine(ex.Message);
            }
        }
    }
    return m_links;
}
/// <summary>
/// Returns at most <paramref name="firstN"/> characters of the plain text
/// extracted from a piece of HTML.
/// </summary>
/// <remarks>
/// The text that includes link captions is cached in m_outstr, which the
/// Context property and getSpecialWords read. The variant without link
/// captions is computed on every call and never stored there, so the two
/// variants can no longer overwrite each other's result.
/// </remarks>
/// <param name="instr">HTML markup.</param>
/// <param name="firstN">Maximum number of characters to return.</param>
/// <param name="withLink">Whether text inside anchor elements is kept.</param>
/// <returns>The plain text, truncated to <paramref name="firstN"/> characters.</returns>
private string getFirstNchar(string instr, int firstN, bool withLink)
{
    string text;
    if (withLink)
    {
        if (string.IsNullOrEmpty(m_outstr))
        {
            m_outstr = extractPlainText(instr, true);
        }
        text = m_outstr;
    }
    else
    {
        text = extractPlainText(instr, false);
    }
    return text.Length > firstN ? text.Substring(0, firstN) : text;
}

/// <summary>
/// Strips script, style and select elements (optionally anchors too), then all
/// remaining tags, and collapses whitespace runs into single spaces.
/// </summary>
private static string extractPlainText(string html, bool withLink)
{
    if (string.IsNullOrEmpty(html))
    {
        return "";
    }
    RegexOptions options = RegexOptions.Multiline | RegexOptions.IgnoreCase;
    string text = html;
    text = new Regex(@"(?m)<script[^>]*>(\w|\W)*?</script[^>]*>", options).Replace(text, "");
    text = new Regex(@"(?m)<style[^>]*>(\w|\W)*?</style[^>]*>", options).Replace(text, "");
    text = new Regex(@"(?m)<select[^>]*>(\w|\W)*?</select[^>]*>", options).Replace(text, "");
    if (!withLink)
    {
        text = new Regex(@"(?m)<a[^>]*>(\w|\W)*?</a[^>]*>", options).Replace(text, "");
    }
    // NOTE(review): the alternative after '|' looks like an HTML-decoded "&nbsp;" - confirm against the original source.
    text = new Regex("(<[^>]+?>)| ", options).Replace(text, "");
    text = new Regex("(\\s)+", options).Replace(text, " ");
    return text;
}
/// <summary>
/// Packs the first four address bytes into an unsigned 32-bit integer,
/// most significant byte first (so 0.0.0.1 becomes 1).
/// </summary>
/// <param name="x">IP address; only its first four bytes are used.</param>
/// <returns>The address as an unsigned integer.</returns>
private uint getuintFromIP(IPAddress x)
{
    byte[] octets = x.GetAddressBytes();
    // Shift on uint: multiplying the byte as an int overflows for a first
    // octet >= 128 and throws when compiled with overflow checking.
    return ((uint)octets[0] << 24)
         | ((uint)octets[1] << 16)
         | ((uint)octets[2] << 8)
         | (uint)octets[3];
}
#endregion
#region 公有方法
/// <summary>
/// Returns the leading plain text of this page, link captions included.
/// </summary>
/// <param name="firstN">Maximum number of characters to return.</param>
/// <returns>Up to <paramref name="firstN"/> characters of plain text.</returns>
public string getContext(int firstN)
{
    const bool keepLinkText = true;
    string excerpt = getFirstNchar(m_html, firstN, keepLinkText);
    return excerpt;
}
/// <summary>
/// Returns the leading plain text of this page, asking for link captions to be left out.
/// </summary>
/// <param name="firstN">Maximum number of characters to return.</param>
/// <returns>Up to <paramref name="firstN"/> characters of plain text.</returns>
public string getContextWithOutLink(int firstN)
{
    const bool keepLinkText = false;
    string excerpt = getFirstNchar(m_html, firstN, keepLinkText);
    return excerpt;
}
/// <summary>
/// Returns up to <paramref name="count"/> links of this page whose URL
/// matches a regular expression (case-insensitive, multiline).
/// </summary>
/// <param name="pattern">Regular expression applied to each link URL.</param>
/// <param name="count">Maximum number of links to return.</param>
/// <returns>The matching links, in page order.</returns>
public List<Link> getSpecialLinksByUrl(string pattern, int count)
{
    List<Link> allLinks = getLinks();
    List<Link> matches = new List<Link>();
    if (count <= 0)
    {
        return matches;
    }

    // Compile the filter once rather than once per link.
    Regex filter = new Regex(pattern, RegexOptions.Multiline | RegexOptions.IgnoreCase);
    foreach (Link link in allLinks)
    {
        if (!filter.Match(link.url).Success)
        {
            continue;
        }
        matches.Add(link);
        if (matches.Count >= count)
        {
            break;
        }
    }
    return matches;
}
/// <summary>
/// Returns up to <paramref name="count"/> links of this page whose link text
/// matches a regular expression (case-insensitive, multiline).
/// </summary>
/// <param name="pattern">Regular expression applied to each link's text.</param>
/// <param name="count">Maximum number of links to return.</param>
/// <returns>The matching links, in page order.</returns>
public List<Link> getSpecialLinksByText(string pattern, int count)
{
    List<Link> allLinks = getLinks();
    List<Link> matches = new List<Link>();
    if (count <= 0)
    {
        return matches;
    }

    // Compile the filter once rather than once per link.
    Regex filter = new Regex(pattern, RegexOptions.Multiline | RegexOptions.IgnoreCase);
    foreach (Link link in allLinks)
    {
        if (!filter.Match(link.text).Success)
        {
            continue;
        }
        matches.Add(link);
        if (matches.Count >= count)
        {
            break;
        }
    }
    return matches;
}
/// <summary>
/// Returns the links of this page whose host resolves to an IPv4 address
/// inside the inclusive range [_ip_start, _ip_end].
/// </summary>
/// <remarks>
/// Performs one DNS lookup per link. Links whose host cannot be resolved, or
/// that have no IPv4 address, are skipped.
/// </remarks>
/// <param name="_ip_start">First address of the range.</param>
/// <param name="_ip_end">Last address of the range.</param>
/// <returns>The links whose resolved address lies in the range.</returns>
public List<Link> getSpecialLinksByIP(string _ip_start, string _ip_end)
{
    // The bounds do not change per link, so convert them once.
    uint rangeStart = getuintFromIP(IPAddress.Parse(_ip_start));
    uint rangeEnd = getuintFromIP(IPAddress.Parse(_ip_end));

    List<Link> matches = new List<Link>();
    foreach (Link link in getLinks())
    {
        IPAddress ipv4 = null;
        try
        {
            IPAddress[] resolved = Dns.GetHostEntry(new Uri(link.url).Host).AddressList;
            // The first entry may be IPv6; its leading bytes are meaningless
            // for a 32-bit range comparison, so pick the first IPv4 entry.
            foreach (IPAddress candidate in resolved)
            {
                if (candidate.AddressFamily == System.Net.Sockets.AddressFamily.InterNetwork)
                {
                    ipv4 = candidate;
                    break;
                }
            }
        }
        catch
        {
            // Best effort: unresolvable or malformed hosts are skipped.
            continue;
        }
        if (ipv4 == null)
        {
            continue;
        }

        uint value = getuintFromIP(ipv4);
        if (value >= rangeStart && value <= rangeEnd)
        {
            matches.Add(link);
        }
    }
    return matches;
}
/// <summary>
/// Searches the plain text of this page with a regular expression and returns
/// what its first capture group matched.
/// </summary>
/// <param name="pattern">Regular expression (case-insensitive, multiline); the result is taken from group 1.</param>
/// <returns>The value of group 1 of the first match, or an empty string when nothing matches.</returns>
public string getSpecialWords(string pattern)
{
    if (m_outstr == "")
    {
        getContext(Int16.MaxValue);
    }
    Match hit = Regex.Match(m_outstr, pattern, RegexOptions.Multiline | RegexOptions.IgnoreCase);
    return hit.Success ? hit.Groups[1].Value : string.Empty;
}
#endregion
#region 构造函数
/// <summary>
/// Downloads the page at <paramref name="_url"/> and fills in the HTML, size
/// and final (post-redirect) URI of this instance.
/// </summary>
/// <remarks>
/// The page is marked unusable (m_good = false) when the URL ends in .rar,
/// .dat or .msi, when the response is not a text/* type, when it is larger
/// than 1 &lt;&lt; 22 bytes, or when any exception occurs. Cookies are shared
/// per host through the static webcookies table.
/// </remarks>
/// <param name="_url">Absolute URL of the page to fetch.</param>
private void Init(string _url)
{
    // Initialise every field before parsing the URL so that a malformed URL
    // cannot leave the object with null members.
    m_links = new List<Link>();
    m_html = "";
    m_outstr = "";
    m_title = "";
    m_good = true;
    try
    {
        m_uri = new Uri(_url);
        if (_url.EndsWith(".rar") || _url.EndsWith(".dat") || _url.EndsWith(".msi"))
        {
            m_good = false;
            return;
        }

        HttpWebRequest rqst = (HttpWebRequest)WebRequest.Create(m_uri);
        rqst.AllowAutoRedirect = true;
        rqst.MaximumAutomaticRedirections = 3;
        rqst.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)";
        rqst.KeepAlive = true;
        rqst.Timeout = 30000;
        lock (WebPage.webcookies)
        {
            CookieContainer jar;
            if (!WebPage.webcookies.TryGetValue(m_uri.Host, out jar))
            {
                jar = new CookieContainer();
                WebPage.webcookies[m_uri.Host] = jar;
            }
            rqst.CookieContainer = jar;
        }

        // 'using' releases the connection on every exit path, including exceptions.
        using (HttpWebResponse rsps = (HttpWebResponse)rqst.GetResponse())
        using (Stream sm = rsps.GetResponseStream())
        {
            string contenttype = rsps.ContentType.ToLower();
            if (!contenttype.StartsWith("text/") || rsps.ContentLength > 1 << 22)
            {
                m_good = false;
                return;
            }

            Encoding fallback = Encoding.Default;
            int ix = contenttype.IndexOf("charset=");
            if (ix != -1)
            {
                // The Content-Type header names the charset.
                Encoding cding;
                try
                {
                    string charset = rsps.ContentType.Substring(ix + "charset=".Length);
                    // Drop trailing parameters and optional quotes, e.g. charset="utf-8"; q=1
                    int end = charset.IndexOf(';');
                    if (end != -1)
                    {
                        charset = charset.Substring(0, end);
                    }
                    cding = Encoding.GetEncoding(charset.Trim(' ', '"', '\''));
                }
                catch
                {
                    cding = fallback;
                }
                m_html = new StreamReader(sm, cding).ReadToEnd();
            }
            else
            {
                // No charset in the header: decode with the default encoding,
                // look for a charset declaration in the markup, then re-decode.
                m_html = new StreamReader(sm, fallback).ReadToEnd();
                Regex regex = new Regex("charset=(?<cding>[^=]+)?\"", RegexOptions.IgnoreCase);
                string strcding = regex.Match(m_html).Groups["cding"].Value;
                Encoding cding;
                try
                {
                    cding = Encoding.GetEncoding(strcding);
                }
                catch
                {
                    cding = fallback;
                }
                byte[] bytes = fallback.GetBytes(m_html.ToCharArray());
                m_html = cding.GetString(bytes);
                // Many '?' characters are taken as a sign the re-decoding
                // failed; in that case revert to the default encoding.
                if (m_html.Split('?').Length > 100)
                {
                    m_html = fallback.GetString(bytes);
                }
            }
            m_pagesize = m_html.Length;
            m_uri = rsps.ResponseUri;
        }
    }
    catch (Exception ex)
    {
        // m_uri is still null when the URL itself could not be parsed.
        Console.WriteLine(ex.Message + (m_uri != null ? m_uri.ToString() : _url));
        m_good = false;
    }
}
/// <summary>
/// Creates a page object for <paramref name="_url"/> and downloads it.
/// </summary>
/// <remarks>
/// The URL is first unescaped, then every run of characters above U+00FF is
/// percent-encoded as GB2312 before the download starts.
/// </remarks>
/// <param name="_url">URL of the page; may contain escaped or raw non-Latin characters.</param>
public WebPage(string _url)
{
    try
    {
        _url = Uri.UnescapeDataString(_url);
    }
    catch
    {
        // Keep the URL as given when it cannot be unescaped.
    }

    // Encode every non-Latin-1 run, not only the first one found.
    Encoding gb2312 = Encoding.GetEncoding("GB2312");
    _url = new Regex(@"[^\x00-\xff]+").Replace(_url, delegate(Match run)
    {
        return System.Web.HttpUtility.UrlEncode(run.Value, gb2312);
    });

    Init(_url);
}
/// <summary>
/// Creates a page object for a page that may require a form login first.
/// </summary>
/// <remarks>
/// When no login data is supplied, or a cookie container is already
/// registered for the target host, the page is downloaded directly.
/// Otherwise <paramref name="_post"/> is POSTed to
/// <paramref name="_loginurl"/>, the resulting cookies are registered for the
/// target host, and the page is then downloaded through Init so that it
/// follows the same download path as the single-argument constructor. If the
/// login request fails, the page is downloaded without it.
/// </remarks>
/// <param name="_url">URL of the page to fetch.</param>
/// <param name="_loginurl">URL the login form is posted to; empty to skip login.</param>
/// <param name="_post">Form-encoded login data; empty to skip login.</param>
public WebPage(string _url, string _loginurl, string _post)
{
    try
    {
        _url = Uri.UnescapeDataString(_url);
    }
    catch
    {
        // Keep the URL as given when it cannot be unescaped.
    }

    // Encode every non-Latin-1 run, not only the first one found.
    Encoding gb2312 = Encoding.GetEncoding("GB2312");
    _url = new Regex(@"[^\x00-\xff]+").Replace(_url, delegate(Match run)
    {
        return System.Web.HttpUtility.UrlEncode(run.Value, gb2312);
    });

    bool hasLoginData = _loginurl != null && _loginurl.Trim() != ""
                     && _post != null && _post.Trim() != "";
    Uri target;
    bool needLogin = hasLoginData && Uri.TryCreate(_url, UriKind.Absolute, out target);
    if (needLogin)
    {
        // The cookie table is shared between instances; read it under its lock.
        lock (WebPage.webcookies)
        {
            needLogin = !WebPage.webcookies.ContainsKey(target.Host);
        }
    }
    if (!needLogin)
    {
        // Also covers malformed URLs, which Init handles itself.
        Init(_url);
        return;
    }

    m_post = _post;
    m_loginurl = _loginurl;
    CookieContainer loginCookies = new CookieContainer();
    try
    {
        byte[] payload = Encoding.Default.GetBytes(_post);
        HttpWebRequest login = (HttpWebRequest)WebRequest.Create(_loginurl);
        login.ContentType = "application/x-www-form-urlencoded";
        login.AllowAutoRedirect = false;
        login.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)";
        login.Timeout = 60000;
        login.KeepAlive = true;
        login.ContentLength = payload.Length;
        login.Method = "POST";
        login.CookieContainer = loginCookies;
        using (Stream body = login.GetRequestStream())
        {
            body.Write(payload, 0, payload.Length);
        }
        using (HttpWebResponse reply = (HttpWebResponse)login.GetResponse())
        {
            foreach (Cookie ck in reply.Cookies)
            {
                loginCookies.Add(ck);
            }
        }
    }
    catch
    {
        // Login failed: fall back to an anonymous download.
        Init(_url);
        return;
    }

    // Register the session cookies so Init (and later pages on this host) reuse them.
    lock (WebPage.webcookies)
    {
        WebPage.webcookies[target.Host] = loginCookies;
    }
    Init(_url);
}
#endregion
#region 属性
/// <summary>
/// Gets the absolute URI string of this page (read-only).
/// </summary>
public string URL
{
    get { return m_uri.AbsoluteUri; }
}
/// <summary>
/// Gets the trimmed content of the page's title element (read-only).
/// The value is extracted from the HTML on first use and then cached;
/// it is an empty string when no title element is found.
/// </summary>
public string Title
{
    get
    {
        if (m_title != "")
        {
            return m_title;
        }
        Match found = Regex.Match(m_html, @"(?m)<title[^>]*>(?<title>(?:\w|\W)*?)</title[^>]*>", RegexOptions.Multiline | RegexOptions.IgnoreCase);
        if (found.Success)
        {
            m_title = found.Groups["title"].Value.Trim();
        }
        return m_title;
    }
}
/// <summary>
/// Gets all links found on this page (read-only); they are parsed on demand.
/// </summary>
public List<Link> Links
{
    get
    {
        bool notParsedYet = m_links.Count == 0;
        if (notParsedYet)
        {
            getLinks();
        }
        return m_links;
    }
}
/// <summary>
/// Gets the cached plain text of this page (read-only); it is extracted on
/// first use.
/// </summary>
public string Context
{
    get
    {
        bool notExtractedYet = m_outstr == "";
        if (notExtractedYet)
        {
            getContext(Int16.MaxValue);
        }
        return m_outstr;
    }
}
/// <summary>
/// Gets the recorded size of this page.
/// </summary>
public int PageSize
{
    get { return m_pagesize; }
}
/// <summary>
/// Gets the links of this page that point to the same host as the page
/// itself, over either http or https.
/// </summary>
public List<Link> InsiteLinks
{
    get
    {
        // Escape the host so that '.' is literal, and require the host to end
        // at a port, path, query, fragment or end of string so that
        // "host.other.com" is not treated as on-site.
        string sameHost = "^https?://" + Regex.Escape(m_uri.Host) + "(?=[:/?#]|$)";
        return getSpecialLinksByUrl(sameHost, Int16.MaxValue);
    }
}
/// <summary>
/// Gets whether this page is usable.
/// </summary>
public bool IsGood
{
    get { return m_good; }
}
/// <summary>
/// Gets the host name of the site this page belongs to.
/// </summary>
public string Host
{
    get { return m_uri.Host; }
}
/// <summary>
/// Gets the POST data used for this page's login page.
/// </summary>
public string PostStr
{
    get { return m_post; }
}
/// <summary>
/// Gets the URL of this page's login page.
/// </summary>
public string LoginURL
{
    get { return m_loginurl; }
}
#endregion
}
/// <summary>
/// A hyperlink: a target URL plus the text shown for it.
/// </summary>
public class Link
{
    /// <summary>Target URL of the link.</summary>
    public string url;

    /// <summary>Text of the link.</summary>
    public string text;

    /// <summary>
    /// Creates a link from its URL and text.
    /// </summary>
    /// <param name="_url">Target URL.</param>
    /// <param name="_text">Link text.</param>
    public Link(string _url, string _text)
    {
        this.url = _url;
        this.text = _text;
    }
}
// Code
using System;
using System.Data;
using System.Configuration;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Xml;
namespace sqlhelper
{
/// <summary>
/// XML相关通用功能
/// </summary>
public class Xml
{
// Empty public constructor; every other member visible in this class is static.
public Xml() { }
/// <summary>
/// Kind of XML source passed to the loading methods.
/// </summary>
public enum XmlType
{
    /// <summary>The source is a file path.</summary>
    File,

    /// <summary>The source is the XML text itself.</summary>
    String
}
#region 读取XML资源到DataSet中
/// <summary>
/// Loads an XML source into a new DataSet.
/// </summary>
/// <param name="source">A file path when <paramref name="xmlType"/> is File; otherwise the XML text.</param>
/// <param name="xmlType">Kind of source.</param>
/// <returns>The populated DataSet.</returns>
public static DataSet GetDataSet(string source, XmlType xmlType)
{
    DataSet result = new DataSet();
    switch (xmlType)
    {
        case XmlType.File:
        {
            result.ReadXml(source);
            break;
        }
        default:
        {
            // Any non-file source is parsed as XML text.
            XmlDocument document = new XmlDocument();
            document.LoadXml(source);
            XmlNodeReader nodeReader = new XmlNodeReader(document);
            result.ReadXml(nodeReader);
            break;
        }
    }
    return result;
}
#endregion
#region 读取XML资源到DataTable中
/// <summary>
/// Loads an XML source and returns one of its tables.
/// </summary>
/// <param name="source">A file path when <paramref name="xmlType"/> is File; otherwise the XML text.</param>
/// <param name="xmlType">Kind of source: file or string.</param>
/// <param name="tableName">Name of the table to return.</param>
/// <returns>The table with the given name, as returned by the DataSet's table collection.</returns>
public static DataTable GetTable(string source, XmlType xmlType, string tableName)
{
    // Reuse the shared loader instead of repeating its file/string branching.
    return GetDataSet(source, xmlType).Tables[tableName];
}
#endregion
#region 获取一个字符串xml文档中的ds
/// <summary>
/// Reads an XML string into the supplied DataSet.
/// </summary>
/// <param name="xml_string">String containing the XML document.</param>
/// <param name="ds">DataSet that receives the data; a new one is created when null is passed.</param>
public static void get_XmlValue_ds(string xml_string, ref DataSet ds)
{
    if (ds == null)
    {
        ds = new DataSet();
    }
    XmlDocument document = new XmlDocument();
    document.LoadXml(xml_string);
    // 'using' closes the reader even when ReadXml throws.
    using (XmlNodeReader reader = new XmlNodeReader(document))
    {
        ds.ReadXml(reader);
    }
}
#endregion
#region 读取XML资源中指定的DataTable的指定行指定列的值
/// <summary>
/// Reads one cell, addressed by row index and column name, from a table of an XML source.
/// </summary>
/// <param name="source">A file path when <paramref name="xmlType"/> is File; otherwise the XML text.</param>
/// <param name="xmlType">Kind of source: file or string.</param>
/// <param name="tableName">Table name.</param>
/// <param name="rowIndex">Zero-based row index.</param>
/// <param name="colName">Column name.</param>
/// <returns>The cell value, or null when the table, row or column does not exist.</returns>
public static object GetTableCell(string source, XmlType xmlType, string tableName, int rowIndex, string colName)
{
    DataTable table = GetDataSet(source, xmlType).Tables[tableName];
    // Honour the documented contract: a missing cell yields null, not an exception.
    if (table == null
        || rowIndex < 0 || rowIndex >= table.Rows.Count
        || colName == null || !table.Columns.Contains(colName))
    {
        return null;
    }
    return table.Rows[rowIndex][colName];
}
#endregion
#region 读取XML资源中指定的DataTable的指定行指定列的值
/// <summary>
/// Reads one cell, addressed by row index and column index, from a table of an XML source.
/// </summary>
/// <param name="source">A file path when <paramref name="xmlType"/> is File; otherwise the XML text.</param>
/// <param name="xmlType">Kind of source: file or string.</param>
/// <param name="tableName">Table name.</param>
/// <param name="rowIndex">Zero-based row index.</param>
/// <param name="colIndex">Zero-based column index.</param>
/// <returns>The cell value, or null when the table, row or column does not exist.</returns>
public static object GetTableCell(string source, XmlType xmlType, string tableName, int rowIndex, int colIndex)
{
    DataTable table = GetDataSet(source, xmlType).Tables[tableName];
    // Honour the documented contract: a missing cell yields null, not an exception.
    if (table == null
        || rowIndex < 0 || rowIndex >= table.Rows.Count
        || colIndex < 0 || colIndex >= table.Columns.Count)
    {
        return null;
    }
    return table.Rows[rowIndex][colIndex];
}
#endregion
#region 获取一个字符串xml文档中的一个table,指定行,指定列的值
/// <summary>
/// Reads the text of one column of one row from an XML string.
/// </summary>
/// <param name="xml_string">String containing the XML document.</param>
/// <param name="tablename">Table name, used to select child elements of the document root.</param>
/// <param name="row_index">Zero-based index among the selected elements.</param>
/// <param name="col_name">Local name of the child element to read.</param>
/// <returns>The inner text of the matching element, or an empty string when the row or column is not found.</returns>
public static string get_XmlValue(string xml_string, string tablename, int row_index, string col_name)
{
    // Same lookup as the overload with the isfile flag, applied to XML text.
    return get_XmlValue(xml_string, tablename, row_index, col_name, false);
}
/// <summary>
/// Reads the text of one column of one row from an XML file or an XML string.
/// </summary>
/// <param name="xml_string">Path of an XML file when <paramref name="isfile"/> is true; otherwise the XML text.</param>
/// <param name="tablename">Table name, used to select child elements of the document root.</param>
/// <param name="row_index">Zero-based index among the selected elements.</param>
/// <param name="col_name">Local name of the child element to read.</param>
/// <param name="isfile">True to load <paramref name="xml_string"/> as a file path.</param>
/// <returns>The inner text of the matching element, or an empty string when the row or column is not found.</returns>
public static string get_XmlValue(string xml_string, string tablename, int row_index, string col_name, bool isfile)
{
    XmlDocument document = new XmlDocument();
    if (isfile)
    {
        document.Load(xml_string);
    }
    else
    {
        document.LoadXml(xml_string);
    }

    // Item() yields null when the index is outside the selected node list.
    XmlNode row = document.DocumentElement.SelectNodes(tablename).Item(row_index);
    if (row == null)
    {
        return "";
    }
    foreach (XmlNode cell in row.ChildNodes)
    {
        if (cell.LocalName == col_name)
        {
            return cell.InnerText;
        }
    }
    return "";
}
#endregion
#region 获取一个字符串xml文档中的dt
/// <summary>
/// Reads an XML string and hands back one of its tables.
/// </summary>
/// <param name="xml_string">String containing the XML document.</param>
/// <param name="dt">Receives the table named <paramref name="table_name"/> as returned by the DataSet's table collection.</param>
/// <param name="table_name">Name of the table to return.</param>
public static void get_XmlValue_dt(string xml_string, ref DataTable dt, string table_name)
{
    XmlDocument document = new XmlDocument();
    document.LoadXml(xml_string);

    DataSet loaded = new DataSet();
    // 'using' closes the reader even when ReadXml throws.
    using (XmlNodeReader reader = new XmlNodeReader(document))
    {
        loaded.ReadXml(reader);
    }
    dt = loaded.Tables[table_name];
}
#endregion
#region Write a DataTable to an XML file
/// <summary>
/// Writes a copy of a DataTable to an XML file whose root element is named "Config".
/// </summary>
/// <param name="dt">Table holding the data to write.</param>
/// <param name="filePath">Destination file path.</param>
public static void SaveTableToFile(DataTable dt, string filePath)
{
    // Work on a copy so the caller's table is never attached to this temporary DataSet.
    DataTable snapshot = dt.Copy();

    DataSet wrapper = new DataSet("Config");
    wrapper.Tables.Add(snapshot);
    wrapper.WriteXml(filePath);
}
#endregion
#region Write a DataTable to a file under a given root node name
/// <summary>
/// Writes a copy of a DataTable to an XML file using the supplied root element name.
/// </summary>
/// <param name="dt">Table holding the data to write.</param>
/// <param name="rootName">Name given to the DataSet, which becomes the root element of the file.</param>
/// <param name="filePath">Destination file path.</param>
public static void SaveTableToFile(DataTable dt, string rootName, string filePath)
{
    // Work on a copy so the caller's table is never attached to this temporary DataSet.
    DataTable snapshot = dt.Copy();

    DataSet wrapper = new DataSet(rootName);
    wrapper.Tables.Add(snapshot);
    wrapper.WriteXml(filePath);
}
#endregion
#region Update an XML file node via DataSet (column name overload)
/// <summary>
/// Updates one cell of an XML file by loading it into a DataSet, changing the cell and
/// writing the DataSet back to the same file.
/// </summary>
/// <param name="filePath">Path of the XML file.</param>
/// <param name="tableName">Name of the table that holds the cell.</param>
/// <param name="rowIndex">Zero-based row index.</param>
/// <param name="colName">Column name.</param>
/// <param name="content">New cell value.</param>
/// <returns>True when the cell was found and the file rewritten; false when the table, row or column does not exist.</returns>
public static bool UpdateTableCell(string filePath, string tableName, int rowIndex, string colName, string content)
{
    DataSet ds = new DataSet();
    ds.ReadXml(filePath);

    // The old "cell != null" test could never fail (the row indexer yields DBNull, not null)
    // and the lookups threw on a missing table/row/column. Check existence explicitly instead.
    DataTable dt = ds.Tables[tableName];
    if (dt == null)
    {
        return false;
    }
    if (rowIndex < 0 || rowIndex >= dt.Rows.Count)
    {
        return false;
    }
    if (colName == null || !dt.Columns.Contains(colName))
    {
        return false;
    }

    dt.Rows[rowIndex][colName] = content;
    ds.WriteXml(filePath);
    return true;
}
#endregion
#region Update an XML file node via DataSet (column index overload)
/// <summary>
/// Updates one cell of an XML file by loading it into a DataSet, changing the cell and
/// writing the DataSet back to the same file.
/// </summary>
/// <param name="filePath">Path of the XML file.</param>
/// <param name="tableName">Name of the table that holds the cell.</param>
/// <param name="rowIndex">Zero-based row index.</param>
/// <param name="colIndex">Zero-based column index.</param>
/// <param name="content">New cell value.</param>
/// <returns>True when the cell was found and the file rewritten; false when the table, row or column does not exist.</returns>
public static bool UpdateTableCell(string filePath, string tableName, int rowIndex, int colIndex, string content)
{
    DataSet ds = new DataSet();
    ds.ReadXml(filePath);

    // The old "cell != null" test could never fail (the row indexer yields DBNull, not null)
    // and the lookups threw on a missing table/row/column. Check existence explicitly instead.
    DataTable dt = ds.Tables[tableName];
    if (dt == null)
    {
        return false;
    }
    if (rowIndex < 0 || rowIndex >= dt.Rows.Count)
    {
        return false;
    }
    if (colIndex < 0 || colIndex >= dt.Columns.Count)
    {
        return false;
    }

    dt.Rows[rowIndex][colIndex] = content;
    ds.WriteXml(filePath);
    return true;
}
#endregion
#region Read the content of a given node from an XML resource
/// <summary>
/// Returns the inner text of the first node matching "//nodeName" in an XML resource.
/// </summary>
/// <param name="source">File name/URL when <paramref name="xmlType"/> is File; otherwise the XML text.</param>
/// <param name="xmlType">How to interpret <paramref name="source"/>.</param>
/// <param name="nodeName">Node name appended to "//" to form the XPath query.</param>
/// <returns>The node's inner text, or null when no node matches.</returns>
public static object GetNodeValue(string source, XmlType xmlType, string nodeName)
{
    XmlDocument document = new XmlDocument();
    if (xmlType != XmlType.File)
    {
        document.LoadXml(source);
    }
    else
    {
        document.Load(source);
    }

    XmlNode match = document.DocumentElement.SelectSingleNode("//" + nodeName);
    return match == null ? null : match.InnerText;
}
/// <summary>
/// Extracts the text between the first "&lt;nodeName&gt;" tag and the next "&lt;/nodeName&gt;" tag
/// using plain string search (no XML parsing, so tags carrying attributes are not recognised).
/// </summary>
/// <param name="source">Text to search.</param>
/// <param name="nodeName">Tag name to look for.</param>
/// <returns>
/// The text between the tags; null when an argument is null or empty, when either tag is
/// missing, or when the element is empty.
/// </returns>
public static object GetNodeValue(string source, string nodeName)
{
    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(nodeName) || source.Length < nodeName.Length * 2)
    {
        return null;
    }

    string openTag = "<" + nodeName + ">";
    string closeTag = "</" + nodeName + ">";

    // Test the raw IndexOf result: adding the tag length first (as the old code did) hid the
    // -1 "not found" value and produced a bogus substring when only the closing tag existed.
    int open = source.IndexOf(openTag, StringComparison.Ordinal);
    if (open == -1)
    {
        return null;
    }
    int start = open + openTag.Length;

    // Search for the closing tag after the opening tag so an earlier stray closing tag is ignored.
    int end = source.IndexOf(closeTag, start, StringComparison.Ordinal);
    if (end == -1 || start >= end)
    {
        return null;
    }

    return source.Substring(start, end - start);
}
#endregion
#region Update the content of a given node in an XML file
/// <summary>
/// Sets the inner text of the first node matching "//nodeName" in an XML file and saves the file.
/// </summary>
/// <param name="filePath">Path of the XML file.</param>
/// <param name="nodeName">Node name appended to "//" to form the XPath query.</param>
/// <param name="nodeValue">New inner text for the node.</param>
/// <returns>True when a node was found and the file saved; false when no node matches.</returns>
public static bool UpdateNode(string filePath, string nodeName, string nodeValue)
{
    XmlDocument xd = new XmlDocument();
    xd.Load(filePath);

    XmlNode target = xd.DocumentElement.SelectSingleNode("//" + nodeName);
    if (target == null)
    {
        return false;
    }

    target.InnerText = nodeValue;
    // Persist the change: without Save the edit only exists in the in-memory document
    // and the file on disk is left untouched even though true is returned.
    xd.Save(filePath);
    return true;
}
#endregion
#region Work with the data of a given node in an XML file
/// <summary>
/// Returns the inner text of the first node matching "//nodeName" in an XML file.
/// </summary>
/// <param name="path">Path of the XML file.</param>
/// <param name="nodeName">Node name appended to "//" to form the XPath query.</param>
/// <returns>The node's inner text, or an empty string when no node matches.</returns>
public static string GetNodeInfoByNodeName(string path, string nodeName)
{
    XmlDocument document = new XmlDocument();
    document.Load(path);

    XmlNode hit = document.DocumentElement.SelectSingleNode("//" + nodeName);
    return hit != null ? hit.InnerText : "";
}
#endregion
/// <summary>
/// Loads an XML file and returns a DataSet shaped by <paramref name="TableName"/> and <paramref name="flag"/>.
/// When <paramref name="TableName"/> is the empty string: flag=true yields a single table "typeTable"
/// listing the names of all tables in the file; flag=false yields a copy of all data.
/// Otherwise the result holds a copy of the named table only.
/// </summary>
/// <param name="XmlPath">Path of the XML file.</param>
/// <param name="TableName">Name of the wanted table, or "" to address the whole file.</param>
/// <param name="flag">Only consulted when <paramref name="TableName"/> is ""; true returns table names, false returns all data.</param>
/// <returns>The resulting DataSet; it has no tables when the file has none or the named table is absent.</returns>
public static DataSet GetTableByXml(string XmlPath, string TableName, bool flag)
{
    DataSet loaded = new DataSet();
    loaded.ReadXml(XmlPath);

    // A specific table was requested: return just that table (if present).
    if (TableName != "")
    {
        DataSet single = new DataSet();
        DataTable wanted = loaded.Tables[TableName];
        if (wanted != null)
        {
            single.Tables.Add(wanted.Copy());
        }
        return single;
    }

    if (loaded.Tables.Count == 0)
    {
        return new DataSet();
    }

    if (!flag)
    {
        return loaded.Copy();
    }

    // flag == true: report the table names as rows of a one-column table.
    DataSet result = new DataSet();
    DataTable names = new DataTable("typeTable");
    names.Columns.Add("TableName", typeof(string));
    result.Tables.Add(names);
    foreach (DataTable table in loaded.Tables)
    {
        DataRow entry = names.NewRow();
        entry["TableName"] = table.TableName;
        names.Rows.Add(entry);
    }
    return result;
}
/// <summary>
/// Replaces each control character in the ranges U+0000-U+0008, U+000B-U+000C and
/// U+000E-U+001F with a single space. Tab, line feed and carriage return are left alone.
/// </summary>
/// <param name="str">Text to clean.</param>
/// <returns>The text with every matched control character replaced by a space.</returns>
public static string Replaceinvalid(string str)
{
    // No '|' inside the brackets: within a character class it is a literal, so the old
    // pattern also replaced every pipe character in the input.
    System.Text.RegularExpressions.Regex r = new System.Text.RegularExpressions.Regex(@"[\x00-\x08\x0b\x0c\x0e-\x1f]");
    return r.Replace(str, " ");
}
/// <summary>
/// Builds the XML error document returned by the interface, carrying the given error code
/// inside Root/Result/return_result.
/// </summary>
/// <param name="errCode">Error code to embed; XML special characters in it are escaped.</param>
/// <returns>The XML document as a string (declared encoding GB2312).</returns>
public static string GetInterfaceErrorString(string errCode)
{
    // Escape the code so characters such as '<' or '&' cannot break the document.
    string safeCode = System.Security.SecurityElement.Escape(errCode);

    System.Text.StringBuilder sb = new System.Text.StringBuilder();
    sb.Append("<?xml version=\"1.0\" encoding=\"GB2312\"?>");
    sb.Append("<Root>");
    sb.Append("<Result><return_result>" + safeCode + "</return_result></Result>");
    sb.Append("</Root>");
    return sb.ToString();
}
}
}
using System;
using System.Data;
using System.Configuration;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Xml;
namespace sqlhelper
{
/**//// <summary>
/// XML相关通用功能
/// </summary>
public class Xml
{
/// <summary>
/// Parameterless constructor; performs no initialization. Every method of this class
/// visible in this file is static, so no instance is needed to call them.
/// </summary>
public Xml() { }
/// <summary>
/// Tells the XML helpers how to interpret a source string.
/// </summary>
public enum XmlType
{
    /// <summary>The source is handed to a Load/ReadXml call as a file name.</summary>
    File = 0,

    /// <summary>The source is the XML markup itself.</summary>
    String = 1
}
#region Read an XML resource into a DataSet
/// <summary>
/// Loads an XML resource into a new DataSet.
/// </summary>
/// <param name="source">A file name when <paramref name="xmlType"/> is File; otherwise the XML text itself.</param>
/// <param name="xmlType">How to interpret <paramref name="source"/>.</param>
/// <returns>The populated DataSet.</returns>
public static DataSet GetDataSet(string source, XmlType xmlType)
{
    DataSet ds = new DataSet();
    if (xmlType == XmlType.File)
    {
        ds.ReadXml(source);
        return ds;
    }

    XmlDocument xd = new XmlDocument();
    xd.LoadXml(source);
    // Close the reader deterministically (the original never closed it), even if ReadXml throws.
    using (XmlNodeReader xnr = new XmlNodeReader(xd))
    {
        ds.ReadXml(xnr);
    }
    return ds;
}
#endregion
#region Read an XML resource into a DataTable
/// <summary>
/// Loads an XML resource into a DataSet and returns one of its tables.
/// </summary>
/// <param name="source">A file name when <paramref name="xmlType"/> is File; otherwise the XML text itself.</param>
/// <param name="xmlType">How to interpret <paramref name="source"/>.</param>
/// <param name="tableName">Name of the table to return.</param>
/// <returns>The named table, or null when the loaded DataSet has no table with that name.</returns>
public static DataTable GetTable(string source, XmlType xmlType, string tableName)
{
    DataSet ds = new DataSet();
    if (xmlType == XmlType.File)
    {
        ds.ReadXml(source);
    }
    else
    {
        XmlDocument xd = new XmlDocument();
        xd.LoadXml(source);
        // Close the reader deterministically (the original never closed it), even if ReadXml throws.
        using (XmlNodeReader xnr = new XmlNodeReader(xd))
        {
            ds.ReadXml(xnr);
        }
    }
    return ds.Tables[tableName];
}
#endregion
#region Get a DataSet from an XML string
/// <remarks>
/// 赵洪 (name carried over from the original remarks)
/// </remarks>
/// <summary>
/// Parses an XML string and reads it into the supplied DataSet.
/// </summary>
/// <param name="xml_string">String containing the XML document.</param>
/// <param name="ds">DataSet to fill; when null, a new DataSet is created and handed back through the ref parameter.</param>
public static void get_XmlValue_ds(string xml_string, ref DataSet ds)
{
    XmlDocument xd = new XmlDocument();
    xd.LoadXml(xml_string);

    // A null target used to crash inside ReadXml; since the parameter is passed by
    // reference we can create the DataSet for the caller instead.
    if (ds == null)
    {
        ds = new DataSet();
    }

    // using closes the reader even when ReadXml throws.
    using (XmlNodeReader xnr = new XmlNodeReader(xd))
    {
        ds.ReadXml(xnr);
    }
}
#endregion
#region Read the value at a given row and column of a DataTable in an XML resource (column name overload)
/// <summary>
/// Loads an XML resource into a DataSet and returns the value of one cell.
/// </summary>
/// <param name="source">A file name when <paramref name="xmlType"/> is File; otherwise the XML text itself.</param>
/// <param name="xmlType">How to interpret <paramref name="source"/>.</param>
/// <param name="tableName">Name of the table that holds the cell.</param>
/// <param name="rowIndex">Zero-based row index.</param>
/// <param name="colName">Column name.</param>
/// <returns>The cell value; null when the table, row or column does not exist.</returns>
public static object GetTableCell(string source, XmlType xmlType, string tableName, int rowIndex, string colName)
{
    DataSet ds = new DataSet();
    if (xmlType == XmlType.File)
    {
        ds.ReadXml(source);
    }
    else
    {
        XmlDocument xd = new XmlDocument();
        xd.LoadXml(source);
        // Close the reader deterministically (the original never closed it), even if ReadXml throws.
        using (XmlNodeReader xnr = new XmlNodeReader(xd))
        {
            ds.ReadXml(xnr);
        }
    }

    // Honour the documented "null when absent" contract instead of throwing.
    DataTable table = ds.Tables[tableName];
    if (table == null || rowIndex < 0 || rowIndex >= table.Rows.Count)
    {
        return null;
    }
    if (colName == null || !table.Columns.Contains(colName))
    {
        return null;
    }
    return table.Rows[rowIndex][colName];
}
#endregion
#region Read the value at a given row and column of a DataTable in an XML resource (column index overload)
/// <summary>
/// Loads an XML resource into a DataSet and returns the value of one cell.
/// </summary>
/// <param name="source">A file name when <paramref name="xmlType"/> is File; otherwise the XML text itself.</param>
/// <param name="xmlType">How to interpret <paramref name="source"/>.</param>
/// <param name="tableName">Name of the table that holds the cell.</param>
/// <param name="rowIndex">Zero-based row index.</param>
/// <param name="colIndex">Zero-based column index.</param>
/// <returns>The cell value; null when the table, row or column does not exist.</returns>
public static object GetTableCell(string source, XmlType xmlType, string tableName, int rowIndex, int colIndex)
{
    DataSet ds = new DataSet();
    if (xmlType == XmlType.File)
    {
        ds.ReadXml(source);
    }
    else
    {
        XmlDocument xd = new XmlDocument();
        xd.LoadXml(source);
        // Close the reader deterministically (the original never closed it), even if ReadXml throws.
        using (XmlNodeReader xnr = new XmlNodeReader(xd))
        {
            ds.ReadXml(xnr);
        }
    }

    // Honour the documented "null when absent" contract instead of throwing.
    DataTable table = ds.Tables[tableName];
    if (table == null || rowIndex < 0 || rowIndex >= table.Rows.Count)
    {
        return null;
    }
    if (colIndex < 0 || colIndex >= table.Columns.Count)
    {
        return null;
    }
    return table.Rows[rowIndex][colIndex];
}
#endregion
#region Get the value at a given row and column of a table in an XML string
/// <summary>
/// Reads one column value from the N-th row element of an XML string.
/// </summary>
/// <param name="xml_string">The XML text to parse.</param>
/// <param name="tablename">XPath, evaluated from the document root element, that selects the row elements.</param>
/// <param name="row_index">Index into the selected row elements.</param>
/// <param name="col_name">Local name of the child element whose text is wanted.</param>
/// <returns>Inner text of the first matching child, or an empty string when the row or the column is missing.</returns>
public static string get_XmlValue(string xml_string, string tablename, int row_index, string col_name)
{
    XmlDocument xd = new XmlDocument();
    xd.LoadXml(xml_string);

    // Item() yields null when row_index is at or past the end of the matched node list.
    XmlNode row = xd.DocumentElement.SelectNodes(tablename).Item(row_index);
    if (row == null)
    {
        return "";
    }

    // The first child whose local name equals col_name supplies the value.
    foreach (XmlNode cell in row.ChildNodes)
    {
        if (cell.LocalName == col_name)
        {
            return cell.InnerText;
        }
    }

    // Row exists but has no such column.
    return "";
}
/// <summary>
/// Reads one column value from the N-th row element of an XML document that is supplied
/// either as a file or as a string.
/// </summary>
/// <param name="xml_string">File name/URL when <paramref name="isfile"/> is true; otherwise the XML text.</param>
/// <param name="tablename">XPath, evaluated from the document root element, that selects the row elements.</param>
/// <param name="row_index">Index into the selected row elements.</param>
/// <param name="col_name">Local name of the child element whose text is wanted.</param>
/// <param name="isfile">True to load <paramref name="xml_string"/> as a file, false to parse it as XML text.</param>
/// <returns>Inner text of the first matching child, or an empty string when the row or the column is missing.</returns>
public static string get_XmlValue(string xml_string, string tablename, int row_index, string col_name, bool isfile)
{
    XmlDocument xd = new XmlDocument();
    if (isfile)
    {
        xd.Load(xml_string);
    }
    else
    {
        xd.LoadXml(xml_string);
    }

    // Item() yields null when row_index is at or past the end of the matched node list.
    XmlNode row = xd.DocumentElement.SelectNodes(tablename).Item(row_index);
    if (row == null)
    {
        return "";
    }

    // The first child whose local name equals col_name supplies the value.
    foreach (XmlNode cell in row.ChildNodes)
    {
        if (cell.LocalName == col_name)
        {
            return cell.InnerText;
        }
    }

    // Row exists but has no such column.
    return "";
}
#endregion
#region Get a DataTable from an XML string
/// <summary>
/// Parses an XML string into a DataSet and hands back the table with the requested name.
/// </summary>
/// <param name="xml_string">String containing the XML document.</param>
/// <param name="dt">Receives the matching table; set to null when the parsed DataSet has no table with that name.</param>
/// <param name="table_name">Name of the table to extract.</param>
public static void get_XmlValue_dt(string xml_string, ref DataTable dt, string table_name)
{
    XmlDocument xd = new XmlDocument();
    xd.LoadXml(xml_string);

    DataSet ds = new DataSet();
    // using closes the reader even when ReadXml throws.
    using (XmlNodeReader xnr = new XmlNodeReader(xd))
    {
        ds.ReadXml(xnr);
    }

    dt = ds.Tables[table_name];
}
#endregion
#region Write a DataTable to an XML file
/// <summary>
/// Writes a copy of a DataTable to an XML file whose root element is named "Config".
/// </summary>
/// <param name="dt">Table holding the data to write.</param>
/// <param name="filePath">Destination file path.</param>
public static void SaveTableToFile(DataTable dt, string filePath)
{
    // Work on a copy so the caller's table is never attached to this temporary DataSet.
    DataTable snapshot = dt.Copy();

    DataSet wrapper = new DataSet("Config");
    wrapper.Tables.Add(snapshot);
    wrapper.WriteXml(filePath);
}
#endregion
#region Write a DataTable to a file under a given root node name
/// <summary>
/// Writes a copy of a DataTable to an XML file using the supplied root element name.
/// </summary>
/// <param name="dt">Table holding the data to write.</param>
/// <param name="rootName">Name given to the DataSet, which becomes the root element of the file.</param>
/// <param name="filePath">Destination file path.</param>
public static void SaveTableToFile(DataTable dt, string rootName, string filePath)
{
    // Work on a copy so the caller's table is never attached to this temporary DataSet.
    DataTable snapshot = dt.Copy();

    DataSet wrapper = new DataSet(rootName);
    wrapper.Tables.Add(snapshot);
    wrapper.WriteXml(filePath);
}
#endregion
#region Update an XML file node via DataSet (column name overload)
/// <summary>
/// Updates one cell of an XML file by loading it into a DataSet, changing the cell and
/// writing the DataSet back to the same file.
/// </summary>
/// <param name="filePath">Path of the XML file.</param>
/// <param name="tableName">Name of the table that holds the cell.</param>
/// <param name="rowIndex">Zero-based row index.</param>
/// <param name="colName">Column name.</param>
/// <param name="content">New cell value.</param>
/// <returns>True when the cell was found and the file rewritten; false when the table, row or column does not exist.</returns>
public static bool UpdateTableCell(string filePath, string tableName, int rowIndex, string colName, string content)
{
    DataSet ds = new DataSet();
    ds.ReadXml(filePath);

    // The old "cell != null" test could never fail (the row indexer yields DBNull, not null)
    // and the lookups threw on a missing table/row/column. Check existence explicitly instead.
    DataTable dt = ds.Tables[tableName];
    if (dt == null)
    {
        return false;
    }
    if (rowIndex < 0 || rowIndex >= dt.Rows.Count)
    {
        return false;
    }
    if (colName == null || !dt.Columns.Contains(colName))
    {
        return false;
    }

    dt.Rows[rowIndex][colName] = content;
    ds.WriteXml(filePath);
    return true;
}
#endregion
#region Update an XML file node via DataSet (column index overload)
/// <summary>
/// Updates one cell of an XML file by loading it into a DataSet, changing the cell and
/// writing the DataSet back to the same file.
/// </summary>
/// <param name="filePath">Path of the XML file.</param>
/// <param name="tableName">Name of the table that holds the cell.</param>
/// <param name="rowIndex">Zero-based row index.</param>
/// <param name="colIndex">Zero-based column index.</param>
/// <param name="content">New cell value.</param>
/// <returns>True when the cell was found and the file rewritten; false when the table, row or column does not exist.</returns>
public static bool UpdateTableCell(string filePath, string tableName, int rowIndex, int colIndex, string content)
{
    DataSet ds = new DataSet();
    ds.ReadXml(filePath);

    // The old "cell != null" test could never fail (the row indexer yields DBNull, not null)
    // and the lookups threw on a missing table/row/column. Check existence explicitly instead.
    DataTable dt = ds.Tables[tableName];
    if (dt == null)
    {
        return false;
    }
    if (rowIndex < 0 || rowIndex >= dt.Rows.Count)
    {
        return false;
    }
    if (colIndex < 0 || colIndex >= dt.Columns.Count)
    {
        return false;
    }

    dt.Rows[rowIndex][colIndex] = content;
    ds.WriteXml(filePath);
    return true;
}
#endregion
#region Read the content of a given node from an XML resource
/// <summary>
/// Returns the inner text of the first node matching "//nodeName" in an XML resource.
/// </summary>
/// <param name="source">File name/URL when <paramref name="xmlType"/> is File; otherwise the XML text.</param>
/// <param name="xmlType">How to interpret <paramref name="source"/>.</param>
/// <param name="nodeName">Node name appended to "//" to form the XPath query.</param>
/// <returns>The node's inner text, or null when no node matches.</returns>
public static object GetNodeValue(string source, XmlType xmlType, string nodeName)
{
    XmlDocument document = new XmlDocument();
    if (xmlType != XmlType.File)
    {
        document.LoadXml(source);
    }
    else
    {
        document.Load(source);
    }

    XmlNode match = document.DocumentElement.SelectSingleNode("//" + nodeName);
    return match == null ? null : match.InnerText;
}
/// <summary>
/// Extracts the text between the first "&lt;nodeName&gt;" tag and the next "&lt;/nodeName&gt;" tag
/// using plain string search (no XML parsing, so tags carrying attributes are not recognised).
/// </summary>
/// <param name="source">Text to search.</param>
/// <param name="nodeName">Tag name to look for.</param>
/// <returns>
/// The text between the tags; null when an argument is null or empty, when either tag is
/// missing, or when the element is empty.
/// </returns>
public static object GetNodeValue(string source, string nodeName)
{
    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(nodeName) || source.Length < nodeName.Length * 2)
    {
        return null;
    }

    string openTag = "<" + nodeName + ">";
    string closeTag = "</" + nodeName + ">";

    // Test the raw IndexOf result: adding the tag length first (as the old code did) hid the
    // -1 "not found" value and produced a bogus substring when only the closing tag existed.
    int open = source.IndexOf(openTag, StringComparison.Ordinal);
    if (open == -1)
    {
        return null;
    }
    int start = open + openTag.Length;

    // Search for the closing tag after the opening tag so an earlier stray closing tag is ignored.
    int end = source.IndexOf(closeTag, start, StringComparison.Ordinal);
    if (end == -1 || start >= end)
    {
        return null;
    }

    return source.Substring(start, end - start);
}
#endregion
#region Update the content of a given node in an XML file
/// <summary>
/// Sets the inner text of the first node matching "//nodeName" in an XML file and saves the file.
/// </summary>
/// <param name="filePath">Path of the XML file.</param>
/// <param name="nodeName">Node name appended to "//" to form the XPath query.</param>
/// <param name="nodeValue">New inner text for the node.</param>
/// <returns>True when a node was found and the file saved; false when no node matches.</returns>
public static bool UpdateNode(string filePath, string nodeName, string nodeValue)
{
    XmlDocument xd = new XmlDocument();
    xd.Load(filePath);

    XmlNode target = xd.DocumentElement.SelectSingleNode("//" + nodeName);
    if (target == null)
    {
        return false;
    }

    target.InnerText = nodeValue;
    // Persist the change: without Save the edit only exists in the in-memory document
    // and the file on disk is left untouched even though true is returned.
    xd.Save(filePath);
    return true;
}
#endregion
#region Work with the data of a given node in an XML file
/// <summary>
/// Returns the inner text of the first node matching "//nodeName" in an XML file.
/// </summary>
/// <param name="path">Path of the XML file.</param>
/// <param name="nodeName">Node name appended to "//" to form the XPath query.</param>
/// <returns>The node's inner text, or an empty string when no node matches.</returns>
public static string GetNodeInfoByNodeName(string path, string nodeName)
{
    XmlDocument document = new XmlDocument();
    document.Load(path);

    XmlNode hit = document.DocumentElement.SelectSingleNode("//" + nodeName);
    return hit != null ? hit.InnerText : "";
}
#endregion
/// <summary>
/// Loads an XML file and returns a DataSet shaped by <paramref name="TableName"/> and <paramref name="flag"/>.
/// When <paramref name="TableName"/> is the empty string: flag=true yields a single table "typeTable"
/// listing the names of all tables in the file; flag=false yields a copy of all data.
/// Otherwise the result holds a copy of the named table only.
/// </summary>
/// <param name="XmlPath">Path of the XML file.</param>
/// <param name="TableName">Name of the wanted table, or "" to address the whole file.</param>
/// <param name="flag">Only consulted when <paramref name="TableName"/> is ""; true returns table names, false returns all data.</param>
/// <returns>The resulting DataSet; it has no tables when the file has none or the named table is absent.</returns>
public static DataSet GetTableByXml(string XmlPath, string TableName, bool flag)
{
    DataSet loaded = new DataSet();
    loaded.ReadXml(XmlPath);

    // A specific table was requested: return just that table (if present).
    if (TableName != "")
    {
        DataSet single = new DataSet();
        DataTable wanted = loaded.Tables[TableName];
        if (wanted != null)
        {
            single.Tables.Add(wanted.Copy());
        }
        return single;
    }

    if (loaded.Tables.Count == 0)
    {
        return new DataSet();
    }

    if (!flag)
    {
        return loaded.Copy();
    }

    // flag == true: report the table names as rows of a one-column table.
    DataSet result = new DataSet();
    DataTable names = new DataTable("typeTable");
    names.Columns.Add("TableName", typeof(string));
    result.Tables.Add(names);
    foreach (DataTable table in loaded.Tables)
    {
        DataRow entry = names.NewRow();
        entry["TableName"] = table.TableName;
        names.Rows.Add(entry);
    }
    return result;
}
/// <summary>
/// Replaces each control character in the ranges U+0000-U+0008, U+000B-U+000C and
/// U+000E-U+001F with a single space. Tab, line feed and carriage return are left alone.
/// </summary>
/// <param name="str">Text to clean.</param>
/// <returns>The text with every matched control character replaced by a space.</returns>
public static string Replaceinvalid(string str)
{
    // No '|' inside the brackets: within a character class it is a literal, so the old
    // pattern also replaced every pipe character in the input.
    System.Text.RegularExpressions.Regex r = new System.Text.RegularExpressions.Regex(@"[\x00-\x08\x0b\x0c\x0e-\x1f]");
    return r.Replace(str, " ");
}
/// <summary>
/// Builds the XML error document returned by the interface, carrying the given error code
/// inside Root/Result/return_result.
/// </summary>
/// <param name="errCode">Error code to embed; XML special characters in it are escaped.</param>
/// <returns>The XML document as a string (declared encoding GB2312).</returns>
public static string GetInterfaceErrorString(string errCode)
{
    // Escape the code so characters such as '<' or '&' cannot break the document.
    string safeCode = System.Security.SecurityElement.Escape(errCode);

    System.Text.StringBuilder sb = new System.Text.StringBuilder();
    sb.Append("<?xml version=\"1.0\" encoding=\"GB2312\"?>");
    sb.Append("<Root>");
    sb.Append("<Result><return_result>" + safeCode + "</return_result></Result>");
    sb.Append("</Root>");
    return sb.ToString();
}
}
}