利用WebClient和WebRequest类获得网页源代码

关键是在 Page 指令中设置 ValidateRequest="false"；否则当回发的表单内容包含 HTML 标记时，ASP.NET 的请求验证会报“检测到有潜在危险的 Request.Form 值”的错误。

<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default3.aspx.cs" Inherits="test_Default3"  ValidateRequest="false"%>
<%-- Page that shows the source of a web page whose URL the user enters.
     ValidateRequest="false" is set in the directive above; per the note that
     accompanies this sample, postbacks otherwise fail with a Request.Form error. --%>

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" >
<head runat="server">
    
<title>无标题页</title>
</head>
<body>
    
<form id="form1" runat="server">
     
<div align="center" style="FONT-WEIGHT: bold">得到任意网页源代码</div>
      
<%-- UrlText: the address to fetch; read by all three click handlers. --%>
<asp:TextBox id="UrlText" runat="server" Width="400px">http://boco.com.cn
          
</asp:TextBox>
      
<%-- Wired to WebClientButton_Click in the code-behind. --%>
<asp:Button id="WebClientButton" Runat="server" Text="用WebClient得到" OnClick="WebClientButton_Click"></asp:Button>
      
<%-- Wired to WebRequestButton_Click in the code-behind. --%>
<asp:Button id="WebRequestButton" runat="server" Text="用WebRequest得到" OnClick="WebRequestButton_Click"></asp:Button>
        
<%-- Wired to GetText_Click in the code-behind. --%>
<asp:Button ID="Button1" runat="server" OnClick="GetText_Click" Text="Button" /><br>
      
<%-- ContentHtml: multi-line box that the click handlers fill with the result. --%>
<asp:TextBox id="ContentHtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine">
          
</asp:TextBox>

    
</form>
</body>
</html>

// URL taken from the UrlText box; each click handler assigns it before fetching.
private string PageUrl = "";

/// <summary>
/// Downloads the page at the URL entered in <c>UrlText</c> using <see cref="WebClient"/>
/// and places the raw source text in <c>ContentHtml</c>.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
protected void WebClientButton_Click(object sender, System.EventArgs e)
{
    PageUrl = UrlText.Text;

    // The using block disposes the client even when the download throws;
    // a trailing Dispose() call would be skipped on failure.
    using (WebClient wc = new WebClient())
    {
        // NOTE(review): the URL is user-supplied, so the process's default
        // credentials may be offered to an arbitrary host. Confirm this is
        // intended (e.g. intranet-only use) before deploying.
        wc.Credentials = CredentialCache.DefaultCredentials;

        // Approach 1: download the raw bytes and decode them with the system
        // default encoding. Pages served in a different charset may come out garbled.
        byte[] pageData = wc.DownloadData(PageUrl);
        ContentHtml.Text = Encoding.Default.GetString(pageData);

        // Approach 2 (alternative): wrap wc.OpenRead(PageUrl) in a
        // StreamReader created with Encoding.Default and call ReadToEnd().
    }
}


    
/// <summary>
/// Fetches the page at the URL entered in <c>UrlText</c> using <see cref="WebRequest"/>
/// and places the raw source text in <c>ContentHtml</c>.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
protected void WebRequestButton_Click(object sender, System.EventArgs e)
{
    PageUrl = UrlText.Text;
    WebRequest request = WebRequest.Create(PageUrl);

    // Stacked using blocks release the response, its stream and the reader
    // in reverse order, including when ReadToEnd throws part-way through.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        // Decoded with the system default encoding, as in the WebClient variant.
        ContentHtml.Text = sr.ReadToEnd();
    }
}


    
/// <summary>
/// Fetches the page at the URL entered in <c>UrlText</c>, removes everything that
/// looks like a markup tag, collapses whitespace runs to single spaces, and
/// places the resulting text in <c>ContentHtml</c>.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
protected void GetText_Click(object sender, System.EventArgs e)
{
    PageUrl = UrlText.Text;
    WebRequest request = WebRequest.Create(PageUrl);

    string html;
    // Stacked using blocks release the response, its stream and the reader
    // even when reading fails.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        html = sr.ReadToEnd();
    }

    // Strip anything of the form <...>. Pattern and replacement are separate
    // arguments; the original text had lost the comma between them.
    string withoutTags = Regex.Replace(html, "<[^>]*>", "");

    // Collapse each run of whitespace (spaces, tabs, newlines) into one space.
    ContentHtml.Text = Regex.Replace(withoutTags, "\\s+", " ");
}

posted on 2007-11-29 22:21  执法长老  阅读(246)  评论(0)  编辑  收藏  举报

导航