代码改变世界

利用 WebClient 实现下载并另存为txt 格式的文本文件

2010-08-07 18:08  音乐让我说  阅读(537)  评论(0)  编辑  收藏  举报

前几天看到同事在网上复制、粘贴管理方面的文章,一遍一遍地重复,这让我想到可不可以写一个程序来完成这项工作,于是上网查资料,终于帮他解决了,代码如下:

 

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Text.RegularExpressions;
using System.IO;
using System.Text;

namespace WebUI
{
    /// <summary>
    /// Code-behind for a test page that downloads HTML pages with
    /// <see cref="System.Net.WebClient"/>, strips the markup and saves the
    /// remaining text as .txt files on the local disk.
    /// </summary>
    public partial class TestWebClient : System.Web.UI.Page
    {
        /// <summary>
        /// Page load handler. Intentionally empty.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Click handler of the download button: builds the URL of every page
        /// id in the configured range, echoes it to the response and saves the
        /// page text through <see cref="SetLog"/>.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data (unused).</param>
        protected void btnDownLoad_Click(object sender, EventArgs e)
        {
            // Inclusive range of page ids to fetch; currently a single page.
            const int firstPageId = 1507;
            const int lastPageId = 1507;

            for (int i = firstPageId; i <= lastPageId; i++)
            {
                string url = "http://www.ccmcsz.com/management/" + i + ".htm";
                Response.Write(url);
                SetLog(url, i.ToString());
                Response.Write("<br/>");
            }
        }

        /// <summary>
        /// Downloads <paramref name="url"/>, decodes it as gb2312, removes the
        /// HTML markup with <see cref="DelHTML"/> and appends the text to
        /// <c>D:\Test163\&lt;name&gt;.txt</c>.
        /// </summary>
        /// <remarks>
        /// This method is best-effort: it never throws. On failure the URL and
        /// the exception message are written (HTML-encoded) to the response.
        /// </remarks>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="name">File name (without extension) of the output file.</param>
        public void SetLog(string url, string name)
        {
            const string outputDirectory = @"D:\Test163\";

            try
            {
                Encoding pageEncoding = Encoding.GetEncoding("gb2312");
                string targetPath = Path.Combine(outputDirectory, name + ".txt");

                // CreateDirectory is a no-op when the directory already exists.
                Directory.CreateDirectory(outputDirectory);

                string message;
                // 'using' guarantees the client, the network stream and the
                // reader are released even when the download fails half-way.
                using (System.Net.WebClient client = new System.Net.WebClient())
                using (Stream body = client.OpenRead(url))
                using (StreamReader reader = new StreamReader(body, pageEncoding))
                {
                    message = reader.ReadToEnd();
                }

                message = DelHTML(message);

                // append: true -- running the download again for the same name
                // adds to the existing file instead of replacing it.
                using (StreamWriter writer = new StreamWriter(targetPath, true, pageEncoding))
                {
                    writer.Write(message);
                }
            }
            catch (Exception ex)
            {
                // Keep going with the next page, but report why this one failed.
                this.Response.Write(HttpUtility.HtmlEncode(url + " : " + ex.Message) + "<br/>");
            }
        }

        /// <summary>
        /// Converts an HTML fragment to plain text: removes script blocks,
        /// tags and comment remnants, decodes a handful of common character
        /// entities, drops all other numeric entities, and finally removes any
        /// remaining angle brackets and CRLF pairs.
        /// </summary>
        /// <param name="Htmlstring">HTML text to clean. May be null or empty.</param>
        /// <returns>
        /// The text with markup removed; an empty string when
        /// <paramref name="Htmlstring"/> is null or empty.
        /// </returns>
        public static string DelHTML(string Htmlstring)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return string.Empty;
            }

            // Collapse line breaks that are followed by whitespace.
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

            // Remove scripts. Singleline lets '.' match '\n' so script blocks
            // that still span several lines are removed together with their body.
            Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

            // Remove HTML tags and what is left of comments.
            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

            // Decode the common character entities.
            Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);

            // Any other numeric entity is dropped.
            Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

            // string.Replace returns a new string, so the result must be
            // assigned back. Note this also removes brackets produced by the
            // &lt; / &gt; decoding above.
            Htmlstring = Htmlstring.Replace("<", "");
            Htmlstring = Htmlstring.Replace(">", "");
            Htmlstring = Htmlstring.Replace("\r\n", "");

            return Htmlstring;
        }

    }
}

 

等待更新...