C#读取HTML文件内容写入记事本

try
            {
                // Purpose: for every .htm file in the directory typed into textBox1, extract the
                // alarm cause (the text inside the first <h4> element) and the tag-stripped body
                // text, and append both to "告警经验库信息.txt" in that same directory.
                // Any failure (missing directory, I/O error, ...) propagates to the enclosing catch.
                int totalFile = 0;
                //string dirPath = @"E:\chfuMetarnet\BSC6810 alarm\";
                string inputPath = this.textBox1.Text.Trim();
                if (inputPath.Length == 0)
                {
                    MessageBox.Show("请输入HTML文件路径!");
                }
                else
                {
                    // Normalise the directory so a file name can be appended directly.
                    string dirPath = inputPath;
                    if (!dirPath.EndsWith("\\", StringComparison.Ordinal))
                    {
                        dirPath = dirPath + "\\";
                    }

                    DirectoryInfo dirInfo = new DirectoryInfo(dirPath);
                    FileInfo[] files = dirInfo.GetFiles();
                    string filename = dirPath + "告警经验库信息.txt";

                    // Loop-invariant helpers, built once instead of once per file.
                    Encoding encode = System.Text.Encoding.GetEncoding("gb2312");
                    Regex tagRegex = new Regex(@"<[^>]+>|</[^>]+>");                             // any HTML tag
                    Regex listRegex = new Regex("<ul><li>(?<key>.*?)</ul>", RegexOptions.None);  // first <ul><li>...</ul> section
                    // Line breaks and tabs only. The previous class "[\n|\r|\t]" also matched a
                    // literal '|' and silently removed pipe characters from the content.
                    string lineBreakPattern = "[\n\r\t]";
                    RegexOptions tagOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;

                    // AppendText creates the file when it does not exist, so no Exists check is
                    // needed; the using block guarantees the writer is closed even when a file
                    // fails half-way through the batch.
                    using (StreamWriter sw = File.AppendText(filename))
                    {
                        foreach (FileInfo fileinfo in files)
                        {
                            // Only .htm files are processed; compare case-insensitively so
                            // "PAGE.HTM" is not skipped.
                            if (!fileinfo.Extension.Equals(".htm", StringComparison.OrdinalIgnoreCase))
                            {
                                continue;
                            }

                            totalFile = totalFile + 1;

                            // Read the local file directly; the reader is disposed internally.
                            string strhtml = File.ReadAllText(fileinfo.FullName, encode);

                            // Alarm cause: inner text of the first <h4>, line breaks/tabs turned
                            // into spaces. A file without <h4> yields an empty cause instead of
                            // throwing from Substring and aborting the whole run.
                            Match headingMatch = Regex.Match(strhtml, "<h4>([^<]*)</h4>", tagOptions);
                            string cause = headingMatch.Success
                                ? Regex.Replace(headingMatch.Groups[1].Value, lineBreakPattern, " ")
                                : "";

                            // Full <title>...</title> element (tags included) with line breaks
                            // removed, so it can be cut out of the flattened source below.
                            Match titleMatch = Regex.Match(strhtml, "<title>([^<]*)</title>", tagOptions);
                            string titlename = Regex.Replace(titleMatch.Value, lineBreakPattern, "").Trim();

                            // Flatten the document, then drop the first <ul><li>...</ul> section.
                            strhtml = Regex.Replace(strhtml, lineBreakPattern, "");
                            Match listMatch = listRegex.Match(strhtml);
                            if (listMatch.Success)
                            {
                                string dataStr = "<ul><li>" + listMatch.Groups["key"].Value + "</ul>";
                                strhtml = strhtml.Replace(dataStr, "");
                            }

                            // string.Replace throws ArgumentException for an empty search value,
                            // which is what a missing <title> would produce.
                            if (titlename.Length > 0)
                            {
                                strhtml = strhtml.Replace(titlename, "");
                            }

                            strhtml = tagRegex.Replace(strhtml, " ");   // strip the remaining HTML tags
                            strhtml = strhtml.Replace("&nbsp;", "");    // drop non-breaking-space entities

                            // The cause looks like "ALM-1 网元启动"; keep the last space-separated
                            // token. Trim first so trailing whitespace inside <h4> does not make
                            // the last token empty.
                            string[] arr = cause.Trim().Split(' ');
                            // NOTE(review): chinaString is defined outside this block; it may now
                            // receive "" when a file has no <h4> - confirm it tolerates that.
                            string zhCause = this.chinaString(arr[arr.Length - 1]);

                            sw.WriteLine("第" + totalFile + "个文件:" + fileinfo.Name);
                            sw.WriteLine("-----告警原因------:");
                            sw.WriteLine(zhCause);
                            sw.WriteLine("-----处理经验------:");
                            sw.WriteLine(strhtml);
                            sw.WriteLine();
                            sw.Flush();   // keep finished entries on disk even if a later file fails
                        }
                    }

                    MessageBox.Show("操作成功!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
                }
            }
            catch (Exception ee)
            {
                MessageBox

posted on 2010-03-31 13:09  tianyaxiang  阅读(3231)  评论(0)  编辑  收藏  举报

导航