HtmlEntities

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#region GetOnlyTextFromHtmlCode + RemoveHtmlChars + RemoveTagFromHtmlCode
        /// <summary>
        /// http://www.codeproject.com/script/Content/ViewAssociatedFile.aspx?rzp=%2FKB%2Fedit%2FZetaHtmlEditControl%2F%2FZetaHtmlEditControl-Source.zip&zep=Control%2FHtmlEditControl.cs&obid=43954&obtid=2&ovid=13
        /// </summary>
        /// <param name="htmlCode"></param>
        /// <returns></returns>
        private static string getOnlyTextFromHtmlCode(string htmlCode)
        {
            //<br>
            htmlCode = htmlCode.Replace("\r\n", @" ");
            htmlCode = htmlCode.Replace("\r", @" ");
            htmlCode = htmlCode.Replace("\n", @" ");
 
            htmlCode = htmlCode.Replace(@"</p>", Environment.NewLine + Environment.NewLine);
            htmlCode = htmlCode.Replace(@"</P>", Environment.NewLine + Environment.NewLine);
 
            //html comment
            htmlCode = Regex.Replace(
                htmlCode,
                @"<!--.*?-->",
                string.Empty,
                RegexOptions.Singleline | RegexOptions.IgnoreCase);
 
            //<p>
            htmlCode = Regex.Replace(htmlCode,
                @"<br[^>]*>",
                Environment.NewLine,
                RegexOptions.Singleline | RegexOptions.IgnoreCase);
 
            //tags
            htmlCode = removeTagFromHtmlCode(@"style", htmlCode);
            htmlCode = removeTagFromHtmlCode(@"script", htmlCode);
 
            //html
            htmlCode = Regex.Replace(
                htmlCode,
                "<(.|\n)+?>",
                string.Empty,
                RegexOptions.Singleline | RegexOptions.IgnoreCase);
 
            //umlaute
            htmlCode = unescapeHtmlEntities(htmlCode);
 
            //whitespaces
            htmlCode = Regex.Replace(
                htmlCode,
                @" +",
                @" ",
                RegexOptions.Singleline | RegexOptions.IgnoreCase);
 
            return htmlCode;
        }
        /// <summary>
        /// http://dev.w3.org/html5/html-author/charref
        /// </summary>
        /// <param name="htmlCode"></param>
        /// <returns></returns>
        private static string unescapeHtmlEntities(string htmlCode)
        {            return htmlCode;
        }
 
        private static string removeTagFromHtmlCode(
            string tag,
            string htmlCode)
        {
            return Regex.Replace(
                htmlCode,
                string.Format(@"<{0}.*?</{1}>", tag, tag),
                string.Empty,
                RegexOptions.Singleline | RegexOptions.IgnoreCase);
        }
        #endregion

      htmlCode = htmlCode.Replace(@"&nbsp;", @" ");

      htmlCode = htmlCode.Replace(@"&Auml;", @"ä");
      htmlCode = htmlCode.Replace(@"&absp;", @"");
      htmlCode = htmlCode.Replace(@"&obsp;", @"");
      htmlCode = htmlCode.Replace(@"&Obsp;", @"");
      htmlCode = htmlCode.Replace(@"&ubsp;", @"");
      htmlCode = htmlCode.Replace(@"&Ubsp;", @"");
      htmlCode = htmlCode.Replace(@"&szlig;", @"ß");

      htmlCode = htmlCode.Replace(@"&pound;", @"£");
      htmlCode = htmlCode.Replace(@"&sect;", @"§");
      htmlCode = htmlCode.Replace(@"&copy;", @"©");
      htmlCode = htmlCode.Replace(@"&reg;", @"®");
      htmlCode = htmlCode.Replace(@"&micro;", @"µ");
      htmlCode = htmlCode.Replace(@"&para;", @"¶");
      htmlCode = htmlCode.Replace(@"&Oslash;", @"Ø");
      htmlCode = htmlCode.Replace(@"&oslash;", @"Ø");
      htmlCode = htmlCode.Replace(@"&divide;", @"÷");
      htmlCode = htmlCode.Replace(@"&times;", @"×");

  

posted @   ®Geovin Du Dream Park™  阅读(360)  评论(0)  编辑  收藏  举报
编辑推荐:
· AI与.NET技术实操系列:基于图像分类模型对图像进行分类
· go语言实现终端里的倒计时
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
阅读排行:
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 零经验选手,Compose 一天开发一款小游戏!
· 一起来玩mcp_server_sqlite,让AI帮你做增删改查!!
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5
点击右上角即可分享
微信分享提示