HtmlEntities
#region GetOnlyTextFromHtmlCode + RemoveHtmlChars + RemoveTagFromHtmlCode

/// <summary>
/// Converts an HTML fragment to plain text: paragraph ends and line breaks
/// become newlines, comments, style/script elements and all remaining tags
/// are removed, character references are decoded and runs of spaces are
/// collapsed to a single space.
/// </summary>
/// <remarks>
/// Adapted from the ZetaHtmlEditControl source published on CodeProject:
/// http://www.codeproject.com/script/Content/ViewAssociatedFile.aspx?rzp=%2FKB%2Fedit%2FZetaHtmlEditControl%2F%2FZetaHtmlEditControl-Source.zip&amp;zep=Control%2FHtmlEditControl.cs&amp;obid=43954&amp;obtid=2&amp;ovid=13
/// </remarks>
/// <param name="htmlCode">The HTML markup to convert. May be null or empty.</param>
/// <returns>
/// The plain-text rendering of <paramref name="htmlCode"/>; a null or empty
/// input is returned unchanged.
/// </returns>
private static string getOnlyTextFromHtmlCode(string htmlCode)
{
    // Nothing to strip; also avoids a NullReferenceException on null input.
    if (string.IsNullOrEmpty(htmlCode))
    {
        return htmlCode;
    }

    // Line breaks in the markup itself carry no meaning in HTML: flatten them
    // to spaces first so that only the breaks introduced below survive.
    htmlCode = htmlCode.Replace("\r\n", @" ");
    htmlCode = htmlCode.Replace("\r", @" ");
    htmlCode = htmlCode.Replace("\n", @" ");

    // End of paragraph -> blank line (any casing, optional space before '>').
    htmlCode = Regex.Replace(
        htmlCode,
        @"</p\s*>",
        Environment.NewLine + Environment.NewLine,
        RegexOptions.IgnoreCase);

    // HTML comments.
    htmlCode = Regex.Replace(
        htmlCode,
        @"<!--.*?-->",
        string.Empty,
        RegexOptions.Singleline);

    // <br>, <br/>, <br clear="all"> ... -> single line break.
    htmlCode = Regex.Replace(
        htmlCode,
        @"<br[^>]*>",
        Environment.NewLine,
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    // Elements whose *content* must go as well, not just their tags.
    htmlCode = removeTagFromHtmlCode(@"style", htmlCode);
    htmlCode = removeTagFromHtmlCode(@"script", htmlCode);

    // Every remaining tag. A negated character class replaces the original
    // "(.|\n)+?" alternation, which was redundant under Singleline.
    htmlCode = Regex.Replace(htmlCode, @"<[^>]+>", string.Empty);

    // Character references (umlauts etc.).
    htmlCode = unescapeHtmlEntities(htmlCode);

    // Collapse runs of spaces; newlines inserted above are left intact.
    htmlCode = Regex.Replace(htmlCode, @" +", @" ");

    return htmlCode;
}

/// <summary>
/// Decodes HTML character references (for example <c>&amp;auml;</c> or
/// <c>&amp;#228;</c>) into the characters they stand for.
/// See http://dev.w3.org/html5/html-author/charref for the entity names.
/// </summary>
/// <param name="htmlCode">Text that may contain character references. May be null.</param>
/// <returns>The decoded text, or null when <paramref name="htmlCode"/> is null.</returns>
private static string unescapeHtmlEntities(string htmlCode)
{
    if (string.IsNullOrEmpty(htmlCode))
    {
        return htmlCode;
    }

    // The framework decoder covers named and numeric references.
    // Fully qualified so no additional using directive is required (.NET 4.0+).
    string decoded = System.Net.WebUtility.HtmlDecode(htmlCode);

    // &nbsp; decodes to U+00A0; map it to a plain space so the caller's
    // space-collapsing step treats it like any other blank.
    return decoded.Replace('\u00A0', ' ');
}

/// <summary>
/// Removes every <paramref name="tag"/> element from the markup, including
/// its opening tag, its content and its closing tag.
/// </summary>
/// <param name="tag">Element name without angle brackets, e.g. <c>script</c>.</param>
/// <param name="htmlCode">The markup to clean. May be null or empty.</param>
/// <returns>The markup without the matched elements.</returns>
private static string removeTagFromHtmlCode(string tag, string htmlCode)
{
    if (string.IsNullOrEmpty(htmlCode) || string.IsNullOrEmpty(tag))
    {
        return htmlCode;
    }

    // Escape the name so it is matched literally; \b keeps "style" from also
    // matching "<styles ...>"; \s* tolerates "</script >".
    string pattern = string.Format(@"<{0}\b.*?</{0}\s*>", Regex.Escape(tag));

    return Regex.Replace(
        htmlCode,
        pattern,
        string.Empty,
        RegexOptions.Singleline | RegexOptions.IgnoreCase);
}

#endregion
// Manual entity table: each call replaces one named HTML character reference
// in htmlCode with the character it denotes.
// NOTE(review): in the listing these lines came from, the entity names had been
// decoded to their characters (making the calls no-ops), "Ä" was mapped to "ä"
// and "ø" to "Ø", and five names read "&absp;", "&obsp;", "&Obsp;", "&ubsp;",
// "&Ubsp;". They are reconstructed here as the German umlauts based on their
// a/o/O/u/U order — confirm against the original source before relying on it.
htmlCode = htmlCode.Replace(@"&nbsp;", @" ");
htmlCode = htmlCode.Replace(@"&Auml;", @"Ä");
htmlCode = htmlCode.Replace(@"&auml;", @"ä");
htmlCode = htmlCode.Replace(@"&ouml;", @"ö");
htmlCode = htmlCode.Replace(@"&Ouml;", @"Ö");
htmlCode = htmlCode.Replace(@"&uuml;", @"ü");
htmlCode = htmlCode.Replace(@"&Uuml;", @"Ü");
htmlCode = htmlCode.Replace(@"&szlig;", @"ß");
htmlCode = htmlCode.Replace(@"&pound;", @"£");
htmlCode = htmlCode.Replace(@"&sect;", @"§");
htmlCode = htmlCode.Replace(@"&copy;", @"©");
htmlCode = htmlCode.Replace(@"&reg;", @"®");
htmlCode = htmlCode.Replace(@"&micro;", @"µ");
htmlCode = htmlCode.Replace(@"&para;", @"¶");
htmlCode = htmlCode.Replace(@"&Oslash;", @"Ø");
htmlCode = htmlCode.Replace(@"&oslash;", @"ø");
htmlCode = htmlCode.Replace(@"&divide;", @"÷");
htmlCode = htmlCode.Replace(@"&times;", @"×");
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:基于图像分类模型对图像进行分类
· go语言实现终端里的倒计时
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 零经验选手,Compose 一天开发一款小游戏!
· 一起来玩mcp_server_sqlite,让AI帮你做增删改查!!