C# 去掉字符串中的 HTML 标签,仅保留指定的标签和属性
/// <summary>
/// Usage example: filters a sample HTML document so that only
/// <c>a</c> and <c>img</c> tags (with a few whitelisted attributes)
/// survive, then writes the result to the console.
/// </summary>
public static void HtmlRemove()
{
    string requestBody = "<html><head><title>Test</title></head><body><a lay-her='123' href=\"https://example.com\">Link</a><p>Not allowed</p><span class=\"developer\"><img src=\"https://www.luocore.com/assets/logo-dark.be3794d7.png\"> <span>LuoCore</span></span><img lay-her='123' data-luo='222' src=\"图片路径\" data=\"test\" /> <a data-luo='222' href=\"baidu.com\" /> <div><span>测试标签</span><img src=\"https://pic.cnblogs.com/face/646489/20140908123308.png\" class=\"avatar\" alt=\"博主头像\"></div></body></html>";

    Dictionary<string, string[]> allowedTags = new Dictionary<string, string[]>()
    {
        { "a", new string[] { "href", "data-luo" } },
        { "img", new string[] { "src", "lay-her", "data-luo" } }
    };

    // Filter the HTML tags.
    string filteredRequestBody = HtmlRemoveTagsExcept(requestBody, allowedTags);
    Console.WriteLine(filteredRequestBody);
}

/// <summary>
/// Removes every HTML tag from <paramref name="html"/> except the tags listed in
/// <paramref name="allowedTags"/>, and strips every attribute of a kept tag that
/// is not whitelisted for that tag.
/// </summary>
/// <remarks>
/// <para>
/// Only the tags themselves are removed; the text between a removed opening and
/// closing tag is left in the output. Runs of two or more whitespace characters
/// anywhere in the result are collapsed to a single space.
/// </para>
/// <para>
/// Matching is regex based, not a real HTML parser: a literal <c>&gt;</c> inside an
/// attribute value ends the tag early. Attribute values are copied verbatim and are
/// not validated, so this method on its own is not a complete XSS sanitizer.
/// </para>
/// </remarks>
/// <param name="html">The HTML text to filter. <c>null</c> or empty yields an empty string.</param>
/// <param name="allowedTags">
/// Maps a tag name to the attribute names that may be kept on that tag (both matched
/// case-insensitively). A <c>null</c> map or a <c>null</c> attribute array is treated
/// as empty; blank tag names are ignored. Only quoted attribute values are kept.
/// </param>
/// <returns>The filtered HTML.</returns>
public static string HtmlRemoveTagsExcept(string html, Dictionary<string, string[]> allowedTags)
{
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    // Blank tag names would produce an empty regex alternative that matches everything.
    List<KeyValuePair<string, string[]>> rules =
        (allowedTags ?? new Dictionary<string, string[]>())
            .Where(rule => !string.IsNullOrWhiteSpace(rule.Key))
            .ToList();

    // Step 1: remove every tag that is not allowed.
    // The optional "/" lives INSIDE the negative lookahead. If it sat in front of it,
    // the engine could backtrack the "/" away and the lookahead would then approve
    // "</a>" for removal, silently dropping the closing tags of allowed elements.
    string alternation = string.Join("|", rules.Select(rule => Regex.Escape(rule.Key)));
    string stripPattern = rules.Count == 0
        ? "<[^>]*>"
        : "<(?!/?(?:" + alternation + ")(?:\\s|/?>))[^>]*>";
    string result = Regex.Replace(html, stripPattern, "", RegexOptions.IgnoreCase);

    // Step 2: for each allowed tag, rebuild its opening tag with whitelisted attributes only.
    foreach (KeyValuePair<string, string[]> rule in rules)
    {
        string tag = rule.Key;
        HashSet<string> allowedAttributes = new HashSet<string>(
            rule.Value ?? new string[0],
            StringComparer.OrdinalIgnoreCase);

        // Group 1: raw attribute text (may be absent). Group 2: ">" or "/>".
        string openTagPattern = "<" + Regex.Escape(tag) + "(\\s[^>]*?)?(/?>)";
        result = Regex.Replace(
            result,
            openTagPattern,
            match => "<" + tag
                     + KeepAllowedAttributes(match.Groups[1].Value, allowedAttributes)
                     + match.Groups[2].Value,
            RegexOptions.IgnoreCase);
    }

    // Step 3: collapse redundant whitespace and drop whitespace left before a tag's end.
    result = Regex.Replace(result, @"\s{2,}", " ");
    result = Regex.Replace(result, @"<(\w+)(\s[^>]*?)?\s*(/?>)", "<$1$2$3");
    return result;
}

/// <summary>
/// Returns the concatenation of the quoted <c>name=value</c> pairs found in
/// <paramref name="attributeText"/> whose name is in <paramref name="allowedAttributes"/>,
/// each with its original leading whitespace and quoting.
/// </summary>
private static string KeepAllowedAttributes(string attributeText, HashSet<string> allowedAttributes)
{
    // Each pair is consumed as a whole, and the value must open and close with the SAME
    // quote character. That keeps values such as "it's" intact and prevents text inside
    // a quoted value (title="x href='y'") from being picked up as an attribute.
    const string attributePattern = @"\s+([^\s=/>""']+)\s*=\s*(?:""[^""]*""|'[^']*')";

    return string.Concat(
        Regex.Matches(attributeText, attributePattern)
            .Cast<Match>()
            .Where(attribute => allowedAttributes.Contains(attribute.Groups[1].Value))
            .Select(attribute => attribute.Value));
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
2023-05-16 C#支持格式最多的解压缩开源库SharpCompress
2018-05-16 C# 各种控件实现可拖动和调整大小