删除脚本和HTML标签的方法
/// <summary>
/// Strips script blocks, comments and markup tags from an HTML fragment and
/// decodes a small set of common character entities, returning plain text.
/// </summary>
/// <param name="Htmlstring">The HTML text to clean. May be null or empty.</param>
/// <returns>
/// The cleaned text. A null or empty input is returned unchanged. The result
/// never contains literal angle brackets, because any that remain after
/// decoding are removed at the end.
/// </returns>
public static String RMHTML(String Htmlstring)
{
    // Nothing to clean; also avoids an ArgumentNullException from Regex.Replace.
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return Htmlstring;
    }

    // Remove script elements together with their content.
    Htmlstring = RegexPattern("<\\s*?script[^>]*>[\\s\\S]*?<\\s*?/\\s*?script\\s*?>", "", Htmlstring);

    // Remove complete comments as a unit before the generic tag pass, so that a
    // '>' inside a comment cannot cut the comment short and leak its tail.
    Htmlstring = RegexPattern("<!--[\\s\\S]*?-->", "", Htmlstring);

    // Remove every remaining tag.
    Htmlstring = RegexPattern("<([^>]*)>", "", Htmlstring);

    // Remove a line break together with the whitespace run that follows it.
    Htmlstring = RegexPattern("([\r\n])[\\s]+", "", Htmlstring);

    // Clean up fragments of unterminated or unopened comments.
    Htmlstring = RegexPattern("-->", "", Htmlstring);
    Htmlstring = RegexPattern("<!--.*", "", Htmlstring);

    // Decode common entities. Replacement strings are not regex patterns, so the
    // target characters are written as C# escapes rather than "\\xNN" text
    // (which would be inserted literally).
    Htmlstring = RegexPattern("&(quot|#34);", "\"", Htmlstring);
    Htmlstring = RegexPattern("&(lt|#60);", "<", Htmlstring);
    Htmlstring = RegexPattern("&(gt|#62);", ">", Htmlstring);
    Htmlstring = RegexPattern("&(nbsp|#160);", " ", Htmlstring);
    Htmlstring = RegexPattern("&(iexcl|#161);", "\u00A1", Htmlstring);
    Htmlstring = RegexPattern("&(cent|#162);", "\u00A2", Htmlstring);
    Htmlstring = RegexPattern("&(pound|#163);", "\u00A3", Htmlstring);
    Htmlstring = RegexPattern("&(copy|#169);", "\u00A9", Htmlstring);

    // Drop any other numeric character reference.
    Htmlstring = RegexPattern("&#(\\d+);", "", Htmlstring);

    // Decode the ampersand last so that text such as "&amp;lt;" is decoded
    // exactly once instead of cascading into a second entity.
    Htmlstring = RegexPattern("&(amp|#38);", "&", Htmlstring);

    // Remove stray angle brackets, including those produced by decoding.
    Htmlstring = RegexPattern("<", "", Htmlstring);
    Htmlstring = RegexPattern(">", "", Htmlstring);

    return Htmlstring;
}

/// <summary>
/// Replaces every case-insensitive match of a regular expression in the given
/// content with a replacement string.
/// </summary>
/// <param name="pattern">The regular expression. When null or empty, the content is returned unchanged.</param>
/// <param name="str">The replacement text; "$" substitutions are honoured by Regex.Replace.</param>
/// <param name="content">The text to search. When null, null is returned.</param>
/// <returns>The content with all matches replaced.</returns>
public static String RegexPattern(String pattern, String str, String content)
{
    if (content == null || string.IsNullOrEmpty(pattern))
    {
        return content;
    }

    return Regex.Replace(content, pattern, str, RegexOptions.IgnoreCase);
}
本文来自博客园,作者:mushishi,转载请注明原文链接:https://www.cnblogs.com/mushishi/archive/2013/05/31/3110005.html