C#字符串处理之清除Html&XML标签
/// <summary>
/// Strips HTML/XML tags from a string and normalizes the remaining text.
/// Approach: match tags with a regular expression and replace them with nothing,
/// then decode a handful of common character entities and collapse whitespace.
/// </summary>
/// <param name="input">The markup text to clean. May be null or empty.</param>
/// <returns>
/// The text with tags removed, numeric entities dropped, the named entities
/// quot/lt/gt/nbsp/amp decoded, leading and trailing whitespace trimmed and
/// every inner whitespace run collapsed to a single space. Returns an empty
/// string when <paramref name="input"/> is null or empty.
/// </returns>
public static string ContentReplace(string input)
{
    // Regex.Replace throws on a null input; treat "nothing to clean" as empty text.
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    // Remove everything that looks like a tag: '<', any run of non-'>' characters, '>'.
    input = Regex.Replace(input, @"<([^>]*)>", "");

    // Remove a line-break character together with the whitespace that follows it.
    // Note this joins the surrounding text without inserting a space.
    input = Regex.Replace(input, @"([\r\n])[\s]+", "");

    // Numeric character references (e.g. &#160;) are dropped, not decoded.
    input = Regex.Replace(input, @"&#(\d+);", "");

    // Decode the named entities this helper knows about (matched case-insensitively).
    input = Regex.Replace(input, @"&(quot);", "\"", RegexOptions.IgnoreCase);
    input = Regex.Replace(input, @"&(lt);", "<", RegexOptions.IgnoreCase);
    input = Regex.Replace(input, @"&(gt);", ">", RegexOptions.IgnoreCase);
    input = Regex.Replace(input, @"&(nbsp);", " ", RegexOptions.IgnoreCase);

    // "&amp;" must be decoded last; decoding it first would turn an escaped
    // entity such as "&amp;lt;" into "&lt;" and then wrongly into "<".
    input = Regex.Replace(input, @"&(amp);", "&", RegexOptions.IgnoreCase);

    // Handle answer ordinals.
    // NOTE(review): in a regex '$' is the end-of-input anchor, so this pattern cannot
    // match anything and the call is effectively a no-op. Kept unchanged because the
    // intended pattern is not clear from this file - confirm and correct.
    input = Regex.Replace(input, @"$、", "", RegexOptions.IgnoreCase);

    // The original also called input.Replace("<", ""), input.Replace(">", "") and
    // input.Replace("\r\n", "") but discarded the results (strings are immutable),
    // so those calls never had any effect. They are omitted here; assigning them
    // would strip the '<' and '>' characters that were just decoded above.

    // Trim both ends and collapse runs of inner whitespace into a single space.
    return Regex.Replace(input.Trim(), @"\s+", " ");
}
posted on 2019-08-15 21:18 NoMatterTryAgain 阅读(482) 评论(0) 编辑 收藏 举报