云在青天部落阁

独学而无友,则孤陋而寡闻。做一个灵魂有趣的人!
扩大
缩小

C#字符串处理之清除Html&XML标签

/// <summary>
/// Strips HTML/XML markup from a string and returns the remaining plain text.
/// Approach: match the tags with a regular expression and replace them with nothing.
/// </summary>
/// <param name="input">Text that may contain HTML/XML tags and character entities.</param>
/// <returns>
/// The text with tags and stray angle brackets removed, numeric entities dropped,
/// the named entities quot, lt, gt, nbsp and amp decoded, and whitespace trimmed
/// and collapsed to single spaces. An empty string when <paramref name="input"/>
/// is null or empty.
/// </returns>
public static string ContentReplace(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    // Remove every complete tag.
    input = Regex.Replace(input, @"<([^>]*)>", "");

    // Remove stray angle brackets left over from broken markup. This runs before
    // entity decoding so that "<" and ">" produced from &lt; / &gt; are kept.
    input = input.Replace("<", "").Replace(">", "");

    // Remove a line break together with the whitespace that follows it.
    input = Regex.Replace(input, @"([\r\n])[\s]+", "");

    // Numeric character references are dropped, not decoded.
    input = Regex.Replace(input, @"&#(\d+);", "");

    // Decode the named entities. &amp; goes last so that text such as "&amp;lt;"
    // ends up as the literal "&lt;" instead of being decoded twice into "<".
    input = Regex.Replace(input, @"&(quot);", "\"", RegexOptions.IgnoreCase);
    input = Regex.Replace(input, @"&(lt);", "<", RegexOptions.IgnoreCase);
    input = Regex.Replace(input, @"&(gt);", ">", RegexOptions.IgnoreCase);
    input = Regex.Replace(input, @"&(nbsp);", " ", RegexOptions.IgnoreCase);
    input = Regex.Replace(input, @"&(amp);", "&", RegexOptions.IgnoreCase);

    // Handle answer numbering.
    // NOTE(review): "$" anchors at the end of the input, so this pattern cannot
    // match as written; the intended pattern is unknown - confirm with the author.
    input = Regex.Replace(input, @"$、", "", RegexOptions.IgnoreCase);

    // Drop any CRLF pair that is still present (string.Replace returns a new
    // string, so the result has to be assigned).
    input = input.Replace("\r\n", "");

    // Trim both ends and collapse inner runs of whitespace to one space.
    return Regex.Replace(input.Trim(), @"\s+", " ");
}

posted on 2019-08-15 21:18  NoMatterTryAgain  阅读(482)  评论(0)  编辑  收藏  举报

导航