// Alternative patterns for pulling content out of HTML table cells.
// Every line declares the same variable, so only one of them can be used at a time.
string regstr = @"(?i)(?<=<td.*?.*?>)[^<]+(?=</td>)"; // Extract the text of a td: a run of non-'<' characters preceded by a <td ...> tag and followed by </td>
string regstr = @"<a\s+href=(?<url>.+?)>(?<content>.+?)</a>"; // Extract a link: the href value goes to group "url", the text between <a ...> and </a> to group "content"
string regstr = @"<td.+?><a\s+href=(?<url>.+?)>(?<content>.+?)</a></td>"; // Extract a link that sits directly inside a td (groups "url" and "content")
string regstr = @"<td.+?><span.+?>(?<content>.+?)</span></td>"; // Extract the content of a span that sits directly inside a td (group "content")
string regstr = @"<td.+?>(?<content>.+?)</td>"; // Capture everything between the td tags (group "content"); NOTE: ".+?" needs at least one character after "<td", so a bare <td> is not matched as the opening tag
string regstr = @"<td>(?<content>.+?)-<font color=#0000ff>推荐</font></td>"; // Capture the text that precedes the trailing "-<font color=#0000ff>...</font>" marker inside a bare <td> (group "content")
正则替换:
一、循环内替换:
// Approach 1: normalize each TD element inside the loop.
// Finds every <td ...>...</td> element in `data` and, for each one, prints the raw
// element, a separator line, and then the same element with every opening
// <td ...> tag collapsed to a bare <td>.
string regstr = @"(?i)[\<]td.*?[\>].*?(</td>)"; // Matches one TD element, from its opening tag through </td>
string regReplace = @"(?i)[\<]td.*?[\>]"; // Matches an opening <td ...> tag together with its attributes
Regex reg = new Regex(regstr, RegexOptions.IgnoreCase | RegexOptions.Singleline);
MatchCollection mc = reg.Matches(data);
foreach (Match m in mc)
{
    Console.WriteLine(m.Value);
    Console.WriteLine("------------------------------");
    // Use the same options as the extraction regex: without Singleline, '.' stops at a
    // line break, so an opening tag whose attributes wrap onto several lines would be
    // matched above but left unnormalized here.
    string s = Regex.Replace(m.Value, regReplace, "<td>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    Console.WriteLine(s);
}
二、统一替换:
// Approach 2: normalize the whole document first, then extract.
// Collapses every opening <td ...> tag in `data` to a bare <td>, then finds each
// <td>...</td> element in the normalized text and prints it followed by a separator line.
string regstr = @"(?i)[\<]td.*?[\>].*?(</td>)"; // Matches one TD element, from its opening tag through </td>
string regReplace = @"(?i)[\<]td.*?[\>]"; // Matches an opening <td ...> tag together with its attributes
Regex reg = new Regex(regstr, RegexOptions.IgnoreCase | RegexOptions.Singleline);
// Use the same options as the extraction regex: without Singleline, '.' stops at a
// line break, so an opening tag whose attributes wrap onto several lines would keep
// its attributes instead of being normalized.
string s = Regex.Replace(data, regReplace, "<td>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
MatchCollection mc = reg.Matches(s);
foreach (Match m in mc)
{
    Console.WriteLine(m.Value);
    Console.WriteLine("------------------------------");
}