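// Background: in our dropshipping ("一件代发") business, the product descriptions in orders contain a lot of advertising, so the business needs to dynamically add/remove specified content.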
// Builds a ProductId -> raw Specification HTML map containing only those products
// whose specification markup has at least one <tr> with three or more <td> cells.
Dictionary<string, string> keyValues = new Dictionary<string, string>();
foreach (DataRow dataRow in dataRowCollection)
{
    var sp = dataRow["Specification"].ToString();
    var sku = dataRow["ProductId"].ToString();

    // Dictionary.Add throws ArgumentException on a duplicate key, so a second
    // qualifying row for the same product used to abort the whole loop.
    // Keep the first qualifying specification and skip parsing the rest.
    if (keyValues.ContainsKey(sku))
    {
        continue;
    }

    Document document = HtmlParser.Parse(sp);
    var q = Selector.Bind(document);
    var trs = q("tr");
    foreach (var tr in trs)
    {
        var tds = q(tr).find("td");
        if (tds.length >= 3)
        {
            keyValues.Add(sku, sp);
            // One qualifying row is enough; stop scanning this specification.
            break;
        }
    }
}
// Strips advertising from the product description HTML:
//   * removes Chinese characters from the text of <p>, <span> and <font> elements,
//   * additionally removes "wechat" / "qq" (any casing) from <font> text,
//   * blanks the src of <img> elements pointing at inline data or known ad hosts.
// The cleaned markup is written back to detail.description.
if (!string.IsNullOrEmpty(detail.description))
{
    Document document = HtmlParser.Parse(detail.description);
    // Bind the selector to the very document that is serialized at the end.
    // The previous code queried via HtmlParser.Query(detail.description), i.e. from the
    // raw string rather than `document` -- presumably a separate parse, in which case
    // none of the edits below reached document.ToString().
    var q = Selector.Bind(document);

    string chinesePattern = @"[\u4e00-\u9fa5]";

    // Order is kept as p -> span -> font; each query runs after the previous tag's edits.
    foreach (var tag in new[] { "p", "span", "font" })
    {
        foreach (var item in q(tag))
        {
            // Only assign textContent when something actually changes, so elements
            // without Chinese text are left untouched.
            if (Regex.IsMatch(item.textContent, chinesePattern))
            {
                item.textContent = Regex.Replace(item.textContent, chinesePattern, "");
            }

            if (tag != "font")
            {
                continue;
            }

            // No separate check for "微信号" is needed: all three characters fall inside
            // the Chinese range removed above, so the phrase cannot survive that step.

            // Remove the contact keywords in every casing ("WECHAT", "Wechat", "Qq", ...),
            // not just the two exact spellings that were handled before.
            foreach (var keyword in new[] { "wechat", "qq" })
            {
                if (item.textContent.IndexOf(keyword, StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    item.textContent = Regex.Replace(
                        item.textContent,
                        keyword,
                        "",
                        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
                }
            }
        }
    }

    foreach (var item in q("img"))
    {
        var src = item.getAttribute("src");
        // An <img> without a src attribute has nothing to blank; guarding here avoids
        // calling Contains on a missing value.
        if (string.IsNullOrEmpty(src))
        {
            continue;
        }

        bool isUnwantedImage =
            src.Contains("data:image") ||
            src.Contains("btbimage") ||
            src.Contains("https://timgsa.baidu.com/");
        if (isUnwantedImage)
        {
            item.setAttribute("src", "");
        }
    }

    detail.description = document.ToString();
}