正则表达式匹配
由于工作需要,学习了正则表达式匹配。从MongoDB导出的数据文本大致是这样的:
{
"_id" : ObjectId("55370b4112db760740809f79"),
"CartKey" : {
"CustomerId" : NumberLong(471)
},
"LastUpdate" : ISODate("2015-06-04T08:21:24.307Z"),
"Baskets" : [{
"Items" : [{
"SKU" : "5170",
"CategoryName" : "Cables & Adapters "
}],
"CountryCode" : 32
}]
}
{
"_id" : ObjectId("55769cc512db760da847d639"),
"CartKey" : {
"CustomerId" : NumberLong(1002)
},
"LastUpdate" : ISODate("2015-06-01T00:00:00Z"),
"Baskets" : [{
"Items" : [{
"SKU" : "2716",
"CategoryName" : "iPhone iPad iPod"
}],
"CountryCode" : 46
}]
}
{
"_id" : ObjectId("54b5e9d412db761c388d6c48"),
"CartKey" : {
"CustomerId" : NumberLong(4398734)
},
"LastUpdate" : ISODate("2015-06-05T03:49:11.131Z"),
"Baskets" : [{
"Items" : [{
"SKU" : "33883",
"CategoryName" : "Plugs & Sockets"
}, {
"SKU" : "126095",
"CategoryName" : "Household Thermometers"
}],
"CountryCode" : 46
}]
}
目标是将 "CustomerId" : NumberLong(4398734) 转换为 "CustomerId" : 4398734,并将 "LastUpdate" : ISODate("2015-06-05T03:49:11.131Z") 转换为 "LastUpdate" : "2015-06-05T03:49:11.131Z"。
我使用的是 Notepad++ 编辑器的正则替换功能。
查找目标的正则表达式是:"CustomerId" : NumberLong\((.*)\)
替换为:"CustomerId" : \1
日期是:"LastUpdate" : ISODate\((.*)\)
替换为:"LastUpdate" : \1
删除 _id 字段所用的查找正则是:"_id" : ObjectId\((.*)\),
替换为空(即直接删除匹配到的内容)。
为什么替换内容里写 \1 呢?因为正则表达式中的 () 定义了一个捕获分组(group),\1 引用的就是第一个分组匹配到的内容。
系列文章:
http://zhoufoxcn.blog.51cto.com/792419/281956/
http://www.crifan.com/files/doc/docbook/rec_soft_npp/release/htmls/npp_func_regex_replace.html
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using CartTools.Modela;
using Newtonsoft.Json;
using CartTools.Modela.CMS;
using System.IO;
using System.Text.RegularExpressions;
namespace CartTools
{
/// <summary>
/// Main form of the cart tool. Takes MongoDB shell-style export text from
/// <c>textBox1</c>, rewrites it into a JSON array, deserializes it into
/// <see cref="ShoppingCartModel"/> items and writes a tab-separated table
/// (suitable for pasting into Excel) into <c>textBox2</c>.
/// </summary>
public partial class Form1 : Form
{
    /// <summary>
    /// Ordered rewrite rules that turn MongoDB shell constructs into plain JSON.
    /// Captures are lazy (<c>.*?</c>) so that a rule stops at the first closing
    /// parenthesis even when several constructs share one line.
    /// </summary>
    private static readonly KeyValuePair<Regex, string>[] MongoShellRewrites =
    {
        // "LastUpdate" : ISODate("...")  ->  "LastUpdate" : "..."
        new KeyValuePair<Regex, string>(
            new Regex(@"""LastUpdate""\s*:\s*ISODate\((.*?)\)", RegexOptions.IgnoreCase),
            @"""LastUpdate"" : $1"),
        // "CustomerId" : NumberLong(123)  ->  "CustomerId" : 123
        new KeyValuePair<Regex, string>(
            new Regex(@"""CustomerId""\s*:\s*NumberLong\((.*?)\)", RegexOptions.IgnoreCase),
            @"""CustomerId"" : $1"),
        // "_id" : ObjectId("..."),  ->  removed (including its trailing comma)
        new KeyValuePair<Regex, string>(
            new Regex(@"""_id""\s*:\s*ObjectId\((.*?)\),", RegexOptions.IgnoreCase),
            "")
    };

    /// <summary>
    /// Matches the boundary between two documents that follow each other with
    /// only whitespace in between ("}" then "{"). Inside a single well-formed
    /// document a "}" is never directly followed by "{" outside of string values.
    /// NOTE(review): a string value containing "}{" would also match — confirm
    /// that the exported data cannot contain that sequence.
    /// </summary>
    private static readonly Regex AdjacentDocuments = new Regex(@"\}(\s*)\{");

    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Click handler: converts the text of <c>textBox1</c> and shows the
    /// resulting table in <c>textBox2</c>.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        string jsonStr = GetLegalJsonSource(textBox1.Text);
        string content = GetExcelContent(jsonStr);
        textBox2.Text = content;
    }

    /// <summary>
    /// Rewrites MongoDB shell-style export text into a JSON array.
    /// </summary>
    /// <param name="text">One or more exported documents, back to back.</param>
    /// <returns>
    /// The documents with <c>ISODate(...)</c>/<c>NumberLong(...)</c> unwrapped,
    /// the <c>_id</c> field removed, commas inserted between adjacent documents,
    /// and the whole text wrapped in <c>[</c> ... <c>]</c>.
    /// </returns>
    private string GetLegalJsonSource(string text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return "[]";
        }

        foreach (KeyValuePair<Regex, string> rule in MongoShellRewrites)
        {
            text = rule.Key.Replace(text, rule.Value);
        }

        // A JSON array needs commas between its elements; the export just
        // places the documents one after another. Input that already has
        // commas between documents is left untouched by this pattern.
        text = AdjacentDocuments.Replace(text, "},$1{");

        return "[" + text + "]";
    }

    /// <summary>
    /// Deserializes the JSON array and renders one tab-separated row per cart:
    /// country code of the first basket, customer id, last update, all SKUs
    /// and all category names (each list joined with ';').
    /// </summary>
    /// <param name="str">JSON array of carts.</param>
    /// <returns>Header line plus one line per cart, lines terminated by "\n".</returns>
    private string GetExcelContent(string str)
    {
        StringBuilder sb = new StringBuilder();
        IList<ShoppingCartModel> list = JsonConvert.DeserializeObject<IList<ShoppingCartModel>>(str);

        // Columns are written tab-separated directly instead of replacing every
        // comma afterwards, so commas inside values no longer split a column.
        sb.Append("Country\tcicid\t日期\tSKU\t分类\n");
        if (list == null)
        {
            return sb.ToString();
        }

        foreach (var item in list)
        {
            StringBuilder skus = new StringBuilder();
            StringBuilder categories = new StringBuilder();

            // A cart without baskets still gets a row, with empty
            // country/SKU/category columns.
            if (item.Baskets != null)
            {
                if (item.Baskets.Any())
                {
                    sb.Append(item.Baskets.First().CountryCode);
                }

                foreach (var b in item.Baskets)
                {
                    if (b.Items == null)
                    {
                        continue;
                    }

                    foreach (var sku in b.Items)
                    {
                        skus.Append(sku.SKU).Append(';');
                        categories.Append(sku.CategoryName).Append(';');
                    }
                }
            }

            sb.AppendFormat("\t{0}", item.CartKey.CustomerId);
            sb.AppendFormat(
                "\t{0}\t{1}\t{2}\n",
                item.LastUpdate,
                skus.ToString().TrimEnd(';'),
                categories.ToString().TrimEnd(';'));
        }

        return sb.ToString();
    }
}
}