淘宝店铺图片数据迁移核心代码

核心代码
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Data.OleDb;
using System.Threading;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;

namespace ConsoleA
{

    
class Image
    {
        
public string Key { getset; }
        
public string ImageUrl { getset; }
    }
    
class Program
    {
        
static Dictionary<int, List<string>> PathDic = new Dictionary<int, List<string>>();
        
static Dictionary<int, List<Image>> ImgDic = new Dictionary<int, List<Image>>();
        
static string DBPATH = @"E:\精品\1\318.xls";

        
//小叶
        static string tb1="<A href=\"http://amos.im.alisoft.com/msg.aw?v=2&amp;uid=billowye&amp;site=cntaobao&amp;s=1&amp;charset=utf-8\" target=_blank><IMG border=0 alt=客服在线:小叶 align=absMiddle src=\"http://amos.im.alisoft.com/online.aw?v=2&amp;uid=billowye&amp;site=cntaobao&amp;s=1&amp;charset=utf-8\"></A>";
        
static string rtb1 = "<a target=\"_blank\" href=\"http://amos1.taobao.com/msg.ww?v=2&uid=hubaochan&s=1\" ><img border=\"0\" src=\"http://amos1.taobao.com/online.ww?v=2&uid=hubaochan&s=1\" alt=\"点击这里给我发消息\" /></a>";
       
        
static string tb2="<A href=\"http://amos.im.alisoft.com/msg.aw?v=2&amp;uid=%E7%B1%B3%E5%BE%B7%E5%88%9B%E6%84%8F&amp;site=cntaobao&amp;s=1&amp;charset=utf-8\" target=_blank><IMG border=0 alt=客服在线:小米 align=absMiddle src=\"http://amos.im.alisoft.com/online.aw?v=2&amp;uid=米德创意&amp;site=cntaobao&amp;s=1&amp;charset=utf-8\" width=77 height=18></A>";
        
static string rtb2 = "<a target=\"_blank\" href=\"http://amos1.taobao.com/msg.ww?v=2&uid=zengrunfeng&s=1\" ><img border=\"0\" src=\"http://amos1.taobao.com/online.ww?v=2&uid=zengrunfeng&s=1\" alt=\"点击这里给我发消息\" /></a>";
       
        
static string tb3="<A href=\"http://amos.im.alisoft.com/msg.aw?v=2&amp;uid=happyluck77&amp;site=cntaobao&amp;s=1&amp;charset=utf-8\" target=_blank><IMG border=0 alt=客服在线:小伍 align=absMiddle src=\"http://amos.im.alisoft.com/online.aw?v=2&amp;uid=happyluck77&amp;site=cntaobao&amp;s=1&amp;charset=utf-8\"></A>";
        
static string rtb3 = "<a target=\"_blank\" href=\"http://amos1.taobao.com/msg.ww?v=2&uid=gensonzeng&s=1\" ><img border=\"0\" src=\"http://amos1.taobao.com/online.ww?v=2&uid=gensonzeng&s=1\" alt=\"点击这里给我发消息\" /></a>";
      

        
static string tb4="<A href=\"http://amos.im.alisoft.com/msg.aw?v=2&amp;uid=happyluck77&amp;site=cntaobao&amp;s=1&amp;charset=utf-8\" target=_blank><IMG border=0 alt=客服在线:小静 align=absMiddle src=\"http://amos.im.alisoft.com/online.aw?v=2&amp;uid=世纪天使的故事&amp;site=cntaobao&amp;s=1&amp;charset=utf-8\"></A>";
        
static string rtb4 = "<a target=\"_blank\" href=\"http://amos1.taobao.com/msg.ww?v=2&uid=zengrunfeng&s=1&zkaction=1000\" ><img border=\"0\" src=\"http://amos1.taobao.com/online.ww?v=2&uid=zengrunfeng&s=1\" alt=\"点击这里给我发消息\" /></a>";
        
static string dianpu = @"http://micc.taobao.com/view_page-64967943.htm";
        
static string rdianpu = @"http://fav365.taobao.com/view_page-70328410.htm";


        
static int replacecount = 1;
        
static void Main(string[] args)
        {

            
           
// DownLoad();

            T2DataTableToDict();
           
// Console.Read();

            DataTable dt 
= ExcelToDT(DBPATH, "t");

            
int count = 1;

            Dictionary
<stringstring> dict = T2DataTableToDict();
            
foreach (DataRow dr in dt.Rows)//原数据
            {
                
string strdesc = dr["宝贝描述"].ToString();
                List
<Image> pics = new List<Image>();
                var images 
= GetHtmlImageUrlList(strdesc);//取出图
                foreach (string item in images)
                {
                    Image myimg 
= new Image() { ImageUrl = item, Key = MiccFileKey(item) };
                    pics.Add(myimg);
                }

                
foreach (KeyValuePair<stringstring> item in dict)
                {
                    
foreach (Image img in pics)
                    {
                        
if (item.Key == img.Key)
                        {
                            strdesc 
= strdesc.Replace(img.ImageUrl, item.Value);
                            Console.WriteLine(img.ImageUrl 
+ "替换为" + item.Value);
                            replacecount
++;
                        }
                    }
                }

                strdesc 
= strdesc.Replace(dianpu, rdianpu);

                
//strdesc.Contains(tb1);

                
//strdesc = strdesc.Replace(tb1,rtb1);
                
//strdesc = strdesc.Replace(tb2, rtb2);
                
//strdesc = strdesc.Replace(tb3, rtb3);
                
//strdesc = strdesc.Replace(tb4, rtb4);


                strdesc 
= strdesc.Replace("uid=billowye""uid=hubaochan");
                strdesc 
= strdesc.Replace("uid=米德创意""uid=zengrunfeng");
                strdesc 
= strdesc.Replace("uid=%E7%B1%B3%E5%BE%B7%E5%88%9B%E6%84%8F""uid=zengrunfeng");
                strdesc 
= strdesc.Replace("uid=happyluck77""uid=gensonzeng");


                strdesc 
= strdesc.Replace("小叶""小胡");
                strdesc 
= strdesc.Replace("小米""小曾");

                UPdateDT(strdesc, dr[
"Id"].ToString(), DBPATH);

                Console.WriteLine(
"完成第" + count + "条替换");
                count
++;


            }
            Console.WriteLine(
"完成");
            Console.WriteLine(replacecount 
+ "条替换");
            Console.Read();

        }

        
public static void DownLoad()
        {
            DataTable dt 
= ExcelToDT(DBPATH, "t");
            SavePictureFromUrl(dt);
        }

        
/// <summary>
        
/// 获取t2  table
        
/// </summary>
        
/// <returns></returns>
        public static Dictionary<stringstring> T2DataTableToDict()
        {

            DataTable dt 
= ExcelToDT(DBPATH, "t2");


            Dictionary
<stringstring> dict = new Dictionary<stringstring>();
            
foreach (DataRow dr in dt.Rows)
            {
                
string str = dr[0].ToString();
                
string[] strArray = str.Split(new char[] { '^' }, StringSplitOptions.RemoveEmptyEntries);
                
string key = strArray[1].Replace(";""").Trim();
                
try
                {
                    dict.Add(key, strArray[
0].Trim());
                }
                
catch (Exception ex)
                {
                    Console.WriteLine(key);
                    Console.WriteLine(
"出错" + ex.Message);
                }

            }
            
return dict;
        }

        
/// <summary>
        
/// 取得HTML中所有图片的 URL。
        
/// </summary>
        
/// <param name="sHtmlText">HTML代码</param>
        
/// <returns>图片的URL列表</returns>
        public static string[] GetHtmlImageUrlList(string sHtmlText)
        {
            
// 定义正则表达式用来匹配 img 标签
            Regex regImg = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);

            
// 搜索匹配的字符串
            MatchCollection matches = regImg.Matches(sHtmlText);

            
int i = 0;
            
string[] sUrlList = new string[matches.Count];

            
// 取得匹配项列表
            foreach (Match match in matches)
            {

                sUrlList[i
++= match.Groups["imgUrl"].Value;
            }

            
return sUrlList;
        }



        
static string MiccFileKey(string miccfile)
        {
            miccfile 
= miccfile.Trim();
            
if (miccfile.StartsWith("http://", StringComparison.InvariantCulture))
            {
                
string tmp = miccfile.Replace("http://""");
                tmp 
= tmp.Replace("/""@");
                
if (tmp.Length > 50)
                {
                    tmp 
= tmp.Substring(050);
                }

                
return tmp;
            }
            
throw new Exception(miccfile);

        }


        
static DataTable ExcelToDT(string Path, string tableName)
        {

            
string strConn = "Provider=Microsoft.Jet.OLEDB.4.0;" + "Data Source=" + Path + ";" + "Extended Properties='Excel 8.0;'";
            OleDbConnection conn 
= new OleDbConnection(strConn);
            
if (conn.State != ConnectionState.Open)
            {
                conn.Open();
            }
            
string strExcel = "";
            OleDbDataAdapter myCommand 
= null;
            DataSet ds 
= null;
            strExcel 
= "select * from [" + tableName + "$]";
            myCommand 
= new OleDbDataAdapter(strExcel, strConn);
            ds 
= new DataSet();
            myCommand.Fill(ds, 
"table1");
            conn.Close();
            
return ds.Tables["table1"];

        }


        
static void UPdateDT(string desc, string id, string Path)
        {

            
string strConn = "Provider=Microsoft.Jet.OLEDB.4.0;" + "Data Source=" + Path + ";" + "Extended Properties='Excel 8.0;'";
            OleDbConnection conn 
= new OleDbConnection(strConn);
          
            conn.Open();
            
            OleDbCommand myCommand 
= null;

            
try
            {
                
string updatesql = "Update [t$] set 宝贝描述=@desc,城市='佛山',省='广东' where ID=" + id;
                myCommand 
= new OleDbCommand(updatesql, conn);
                myCommand.Parameters.Add(
new OleDbParameter("desc", desc));
                
int result = myCommand.ExecuteNonQuery();
                
if (result == 1)
                {
                    Console.WriteLine(
"更新数据库成功");
                }
            }
            
catch (Exception ex)
            {
                Console.WriteLine(id);
            }
            
finally
            {
               conn.Close();       
            }


        }

        
static void SavePictureFromUrl(DataTable dt)
        {
          
            List
<string> PathList = new List<string>();
            List
<string> tempPathList = new List<string>();
            
int pathTempNum = 0;
            
int DicKey = 0;
            
foreach (DataRow row in dt.Rows)
            {
                
if (pathTempNum == 0)
                {
                    tempPathList 
= new List<string>();
                }
                
string description = row["宝贝描述"].ToString();
                

                
foreach (string str in GetHtmlImageUrlList(description))
                {
                    PathList.Add(str);
                    tempPathList.Add(str);
                    pathTempNum
++;
                }

                
if (pathTempNum > 100)
                {
                    PathDic.Add(DicKey, tempPathList);
                    DicKey
++;
                    pathTempNum 
= 0;
                }
            }
            ThreadPool.SetMaxThreads(
100100);
            
foreach (int key in PathDic.Keys)
            {
                ThreadPool.QueueUserWorkItem(
new WaitCallback(SavePicFromDic), key);
            }
        }


        
static void SavePicFromDic(object DicKey)
        {
            
foreach (string path in PathDic[Convert.ToInt32(DicKey)])
            {
                Console.WriteLine(DicKey.ToString() 
+ path);
                SavePictureFromHTTP(path, 
@"E:\精品\excel\ConsoleA\ConsoleA\taobao18\" + path.Substring(7).Replace('/''@'));
            }
        }
        
static void SavePictureFromHTTP(string url, string path)
        {
            
try
            {
                
long fileLength = 0;
                WebRequest webReq 
= WebRequest.Create(url);
                WebResponse webRes 
= webReq.GetResponse();
                fileLength 
= webRes.ContentLength;
                Stream srm 
= webRes.GetResponseStream();
                StreamReader srmReader 
= new StreamReader(srm);
                
byte[] bufferbyte = new byte[fileLength];
                
int allByte = (int)bufferbyte.Length;
                
int startByte = 0;
                
while (fileLength > 0)
                {
                    
int downByte = srm.Read(bufferbyte, startByte, allByte);
                    
if (downByte == 0) { break; };
                    startByte 
+= downByte;
                    allByte 
-= downByte;
                }
                
if (!File.Exists(path))
                {
                    
string tempPath = path;
                    FileStream fs 
= new FileStream(tempPath, FileMode.OpenOrCreate, FileAccess.Write);
                    fs.Write(bufferbyte, 
0, bufferbyte.Length);
                    srm.Close();
                    srmReader.Close();
                    fs.Close();
                    Console.WriteLine(
"已经下载" + url);
                }
                
else
                {
                    
//path = path.Insert(path.LastIndexOf('.'), Guid.NewGuid().ToString());
                }

            }
            
catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }



    }
}
posted @ 2011-03-18 17:27  永不言败  阅读(718)  评论(0编辑  收藏  举报