一段刚刚出炉的CSV文件转换为DataTable对象的代码

CSV是以文本形式保存的表格数据:每列数据之间使用逗号分隔,每行数据以CRLF(\r\n)结尾;如果数据值中包含逗号或CRLF,则用双引号将该值包裹;如果数据值中包含双引号,则用两个连续的双引号作为转义。

 

    public static class Common
    {
        /// <summary>
        /// Reads a CSV file and converts it to a <see cref="DataTable"/>, treating every
        /// record (including the first one) as data. Columns are named "column0", "column1", ...
        /// </summary>
        /// <param name="path">Path of the CSV file to read.</param>
        /// <returns>
        /// A table with one string column per field position and one row per CSV record.
        /// An empty file yields a table with no columns and no rows.
        /// </returns>
        public static DataTable CSVToDataTable(string path)
        {
            return CSVToDataTable(path, false);
        }

        /// <summary>
        /// Reads a CSV file and converts it to a <see cref="DataTable"/>.
        /// </summary>
        /// <param name="path">Path of the CSV file to read.</param>
        /// <param name="firstRowIsColumnName">
        /// When true, the first record supplies the column names and is not added as a data row.
        /// Empty or duplicate header names fall back to a generated, unique name.
        /// </param>
        /// <returns>
        /// A table whose column count equals the widest record in the file. Records that are
        /// shorter than that leave their trailing cells unset.
        /// </returns>
        public static DataTable CSVToDataTable(string path, bool firstRowIsColumnName)
        {
            string csv;

            // FileShare.ReadWrite allows the file to be read while another process
            // (for example a spreadsheet application) still has it open for writing.
            using (FileStream fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
            using (StreamReader sr = new StreamReader(fs, Encoding.Default))
            {
                csv = sr.ReadToEnd();
            }

            List<List<string>> records = ParseCsvRecords(csv);

            DataTable dt = new DataTable();
            if (records.Count == 0)
            {
                return dt;
            }

            // The table must be as wide as the widest record, not just the first one.
            int columns = 0;
            foreach (List<string> record in records)
            {
                columns = Math.Max(columns, record.Count);
            }

            List<string> header = records[0];
            for (int i = 0; i < columns; i++)
            {
                string name = "column" + i;
                if (firstRowIsColumnName && i < header.Count && header[i].Length > 0)
                {
                    name = header[i];
                }

                // DataColumnCollection.Add rejects duplicate names, so make the name unique first.
                while (dt.Columns.Contains(name))
                {
                    name = name + "_" + i;
                }

                dt.Columns.Add(new DataColumn(name));
            }

            for (int i = firstRowIsColumnName ? 1 : 0; i < records.Count; i++)
            {
                List<string> record = records[i];
                DataRow dr = dt.NewRow();
                for (int j = 0; j < record.Count; j++)
                {
                    dr[j] = record[j];
                }
                dt.Rows.Add(dr);
            }

            return dt;
        }

        /// <summary>
        /// Splits CSV text into records and fields. Fields are separated by commas; records end
        /// at CRLF, a bare CR or a bare LF. A field that starts with a double quote may contain
        /// commas and line breaks, and represents a literal double quote as two double quotes.
        /// </summary>
        /// <param name="csv">The complete CSV text.</param>
        /// <returns>One list of field values per record, in file order.</returns>
        private static List<List<string>> ParseCsvRecords(string csv)
        {
            List<List<string>> records = new List<List<string>>();
            List<string> row = new List<string>();
            StringBuilder value = new StringBuilder();

            // True until the first character of the current field has been consumed;
            // an opening quote is only recognised at that position.
            bool atValueStart = true;
            bool inQuotes = false;

            for (int i = 0; i < csv.Length; i++)
            {
                char c = csv[i];
                bool hasNext = i + 1 < csv.Length;
                char next = hasNext ? csv[i + 1] : '\0';

                if (inQuotes)
                {
                    if (c != '"')
                    {
                        // Commas and line breaks are plain text inside a quoted field.
                        value.Append(c);
                    }
                    else if (hasNext && next == '"')
                    {
                        // Two consecutive quotes encode one literal quote.
                        value.Append('"');
                        i++;
                    }
                    else
                    {
                        inQuotes = false;
                    }
                }
                else if (c == '"' && atValueStart)
                {
                    inQuotes = true;
                    atValueStart = false;
                }
                else if (c == ',')
                {
                    row.Add(value.ToString());
                    value.Length = 0;
                    atValueStart = true;
                }
                else if (c == '\r' || c == '\n')
                {
                    // Consume CRLF as a single terminator.
                    if (c == '\r' && hasNext && next == '\n')
                    {
                        i++;
                    }

                    row.Add(value.ToString());
                    records.Add(row);
                    row = new List<string>();
                    value.Length = 0;
                    atValueStart = true;
                }
                else
                {
                    value.Append(c);
                    atValueStart = false;
                }
            }

            // Flush the final record when the text does not end with a line terminator.
            // Nothing is pending right after a terminator, so no spurious empty row is added.
            if (row.Count > 0 || value.Length > 0 || !atValueStart)
            {
                row.Add(value.ToString());
                records.Add(row);
            }

            return records;
        }
View Code

 

posted @ 2016-10-17 16:08  jgjg2323  阅读(285)  评论(0)  编辑  收藏  举报