Hello,World

细说CSV

        CSV全称是Comma-Separated Values(逗号分隔值)。作为一种数据传输与存储的格式,它显然没有xml、json强大,只能进行一些二维数组的数据处理,但它在项目中还是经常会用到。

CSV的字符规则:

1 开头不留空,以行为单位。

2 可含或不含列名,含列名则居文件第一行。

3 一行数据不跨行,无空行。

4 以半角逗号(即,)作分隔符,列为空也要表达其存在。

5 列内容如存在半角逗号(即,)或换行符\r(13)、\n(10),则用半角引号(即"")将该字段值包含起来。

6 列内容如存在半角引号(即")则应替换成半角双引号("")转义,并用半角引号(即"")将该字段值包含起来。

7 文件读写时引号,逗号操作规则互逆。

8 内码格式不限,可为 ASCII、Unicode 或者其他。

9 数据结束处,以\r\n作为结束标记。

说明:这个规则对于excel完全适合

算法实现

namespace CSV
{
    using System;
    using System.Collections.Generic;
    using System.Reflection;
    using System.Text;

    /// <summary>
    /// Parses CSV text into a rectangular table (an optional header row plus data rows)
    /// and formats objects back into CSV text.
    /// </summary>
    public class CSVUtil
    {
        // Raw CSV text supplied through the constructor; null when the default constructor is used.
        private string content;

        // Whether the first parsed row is treated as column names.
        private bool hasHeader = true;

        // Results of the last successful Parse(); both stay null until Parse() has processed non-empty content.
        private string[] header;
        private string[][] data;

        /// <summary>
        /// Creates a parser with no content. <see cref="Parse"/> does nothing on such an instance.
        /// </summary>
        public CSVUtil()
        { }

        /// <summary>
        /// Creates a parser for the given CSV text; the first row is treated as a header.
        /// </summary>
        /// <param name="_content">The CSV text to parse.</param>
        public CSVUtil(string _content)
        {
            content = _content;
        }

        /// <summary>
        /// Creates a parser for the given CSV text.
        /// </summary>
        /// <param name="_content">The CSV text to parse.</param>
        /// <param name="_hasHeader">True if the first row holds column names.</param>
        public CSVUtil(string _content, bool _hasHeader)
        {
            content = _content;
            hasHeader = _hasHeader;
        }

        /// <summary>
        /// Gets or sets whether the first row is treated as column names. Defaults to true.
        /// </summary>
        public bool HasHeader
        {
            get { return hasHeader; }
            set { hasHeader = value; }
        }

        /// <summary>
        /// Gets the column names found by the last <see cref="Parse"/> call,
        /// or null if nothing was parsed or <see cref="HasHeader"/> was false.
        /// </summary>
        public string[] Header
        {
            get { return header; }
        }

        /// <summary>
        /// Gets the data rows found by the last <see cref="Parse"/> call as
        /// [row][column], or null if nothing was parsed.
        /// </summary>
        public string[][] Data
        {
            get { return data; }
        }

        /// <summary>
        /// Splits the CSV content into <see cref="Header"/> and <see cref="Data"/>.
        /// </summary>
        /// <remarks>
        /// CRLF, LFCR, CR and LF are all accepted as record separators. A field may be
        /// wrapped in double quotes, in which case it may contain commas, line breaks and
        /// doubled quotes (""), which decode to a single quote. Every row is returned with
        /// the same number of columns as the widest row; missing trailing cells are filled
        /// with empty strings. If the content is null or empty the method returns without
        /// touching <see cref="Header"/> or <see cref="Data"/>.
        /// </remarks>
        /// <exception cref="FormatException">
        /// A quote appears inside an unquoted field, text follows a closing quote,
        /// or a quoted field is never closed.
        /// </exception>
        public void Parse()
        {
            if (string.IsNullOrEmpty(content))
            {
                return;
            }

            List<List<string>> rows = new List<List<string>>();
            List<string> cells = new List<string>();
            StringBuilder field = new StringBuilder();
            bool inQuotes = false;     // currently inside an open quoted field
            bool fieldQuoted = false;  // current field was quoted and its closing quote has been seen
            bool rowPending = false;   // something was consumed since the last record separator
            int maxColumns = 0;

            for (int i = 0; i < content.Length; i++)
            {
                char ch = content[i];

                if (inQuotes)
                {
                    if (ch != '"')
                    {
                        // Commas and line breaks are literal inside quotes.
                        field.Append(ch);
                    }
                    else if (i + 1 < content.Length && content[i + 1] == '"')
                    {
                        // A doubled quote is an escaped quote character.
                        field.Append('"');
                        i++;
                    }
                    else
                    {
                        // Closing quote; the field ends at the next comma, line break or end of input.
                        inQuotes = false;
                        fieldQuoted = true;
                    }
                    continue;
                }

                if (ch == '\r' || ch == '\n')
                {
                    // Treat CRLF and LFCR as one separator by swallowing the partner character.
                    char partner = (ch == '\r') ? '\n' : '\r';
                    if (i + 1 < content.Length && content[i + 1] == partner)
                    {
                        i++;
                    }

                    cells.Add(field.ToString());
                    field.Length = 0;
                    fieldQuoted = false;

                    maxColumns = Math.Max(maxColumns, cells.Count);
                    rows.Add(cells);
                    cells = new List<string>();
                    rowPending = false;
                }
                else if (ch == ',')
                {
                    cells.Add(field.ToString());
                    field.Length = 0;
                    fieldQuoted = false;
                    rowPending = true;
                }
                else if (ch == '"')
                {
                    if (field.Length != 0)
                    {
                        throw new FormatException("Quote cannot exist in a filed which doesnt begin with quote!\nfield:" + (cells.Count + 1));
                    }

                    inQuotes = true;
                    rowPending = true;
                }
                else
                {
                    if (fieldQuoted)
                    {
                        // Only a comma, a line break or end of input may follow a closing quote.
                        throw new FormatException("Single double-quote char mustnt exist in filed " + (cells.Count + 1) + " while it is begined with quote\nchar at:" + i);
                    }

                    field.Append(ch);
                    rowPending = true;
                }
            }

            if (inQuotes)
            {
                throw new FormatException("last field is begin with but not end with double quote");
            }

            if (rowPending)
            {
                // Flush the final row when the content does not end with a line break.
                // This also keeps a trailing empty or quoted field.
                cells.Add(field.ToString());
                maxColumns = Math.Max(maxColumns, cells.Count);
                rows.Add(cells);
            }

            int firstDataRow = (hasHeader && rows.Count > 0) ? 1 : 0;
            header = (firstDataRow == 1) ? ToPaddedArray(rows[0], maxColumns) : null;

            data = new string[rows.Count - firstDataRow][];
            for (int r = firstDataRow; r < rows.Count; r++)
            {
                data[r - firstDataRow] = ToPaddedArray(rows[r], maxColumns);
            }
        }

        // Copies a row into an array of exactly `width` cells, filling missing cells with empty strings.
        private static string[] ToPaddedArray(List<string> cells, int width)
        {
            string[] result = new string[width];
            for (int j = 0; j < width; j++)
            {
                result[j] = (j < cells.Count) ? cells[j] : string.Empty;
            }
            return result;
        }

        /// <summary>
        /// Converts a value to a CSV field, quoting and escaping it when necessary.
        /// </summary>
        /// <param name="obj">The value to format; null yields an empty string.</param>
        /// <returns>
        /// The value's <c>ToString()</c> text. If it contains a double quote, comma, CR or LF,
        /// the text is wrapped in double quotes and embedded quotes are doubled.
        /// </returns>
        public static string FormatField(object obj)
        {
            if (obj == null)
            {
                return string.Empty;
            }

            string text = obj.ToString();
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            if (text.IndexOfAny(new char[] { '"', ',', '\n', '\r' }) < 0)
            {
                return text;
            }

            return "\"" + text.Replace("\"", "\"\"") + "\"";
        }

        /// <summary>
        /// Formats a sequence of objects as CSV text: one header line with the property
        /// names, then one line per item holding the selected property values.
        /// </summary>
        /// <typeparam name="T">The item type whose public properties are read.</typeparam>
        /// <param name="source">The items to write.</param>
        /// <param name="outputPropertys">Names of the public properties to output, in column order.</param>
        /// <returns>The CSV text; every line ends with <see cref="Environment.NewLine"/>.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="outputPropertys"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// A requested name is not a public readable property of <typeparamref name="T"/>.
        /// </exception>
        public static string FormatList<T>(IEnumerable<T> source, List<string> outputPropertys)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            if (outputPropertys == null)
            {
                throw new ArgumentNullException("outputPropertys");
            }

            // Getters are kept in a list so the value columns always follow the header order.
            List<MethodInfo> getters = new List<MethodInfo>(outputPropertys.Count);
            foreach (string propertyName in outputPropertys)
            {
                PropertyInfo property = typeof(T).GetProperty(propertyName);
                MethodInfo getter = (property == null) ? null : property.GetGetMethod();
                if (getter == null)
                {
                    throw new ArgumentException(
                        "Type " + typeof(T).FullName + " has no public readable property named '" + propertyName + "'.",
                        "outputPropertys");
                }
                getters.Add(getter);
            }

            StringBuilder sbResult = new StringBuilder();
            sbResult.Append(string.Join(",", outputPropertys.ToArray()));
            sbResult.Append(Environment.NewLine);

            string[] fields = new string[getters.Count];
            foreach (T item in source)
            {
                for (int k = 0; k < getters.Count; k++)
                {
                    fields[k] = FormatField(getters[k].Invoke(item, null));
                }
                sbResult.Append(string.Join(",", fields));
                sbResult.Append(Environment.NewLine);
            }
            return sbResult.ToString();
        }
    }
}
posted @ 2014-03-10 21:20  Barlow Du  阅读(409)  评论(0编辑  收藏  举报