CSV文件读取器

首先看一下CSV文件格式定义
CSV : Comma Separated Values
CSV 英文文档 : http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm
CSV即Comma Separated Values,这种文件格式经常用来作为不同程序之间的数据交互的格式。
具体文件格式 :

每条记录占一行
以逗号为分隔符
逗号前后的空格会被忽略
字段中包含有逗号,该字段必须用双引号括起来
字段中包含有换行符,该字段必须用双引号括起来
字段前后包含有空格,该字段必须用双引号括起来
字段中的双引号用两个双引号表示
字段中如果有双引号,该字段必须用双引号括起来
第一条记录,可以是字段名

 

//CsvReader.cs源代码

using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace ConsoleTest
{
   public class CsvReader:StreamReader
    {
        const char m_CellSeparator = ',';
        static string m_LineSeparator = Environment.NewLine;
        private int m_CellLengthMax = 64;

        /// <summary>
        /// 单元格的最大长度,超出抛异常,缺省为64字节
        /// </summary>
        public int CellLengrhMax
        {
            get { return m_CellLengthMax; }
            set { m_CellLengthMax = value; }
        }

        #region Ctors

        /// <summary>
        /// Initializes a new instance of the <see cref="StreamReader"/> class for
        /// the specified stream.
        /// </summary>
        /// <param name="stream">The stream to be read.</param>
        public CsvReader(Stream stream)
            : base(stream)
        {
            this.Initialize();
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="StreamReader"/> class for
        /// the specified file name.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        public CsvReader(string path)
            : base(path)
        {

            this.Initialize();
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="StreamReader"/> class for
        /// the specified file name, with the specified byte order mark detection option.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look
        /// for byte order marks at the beginning of the file.</param>
        public CsvReader(string path, bool detectEncodingFromByteOrderMarks)
            : base(path, detectEncodingFromByteOrderMarks)
        {
            this.Initialize();
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="StreamReader"/> class for
        /// the specified file name, with the specified character encoding.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        /// <param name="encoding">The character encoding to use.</param>
        public CsvReader(string path, Encoding encoding)
            : base(path, encoding)
        {
            this.Initialize();
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="StreamReader"/> class for the
        /// specified stream, with the specified byte order mark detection option.
        /// </summary>
        /// <param name="stream">The stream to be read.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look
        /// for byte order marks at the beginning of the file.</param>
        public CsvReader(Stream stream, bool detectEncodingFromByteOrderMarks)
            : base(stream, detectEncodingFromByteOrderMarks)
        {
            this.Initialize();
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="StreamReader"/> class for the
        /// specified stream, with the specified character encoding.
        /// </summary>
        /// <param name="stream">The stream to be read.</param>
        /// <param name="encoding">The character encoding to use.</param>
        public CsvReader(Stream stream, Encoding encoding)
            : base(stream, encoding)
        {
            this.Initialize();
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="StreamReader"/> class for
        /// the specified stream, with the specified character encoding and byte
        /// order mark detection option.
        /// </summary>
        /// <param name="stream">The stream to be read.</param>
        /// <param name="encoding">The character encoding to use.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look
        /// for byte order marks at the beginning of the file.</param>
        public CsvReader(Stream stream, Encoding encoding,
            bool detectEncodingFromByteOrderMarks)
            : base(stream, encoding, detectEncodingFromByteOrderMarks)
        {
            this.Initialize();
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="StreamReader"/> class for
        /// the specified file name, with the specified character encoding and byte
        /// order mark detection option.
        /// </summary>
        /// <param name="path">The complete file path to be read.</param>
        /// <param name="encoding">The character encoding to use.</param>
        /// <param name="detectEncodingFromByteOrderMarks">Indicates whether to look
        /// for byte order marks at the beginning of the file.</param>
       public CsvReader(string path, Encoding encoding,
            bool detectEncodingFromByteOrderMarks)
            : base(path, encoding, detectEncodingFromByteOrderMarks)
        {
            this.Initialize();
        }

 

        private void Initialize()
        {
            if (this.BaseStream.Length > int.MaxValue)
            {
                throw new NotSupportedException(
                    "This stream reader cannot process very big files.");
            }
            m_LineSeparator = Environment.NewLine;
        }

        #endregion

        /// <summary>
        /// 流中的下一数据行;如果到达了流的末尾,则为 string[0]
        /// </summary>
        /// <returns>一行中的所有单元格</returns>
        public string[] ReadRow()
        {
            if (Peek() == -1) return new string[0];
            ArrayList cells = new ArrayList();
            StringBuilder readBuffer = new StringBuilder(16, m_CellLengthMax);

            int c;
            while ((c = Read()) != -1)
            {
                #region 处理特殊带(")的单元格
                if (c == '"' && readBuffer.Length == 0)
                {
                    //StringBuilder quoteBuffer = new StringBuilder(16, m_CellLengthMax);
                    //quoteBuffer.Append((char)c);  //首引号
                    try
                    {
                        while ((c = Read()) != -1)
                        {
                            if (c == 34)                // '"'的值为34;
                            {
                                if (Peek() == 34)       //忽略一个连续双引号
                                {
                                    Read();             //等效于c = Read();
                                }
                                else                    //非连续双引号,为结束引号,
                                {
                                    //quote = quoteBuffer.ToString();
                                    break;              //goto continue
                                }
                            }
                            readBuffer.Append((char)c);//双引号中的内容原样输出;
                        }
                        continue;
                    }
                    catch (ArgumentOutOfRangeException)
                    {
                        //this.Close();
                        throw new Exception("The cell Length overed maximum value. ");
                    }
                }
                #endregion

                #region //单元格分隔
                if (c == m_CellSeparator)
                {
                    cells.Add(readBuffer.ToString());
                    readBuffer.Remove(0, readBuffer.Length);
                    continue;
                }
                #endregion

                #region //行分隔
                if (c == '\r' || c == '\n')
                {
                    if (c == '\r' && Peek() == '\n')       //忽略'\n'
                    {
                        Read();
                    }
                    cells.Add(readBuffer.ToString());
                    //readBuffer.Remove(0, readBuffer.Length);
                    return (string[])(cells.ToArray(string.Empty.GetType()));
                }
                #endregion

                readBuffer.Append((char)c);
            }
            cells.Add(readBuffer.ToString());
            return (string[])(cells.ToArray(string.Empty.GetType()));
        }
    }
}

posted @ 2008-09-02 23:40  边城浪  阅读(2611)  评论(1)    收藏  举报