java CSV文件解析和生成

一、CSV概念

　　逗号分隔值（Comma-Separated Values，CSV），其文件以纯文本形式存储表格数据（数字和文本），文件的每一行都是一个数据记录。每个记录由一个或多个字段组成，用逗号分隔。使用逗号作为字段分隔符是此文件格式的名称的来源，因为分隔字符也可以不是逗号（如使用|进行分隔），有时也称为字符分隔值。

　　CSV广泛用于不同体系结构的应用程序之间交换数据表格信息，解决不兼容数据格式的互通问题，一般按照传输双方既定标准进行格式定义，而其本身并无明确格式标准。

　　CSV用逗号分隔字段的基本思想是清楚的，但是当字段数据也可能包含逗号或者甚至嵌入换行符时，该想法变得复杂。 CSV实现可能无法处理这些字段数据，或者可能会使用引号来包围字段。引用并不能解决所有问题：有些字段可能需要嵌入引号，因此CSV实现可能包含转义字符或转义序列。

CSV的格式规范

具体文件格式：

1.每条记录占一行以逗号为分隔符

2.逗号前后的空格会被忽略

3.字段中包含有逗号，该字段必须用双引号括起来

4.字段中包含有换行符，该字段必须用双引号括起来

5.字段前后包含有空格，该字段必须用双引号括起来

6.字段中的双引号用两个双引号表示

7.字段中如果有双引号，该字段必须用双引号括起来

8.第一条记录，可以是字段名

二、CSV文件生成和解析

解析：

CSV并不是一种单一的、定义明确的格式，因此在实践中，术语“CSV”泛指具有以下特征的任何文件：

纯文本，使用某个字符集，比如ASCII、Unicode、EBCDIC或GB2312；
由记录组成（典型的是每行一条记录）；
每条记录被分隔符分隔为字段（典型分隔符有逗号、分号或制表符；有时分隔符可以包括可选的空格）；
每条记录都有同样的字段序列。

CSV文件一般用于将数据库数据直接通过文件传输，而不是通过接口。

工具类

package com.ljxx;

import com.alibaba.fastjson2.JSON;
import org.apache.commons.beanutils.BeanUtils;

import java.io.*;
import java.util.*;

public class CsvUtil {

    /**
     * 读取流中前面的字符，看是否有bom，如果有bom，将bom头先读掉丢弃
     *
     * @param in
     * @return
     * @throws IOException
     */
    public static InputStream getInputStream(InputStream in) throws IOException {

        PushbackInputStream testin = new PushbackInputStream(in);
        int ch = testin.read();
        if (ch != 0xEF) {
            testin.unread(ch);
        } else if ((ch = testin.read()) != 0xBB) {
            testin.unread(ch);
            testin.unread(0xef);
        } else if ((ch = testin.read()) != 0xBF) {
            throw new IOException("错误的UTF-8格式文件");
        } else {
        }
        return testin;
    }

    /**
     * 解析csv
     *
     * @param file
     * @param clazz
     * @param <T>
     * @return
     * @throws Exception
     */
    public static <T> List<T> parseCsv(File file, Class<T> clazz) throws Exception {
        FileInputStream fileInputStream = new FileInputStream(file);
        //读取csv文件
        //读取流中前面的字符，看是否有bom，如果有bom，将bom头先读掉丢弃
        InputStream inputStream = getInputStream(fileInputStream);
        BufferedInputStream in = new BufferedInputStream(inputStream);
        BufferedReader br = new BufferedReader(new InputStreamReader(in, getCharSet(in)));
        String line = "";
        List<Map<String, Object>> lists = new ArrayList<>();
        String firstLine = br.readLine();
        String[] firstLines = firstLine.split(",");

        while ((line = br.readLine()) != null) {
            String[] lines = line.split(",(?=([^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*$)");
            Map<String, Object> map = new HashMap<>();
            for (int i = 0; i < lines.length; i++) {
                if (firstLines[i].startsWith("#")) {
                    firstLines[i] = firstLines[i].replaceAll("#", "");
                }
                String s = lines[i];
                s = s.trim().replaceAll("\"\"", "\"");
                if (s.startsWith("\"")) {
                    s = s.substring(1, s.length() - 1);
                }
                map.put(firstLines[i], s);
            }
            lists.add(map);
        }
        inputStream.close();
        in.close();
        br.close();
        if (null != lists) {
            //使用泛型方式转换集合
            List<T> list = JSON.parseArray(JSON.toJSONString(lists), clazz);
            return list;
        }
        return null;
    }

    /**
     * 解析csv
     *
     * @param file
     * @param clazz
     * @param <T>
     * @param startLine 开始行
     * @param endLine   结束行
     * @return
     * @throws Exception
     */
    public static <T> List<T> parseCsv(File file, Class<T> clazz, Integer startLine, Integer endLine) throws Exception {
        FileInputStream fileInputStream = new FileInputStream(file);
        //读取csv文件
        //读取流中前面的字符，看是否有bom，如果有bom，将bom头先读掉丢弃
        InputStream inputStream = getInputStream(fileInputStream);
        BufferedInputStream in = new BufferedInputStream(inputStream);
        BufferedReader br = new BufferedReader(new InputStreamReader(in, getCharSet(in)));
        String line = "";
        List<Map<String, Object>> lists = new ArrayList<>();
        String firstLine = br.readLine();
        String[] firstLines = firstLine.split(",");
        Integer l = 0;
        while ((line = br.readLine()) != null) {
            l++;
            if (l <= endLine && l >= startLine) {
                String[] lines = line.split(",(?=([^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*$)");
                Map<String, Object> map = new HashMap<>();
                for (int i = 0; i < lines.length; i++) {
                    if (firstLines[i].startsWith("#")) {
                        firstLines[i] = firstLines[i].replaceAll("#", "");
                    }
                    String s = lines[i];
                    s = s.trim().replaceAll("\"\"", "\"");
                    if (s.startsWith("\"")) {
                        s = s.substring(1, s.length() - 1);
                    }
                    map.put(firstLines[i], s);
                }
                lists.add(map);
            } else if (l > endLine) {
                break;
            }
        }
        inputStream.close();
        in.close();
        br.close();
        if (null != lists) {
            //使用泛型方式转换集合
            List<T> list = JSON.parseArray(JSON.toJSONString(lists), clazz);
            return list;
        }
        return null;
    }

    /**
     * 获取编码类型，解决乱码问题
     *
     * @param stream
     * @return
     * @throws Exception
     */
    private static String getCharSet(BufferedInputStream stream) throws Exception {
        String charSet;
        byte[] buffer = new byte[3];
        //因流读取后再读取可能会缺少内容，此处需要先读，然后再还原
        stream.mark(stream.available() + 1);
        stream.read(buffer);
        stream.reset();
        String b0 = Integer.toHexString(buffer[0] & 0xFF);
        String b1 = Integer.toHexString(buffer[1] & 0xFF);
        String b2 = Integer.toHexString(buffer[2] & 0xFF);
        String header = b0 + b1 + b2;
        switch (header) {
            case "d5cbba":
                charSet = "GB2312";
                break;
//            case "236167":
//                charSet = "GB2312";
//                break;
            default:
                charSet = "UTF-8";
                break;
        }
        return charSet;
    }
    /**
     * 创建csv文件
     * @param exportData
     * @param map
     * @param outPutPath
     * @param fileName
     * @return
     */
    public static File createCSVFile(List exportData, HashMap map, String outPutPath,
                                     String fileName) {
        File csvFile = null;
        BufferedWriter csvFileOutputStream = null;
        try {
            File file = new File(outPutPath);
            if (!file.exists()) {
                file.mkdir();
            }
            //定义文件名格式并创建
            csvFile = File.createTempFile(fileName, ".csv", new File(outPutPath));
            System.out.println("csvFile：" + csvFile);
            // utf8使正确读取分隔符","
            csvFileOutputStream = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
                    csvFile), "UTF-8"), 1024);
            System.out.println("csvFileOutputStream：" + csvFileOutputStream);
            // 写入文件头部
            for (Iterator propertyIterator = map.entrySet().iterator(); propertyIterator.hasNext();) {
                java.util.Map.Entry propertyEntry = (java.util.Map.Entry) propertyIterator.next();
                csvFileOutputStream.write("\"" + (String) propertyEntry.getValue() != null ? (String) propertyEntry.getValue() : "" + "\"");
                if (propertyIterator.hasNext()) {
                    //使用,分隔
                    csvFileOutputStream.write(",");
                }
            }
            csvFileOutputStream.newLine();
            // 写入文件内容
            for (Iterator iterator = exportData.iterator(); iterator.hasNext();) {
                Object row = (Object) iterator.next();
                for (Iterator propertyIterator = map.entrySet().iterator(); propertyIterator.hasNext();) {
                    java.util.Map.Entry propertyEntry = (java.util.Map.Entry) propertyIterator.next();
                    csvFileOutputStream.write((String) BeanUtils.getProperty(row,
                            (String) propertyEntry.getKey()));
                    if (propertyIterator.hasNext()) {
                        //使用,分隔
                        csvFileOutputStream.write(",");
                    }
                }
                if (iterator.hasNext()) {
                    csvFileOutputStream.newLine();
                }
            }
            csvFileOutputStream.flush();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                csvFileOutputStream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return csvFile;
    }
}

使用工具类解析CSV文件

public class demo9 {
    public static void main(String[] args) throws Exception {
        String filePath = "C:\\Users\\miracle\\Desktop\\caigou测试1.csv";
        File file = new File(filePath);
        // 解析xml文件得到一个List集合
        List<Map> maps = CsvUtil.parseCsv(file, Map.class);
        System.out.println(maps);
    }
}

结果如下：

[{purchaseOrderNo=12343563, uom=箱, supplierNo=1461, purchaseNum=100, purchasePrice=15.22, planArriveDate=2022-01-09, orderDate=2022-01-07, productNo=675432131, productName=测试}, 
{purchaseOrderNo=12342563, uom=箱, supplierNo=1461, purchaseNum=100, purchasePrice=15.22, planArriveDate=2022-01-09, orderDate=2022-01-07, productNo=675432131, productName=测试}]

使用工具类生成CSV文件

import com.alibaba.fastjson2.JSONObject;
import com.ljxx.service.IDrugStockService;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

@RunWith(SpringRunner.class)
@SpringBootTest(classes = CooperativeServerApplication.class)
public class demo9 {
    @Autowired
    private IDrugStockService drugStockService;
    @Test
    public void test() {
        JSONObject jsonObject = new JSONObject();
        jsonObject.put("drugName","金银花");
        List<Map> stockList = drugStockService.queryByDrug(jsonObject);
        System.out.println(stockList);
        LinkedHashMap<String, String> headMap = new LinkedHashMap<String, String>();
        Map map = (Map) stockList.get(0);
        Iterator it = map.entrySet().iterator();
        while(it.hasNext()){
            Map.Entry<String, String> entry = (Map.Entry<String, String>) it.next();
            String key = entry.getKey();
            headMap.put(key,key);
        }
        CsvUtil.createCSVFile(stockList,headMap,"C:\\Users\\miracle\\Desktop\\","query_content_test_data");
    }
}

结果如下：

entityId,drugCount
3668692603610206316,100.0
3671511592266764396,-88.8933
3681983755071393892,11.56
3683146055765594217,11.56

如下所示：

posted on 2022-09-26 17:56 周文豪阅读(4341) 评论(0) 编辑收藏举报