Java: making a list of CSV files free of repeated values

Requirement:

/**
 * before:
 * file A1.csv {1,2,3,4,5}
 * file A2.csv {2,3,9,10,11}
 * file B1.csv {5,12,13,14,15}
 * file B2.csv {16,14,15,4,9,20,30}
 * A1.csv, A2.csv, B1.csv and B2.csv must not share any repeated value
 *
 * after:
 * file A1.csv {1,4}
 * file A2.csv {2,3,10,11}
 * file B1.csv {12,13}
 * file B2.csv {16,9,20,30}
 */

  

tangxin@tangxin:~/csvrepeat$ ls
A1.csv  A2.csv  B1.csv  B2.csv

  

 

CSVUtilVersion2.java

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.springframework.util.StringUtils;

import java.io.*;
import java.lang.reflect.Array;
import java.util.*;


/**
 * before:
 * file A1.csv {1,2,3,4,5}
 * file A2.csv {2,3,9,10,11}
 * file B1.csv {5,12,13,14,15}
 * file B2.csv {16,14,15,4,9,20,30}
 * A1.csv A2.csv A3.csv A4.csv cant not repeat
 *
 * after:
 * file A1.csv {1,4}
 * file A2.csv {2,3,10,11}
 * file B1.csv {12,13}
 * file B2.csv {16,9,20,30}
 */
@Slf4j
public class CSVUtilVersion2 {

    private static final String CSV_PATH = "/home/tangxin/csvrepeat/";
    private static final boolean CREATE_SWITCH = true;


    /**
     * read single column data list
     * @param path
     * @return
     */
    public static List<String> ids(String path) {
        List<String> result = new ArrayList<>();
        File csv = new File(path);  // CSV文件路径
        LineIterator it = null;
        try {
            it = FileUtils.lineIterator(csv);
            while (it.hasNext()) {
                String line = it.nextLine();
                if (line.trim().contains("ID")) {
                    continue;
                }
                String[] arr = line.split(",");
                String ID = arr[0];
                ID = ID.replaceAll("\"", "").trim();
                if (!StringUtils.isEmpty(ID)) {
                    result.add(ID);
                }
            }
        } catch (Exception e) {
            log.error("读取ID csv文件失败:{}", e.getMessage());
        } finally {
            LineIterator.closeQuietly(it);
        }
        return result;
    }


    /**
     * from src delete oth
     * @param src
     * @param oth
     * @return
     */
    public static List removeAll(List src, List oth) {
        LinkedList result = new LinkedList(src);
        HashSet othHash = new HashSet(oth);
        Iterator iter = result.iterator();
        while (iter.hasNext()) {
            if (othHash.contains(iter.next())) {
                iter.remove();
            }
        }
        return result;
    }


    /**
     * -Xms1g -Xmx1g -XX:PermSize=128m -XX:SurvivorRatio=2 -XX:+UseParallelGC
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {


        //∑=1+2+3+...+(n-1) group

        LinkedList<String> fileList = new LinkedList<>();
        fileList.add("A1.csv");
        fileList.add("A2.csv");
        fileList.add("B1.csv");
        fileList.add("B2.csv");
//        fileList.add("C1.csv");


        DescartesRepeat(fileList);

        ded(fileList);

    }

    private static void DescartesRepeat(LinkedList<String> fileList) {
        Set<String> repeatList = new HashSet<>();

        Set<String> groupSet = new HashSet<>();

        Set<String> goONList = new HashSet<>();


        //A1->A2,B1,B2
        for (int i = 0; i < fileList.size(); i++) {

            String itemI = fileList.get(i);

            for (int j = 0; j < fileList.size(); j++) {

                String itemJ = fileList.get(j);

                if (!itemI.equals(itemJ)) {

                    String groupR1 = itemI + "->" + itemJ;
                    String groupR2 = itemJ + "->" + itemI;

                    if (groupSet.contains(groupR1) || groupSet.contains(groupR2)){
                        continue;
                    }

                    groupSet.add(groupR1);


                    String repeatT = repeat(CSV_PATH + itemI, CSV_PATH + itemJ);
                    if(!StringUtils.isEmpty(repeatT)){
                        repeatList.add(repeatT);
                        //System.out.println(groupR1+"->"+repeatT);
                    }


                }

            }
        }

        if (CollectionUtils.isNotEmpty(repeatList)) {
//            System.out.println(repeatList);
            for (String repeatItem : repeatList) {
                Iterator<String> iterator = fileList.iterator();
                while (iterator.hasNext()) {
                    String oldItem = iterator.next();

                    String oldS = oldItem.replace(".csv", "").replace("-new","");
                    String repeatS = repeatItem.replace(".csv","").replace("-new","");
                    if (repeatS.contains(oldS)) {
                        iterator.remove();
                        goONList.add(repeatItem);
                    }
                }
            }
            fileList.addAll(goONList);
            System.out.println(fileList);
            DescartesRepeat(fileList);
        }
    }


    public static void ded(List<String> args) {

        //保证指定csv列表每组都不能有重复数据
        for (int i = 0; i < args.size(); i++) {
//            if(i>0){
//                continue;
//            }

            String source = CSV_PATH + args.get(i);

            for (int j = 0; j < args.size(); j++) {

                if (i == j) {
                    continue;
                }

                String target = CSV_PATH + args.get(j);
                intersection(source, target);
            }


        }


    }


    public static void intersection(String sourcePath, String targetPath) {
        List<String> ids1 = ids(sourcePath);
        List<String> ids2 = ids(targetPath);
        List<String> inter = (List<String>) CollectionUtils.intersection(ids1, ids2);
        System.out.println(sourcePath + "和" + targetPath + "的重复数据大小" + inter.size());
    }



    public static String repeat(String source, String target){
        //cdd fund xyd

        List<String> ids1 = ids(source);
        List<String> ids2 = ids(target);

//        System.out.println(source + "集合大小" + ids1.size());
//        System.out.println(target + "集合大小" + ids2.size());


        List<String> inter = (List<String>) CollectionUtils.intersection(ids1, ids2);

//        System.out.println("去重数据大小:" + inter.size());



        if (inter != null && inter.size() > 0) {


            if (ids1.size() > ids2.size()) {
                return repeatInner(source, ids1, inter);
            } else if (ids2.size() > ids1.size()) {
                return repeatInner(target, ids2, inter);
            } else {
                return repeatInner(source, ids1, inter);
            }


        }

        return "";
    }

    private static String repeatInner(String source, List<String> ids, List<String> inter) {
        String newPath = source.replace(".csv", "-new.csv");
        List<String> ids1new = removeAll(ids, inter);
        createCSV(ids1new, newPath);
        return newPath.replace(CSV_PATH,"");
    }



    /**
     * 创建CSV文件
     */
    public static void createCSV(List<String> list, String fileName) {


        if(!CREATE_SWITCH){
//            System.out.println("创建csv开关关闭");
            return;
        }else{
//            System.out.println("创建csv开关开启");
        }

        // 表格头
        Object[] head = {"ID"};
        List<Object> headList = Arrays.asList(head);

        //数据
        List<List<Object>> dataList = new ArrayList<>();
        List<Object> rowList = null;
        for (int i = 0; i < list.size(); i++) {
            rowList = new ArrayList<>();
            rowList.add(list.get(i));
            dataList.add(rowList);
        }

        File csvFile;
        BufferedWriter csvWtriter = null;
        try {
            csvFile = new File(fileName);
            File parent = csvFile.getParentFile();
            if (parent != null && !parent.exists()) {
                parent.mkdirs();
            }
            csvFile.createNewFile();

            // GB2312使正确读取分隔符","
            csvWtriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(csvFile), "GB2312"), 1024);


            // 写入文件头部
            writeRow(headList, csvWtriter);

            // 写入文件内容
            for (List<Object> row : dataList) {
                writeRow(row, csvWtriter);
            }
            csvWtriter.flush();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                csvWtriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }


    /**
     * 写一行数据
     *
     * @param row       数据列表
     * @param csvWriter
     * @throws IOException
     */
    private static void writeRow(List<Object> row, BufferedWriter csvWriter) throws IOException {
        for (Object data : row) {
            StringBuffer sb = new StringBuffer();
            String rowStr = sb.append("\"").append(data).append("\",").toString();
            csvWriter.write(rowStr);
        }
        csvWriter.newLine();
    }

}

  

tangxin@tangxin:~/csvrepeat$ ls
A1.csv  A1-new.csv  A1-new-new.csv  A2.csv  A2-new.csv  B1.csv  B2.csv  B2-new.csv  B2-new-new.csv

  

 

posted @ 2018-01-29 20:01  佛法无边  阅读(183)  评论(0编辑  收藏  举报