csv文件导入导出小工具

最近有做一个csv文件导入导出的小工具,觉得还蛮方便的,记录下来,方便自己以后查阅。

本地测试时,直接运行main方法,给其他人用,可以用maven打包成jar

主要的几个文件

public class Start {

public static void main(String[] args) throws SQLException {
CsvHandleProV csvHandle = new CsvHandleProV();
     //打包时用这个
/*String table = "";
if (args.length == 6){
csvHandle.operateTable(args);
}else{
System.out.println("文件名个数不对");
}*/

//本地测试方法
String[] argsTest = new String[]{"E:\\task\\10.202.15.159-1701.csv","E:\\task\\10.202.15.159-1702.csv",
"E:\\task\\10.202.15.159-1703.csv","E:\\task\\10.202.15.159-1704.csv","E:\\task\\aaa.csv","E:\\new"};
csvHandle.operateTable(argsTest);

}
}
}
以下是导入导出的代码,写了两种方式:第一种直接在内存中操作(推荐使用),去重和比对都在内存中完成;第二种涉及数据库操作,去重和比对都在数据库中完成。
第一种
**
* 五个表,一个父表,四个子表,需要将四个子表的数据与父表进行比对,求补集,并以csv文件导出
* 数据比对工具,内存中操作
*/
public class CsvHandleProV {

Logger logger = Logger.getLogger("缺失库工具PRO");
public static String PATH = "";
List<String[]> csvRows;
String dataPackageFile;


public void operateTable(String[] args) {
long start_time = new Date().getTime();
//将主表的id放到一个set集合中
Set<String> uid1 = new HashSet<>();
//将四个子表的id放到一个set集合中
Set<String> uid2 = new HashSet<>();
for (String fileName : args) {
if (!fileName.contains(".csv")) {
PATH = fileName;
continue;
} else {
CsvIO csvReader = new CsvIO();
//if (fileName.indexOf("detail") > 0) {
if (!HasDigit(fileName)) {
this.dataPackageFile = fileName;
csvRows = csvReader.read(dataPackageFile, true);

for (String[] line : csvRows) {
String id = line[0];
uid1.add(id);
}
System.out.println("uid1***********" + uid1.size());
} else {
this.dataPackageFile = fileName;
csvRows = csvReader.read(dataPackageFile, true);
for (String[] line : csvRows) {
String id = line[0];
uid2.add(id);
}

}
}

}
System.out.println("uid2*****************" + uid2.size());
Set<String> result = new HashSet<>();
result.clear();
result.addAll(uid1);
//result.retainAll(uid2);//取交集
result.removeAll(uid2);//取补集
//result.addAll(uid2);//取并集
System.out.println("result*****************" + result.size());
//重新读取主表数据
BufferedReader br = null;
BufferedWriter writer = null;
try {
File f = new File(PATH + File.separator + "result.csv");

if (!f.exists()) {
f.createNewFile();
}
br = new BufferedReader(new InputStreamReader(new FileInputStream(dataPackageFile), "GBK"), 1024);
writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "GBK"), 1024);
String line = "";
ArrayList<String> strList = new ArrayList<String>();

while ((line = br.readLine()) != null) {
strList.add(line);
}
for (int i = 0; i < strList.size(); i++) {
writer.write(strList.get(0));//写文件
writer.newLine();
break;
}
//去重后的数据与主表中的数据匹配
Iterator<String> it = result.iterator();
while (it.hasNext()) {
String str = it.next();
for (int i = 1; i < strList.size(); i++) {
String temp = strList.get(i).substring(0, strList.get(i).indexOf(","));
if (str.equals(temp)) {
writer.write(strList.get(i));//写文件
writer.newLine();
writer.flush();
}
}
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
br.close();
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}

logger.info("任务执行完成!耗时【" + (new Date().getTime() - start_time) / 1000 + "】秒");
}

// 判断一个字符串是否含有数字(包含返回true,不包含返回false)
public boolean HasDigit(String content) {
boolean flag = false;
Pattern p = Pattern.compile(".*\\d+.*");
Matcher m = p.matcher(content);
if (m.matches()) {
flag = true;
}
return flag;
}
}

第二种
/**
 * CSV comparison tool, database version.
 *
 * <p>Five tables: one parent table (tb_detail) and four child tables
 * (tb_assist1..4). The csv files are bulk-loaded into the database, the
 * complement (detail rows whose id appears in no assist table) is selected
 * with SQL, and the result set is exported to result.csv.
 */
public class CsvHandleProV {

    /** Rows per INSERT batch. */
    private static final int BATCH_SIZE = 100000;

    Logger logger = Logger.getLogger("缺失库工具PRO");
    /** Target table of the file currently being imported. */
    String table = "";
    /** Column headers of the parent-table (tb_detail) csv. */
    public static String IMPORT_CSV_HEADERS_DETAIL = "id,old_id,address_id,norm_address,sim_address,adcode,sss_word,address_freq,dept_code,zno_code,team_code,worker_code,group_id,key_word,word_freq,tag";
    /** Column headers of the child-table (tb_assist*) csvs. */
    public static String IMPORT_CSV_HEADERS_ASSIST = "id,norm_address,search_address,key1,type1,match_id,match_x,match_y,gl_level,filter,match_addr,match_key,key_index";
    /** Output directory; set from the one argument that is not a .csv file. */
    public static String PATH = "";
    /** Base name of the exported file (result.csv). */
    public static String FILENAME = "result";

    ApplicationContext ctx = new ClassPathXmlApplicationContext("spring.xml");
    JdbcTemplate jdbcTemplate = (JdbcTemplate) ctx.getBean("jdbcTemplate");
    /** Rows of the csv file currently being imported. */
    List<String[]> csvRows;
    String dataPackageFile;

    /**
     * Maps a file name to its target table, reads the file, and inserts it.
     *
     * <p>Routing: a name containing "detail" goes to tb_detail (the old
     * {@code indexOf > 0} test missed a name starting with "detail");
     * otherwise the trailing digit before ".csv" selects tb_assist1..3 and
     * everything else falls through to tb_assist4.
     */
    private void doReadFile(String fileName) {
        if (fileName.contains("detail")) {
            table = "tb_detail";
        } else if (fileName.endsWith("1.csv")) {
            table = "tb_assist1";
        } else if (fileName.endsWith("2.csv")) {
            table = "tb_assist2";
        } else if (fileName.endsWith("3.csv")) {
            table = "tb_assist3";
        } else {
            table = "tb_assist4";
        }
        this.dataPackageFile = fileName;
        logger.info("本次操作表对象\r\n****************\r\n" + table + "\r\n****************");

        CsvIO csvReader = new CsvIO();
        logger.info("开始读取文本对象");
        if (table.equals("tb_detail")) {
            csvReader.addHeadFieldString(IMPORT_CSV_HEADERS_DETAIL);
        } else {
            csvReader.addHeadFieldString(IMPORT_CSV_HEADERS_ASSIST);
        }
        csvRows = csvReader.read(dataPackageFile, true);
        addNormalTable();
    }

    /**
     * Entry point: truncates the working tables, imports every csv argument,
     * runs the complement query, and exports the result.
     *
     * @param args csv file paths plus exactly one output directory
     */
    public void operateTable(String[] args) {
        // Clear previous data before the new import.
        delTable();
        long startTime = System.currentTimeMillis();
        for (String fileName : args) {
            if (!fileName.contains(".csv")) {
                PATH = fileName;  // the output directory argument
                continue;
            }
            doReadFile(fileName);
        }

        try {
            // Detail rows whose id appears in none of the four assist tables.
            String sql = "SELECT * FROM tb_detail WHERE id NOT IN (select distinct(id) from tb_assist1) AND id NOT IN (select distinct(id) from tb_assist2) AND id NOT IN (select distinct(id) from tb_assist3) AND id NOT IN (select distinct(id) from tb_assist4)";
            jdbcTemplate.beginTranstaion();
            List<Map<String, Object>> list = jdbcTemplate.queryForList(sql);
            AssembledData(list);
        } catch (Exception e) {
            jdbcTemplate.rollback();
            logger.warning("执行出错,数据被回滚" + e.getLocalizedMessage());
        }
        logger.info("任务执行完成!耗时【" + (System.currentTimeMillis() - startTime) / 1000 + "】秒");
    }

    /**
     * Writes a header row plus data rows to {@code outPutPath/filename.csv}.
     *
     * @param head       column names for the header row
     * @param dataList   data rows, one inner list per row
     * @param outPutPath target directory (created if missing)
     * @param filename   base file name without the .csv extension
     * @return the file that was (or would have been) written
     */
    public static File createCSVFile(List<Object> head, List<List<Object>> dataList, String outPutPath, String filename) {
        File csvFile = new File(outPutPath + File.separator + filename + ".csv");
        File parent = csvFile.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
        // GB2312 so that Excel reads the "," separator correctly.
        // try-with-resources replaces the old finally block, which NPE'd
        // whenever the writer failed to open.
        try (BufferedWriter csvWriter = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(csvFile), "GB2312"), 1024)) {
            writeRow(head, csvWriter);
            for (List<Object> row : dataList) {
                writeRow(row, csvWriter);
            }
            csvWriter.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return csvFile;
    }

    /**
     * Writes one csv row: every cell quoted, embedded quotes doubled.
     *
     * <p>Fixes two defects of the old version: it appended a comma after the
     * last cell (an extra empty column per row) and NPE'd on null DB values
     * (now written as an empty cell).
     *
     * @param row       cell values for one row
     * @param csvWriter destination writer
     * @throws IOException on write failure
     */
    private static void writeRow(List<Object> row, BufferedWriter csvWriter)
            throws IOException {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < row.size(); i++) {
            if (i > 0) {
                sb.append(',');
            }
            Object data = row.get(i);
            String cell = (data == null) ? "" : data.toString();
            sb.append('"').append(cell.replace("\"", "\"\"")).append('"');
        }
        csvWriter.write(sb.toString());
        csvWriter.newLine();
    }

    /**
     * Converts the query result into a header row plus data rows and writes
     * result.csv. Does nothing when the result set is empty.
     *
     * <p>NOTE(review): column order relies on the iteration order of the maps
     * returned by queryForList; confirm they preserve select-list order
     * (e.g. LinkedHashMap), otherwise columns may be shuffled.
     *
     * @param retList database query result rows (column name -&gt; value)
     */
    public static void AssembledData(List<Map<String, Object>> retList) {
        if (retList == null || retList.isEmpty()) {
            return;
        }
        List<Object> head = new ArrayList<Object>(retList.get(0).keySet());
        List<List<Object>> dataList = new ArrayList<List<Object>>();
        for (Map<String, Object> map : retList) {
            // values() iterates in the same order as keySet(), matching head.
            dataList.add(new ArrayList<Object>(map.values()));
        }
        System.out.println(head);
        createCSVFile(head, dataList, PATH, FILENAME);
    }

    /**
     * Truncates all five working tables before a fresh import.
     *
     * <p>NOTE(review): TRUNCATE is DDL and on MySQL causes an implicit
     * commit, so the surrounding transaction is best-effort only — confirm
     * against the target database.
     */
    private void delTable() {
        try {
            jdbcTemplate.beginTranstaion();
            jdbcTemplate.execute("TRUNCATE table tb_assist1");
            jdbcTemplate.execute("TRUNCATE table tb_assist2");
            jdbcTemplate.execute("TRUNCATE table tb_assist3");
            jdbcTemplate.execute("TRUNCATE table tb_assist4");
            jdbcTemplate.execute("TRUNCATE table tb_detail");
            jdbcTemplate.commit();
        } catch (Exception e) {
            jdbcTemplate.rollback();
            logger.warning("执行出错,数据被回滚" + e.getLocalizedMessage());
        }
    }

    /**
     * Bulk-inserts {@link #csvRows} into the table currently selected by
     * {@link #table}, in batches of {@link #BATCH_SIZE}. Rows with a blank
     * id are skipped by the column reader.
     */
    private void addNormalTable() {
        String sql_insert;
        ReadColumnAdd readColumns;
        if (table.equals("tb_detail")) {
            sql_insert = "insert into " + table + " (id,old_id,address_id,norm_address,sim_address,adcode,sss_word,address_freq,dept_code,zno_code,team_code,worker_code,group_id,key_word,word_freq,tag) values " +
                    "(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
            readColumns = new ReadColumnAdd(csvRows).invoke();
        } else {
            sql_insert = "insert into " + table + " (id,norm_address,search_address,key1,type1,match_id,match_x,match_y,gl_level,filter,match_addr,match_key,key_index) values " +
                    "(?,?,?,?,?,?,?,?,?,?,?,?,?)";
            readColumns = new ReadColumnAdd(csvRows).invoke1();
        }

        List<Object[]> values = readColumns.values;
        try {
            jdbcTemplate.beginTranstaion();
            // Fixed-size batches; the old size/count+1 loop could issue a
            // final empty batch when size was an exact multiple of count.
            for (int start = 0; start < values.size(); start += BATCH_SIZE) {
                int end = Math.min(start + BATCH_SIZE, values.size());
                jdbcTemplate.batchUpdate(sql_insert, values.subList(start, end));
                // end/10000 is the true 万-count (the old end/100000 was off by 10x)
                logger.info("执行新增【" + end / 10000 + "】万条数据");
            }
            jdbcTemplate.commit();
        } catch (Exception e) {
            jdbcTemplate.rollback();
            logger.warning("执行出错,数据被回滚" + e.getLocalizedMessage());
        }
    }

    /**
     * Converts parsed csv rows into JDBC parameter arrays, skipping rows
     * whose id (column 0) is blank.
     */
    private class ReadColumnAdd {
        private final List<String[]> list;
        private List<Object[]> values;

        public ReadColumnAdd(List<String[]> list) {
            this.list = list;
        }

        /** Reads the 16 parent-table (tb_detail) columns of every row. */
        public ReadColumnAdd invoke() {
            this.values = copyColumns(16);
            return this;
        }

        /** Reads the 13 child-table (tb_assist*) columns of every row. */
        public ReadColumnAdd invoke1() {
            this.values = copyColumns(13);
            return this;
        }

        /** Copies the first {@code width} columns of each row with a non-blank id. */
        private List<Object[]> copyColumns(int width) {
            List<Object[]> rows = new ArrayList<Object[]>();
            for (String[] line : list) {
                if (StringUtils.isBlank(line[0])) {
                    continue;
                }
                Object[] value = new Object[width];
                for (int col = 0; col < width; col++) {
                    value[col] = line[col];
                }
                rows.add(value);
            }
            return rows;
        }
    }
}

打包后,直接运行startup.bat文件即可

set csvpath=e:\task\10.202.15.159-1701.csv e:\task\10.202.15.159-1702.csv e:\task\10.202.15.159-1703.csv e:\task\10.202.15.159-1704.csv e:\task\755_detail.csv d:\new
java -Dfile.encoding=gbk -jar tool-1.0.jar %csvpath%
@pause

这里主要是对文件路径的一些配置,以及调用jar文件

 
 



posted @ 2017-07-19 15:28  xmiaomiao1019  阅读(350)  评论(1编辑  收藏  举报