将大量数据批量插入Oracle表的类,支持停止续传
之前用create table select * from XXTable无疑是创建庞大表的最快方案之一,但是数据重复率是个问题,且数据难以操控。
于是我在之前批量插数据的基础上更新了一个类,让它具有了Resume的能力,这样可以利用碎片时间能插一点是一点。
以后此类还可能改进,先留一个版本在这里。
数据库连接参数类:
class DBParam { public final static String Driver = "oracle.jdbc.driver.OracleDriver"; public final static String DbUrl = "jdbc:oracle:thin:@127.0.0.1:1521:orcl"; public final static String User = "ufo"; public final static String Pswd = "1234"; }
HugeTbBatchInserter类:
import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.List; import java.util.Random; class TypeField{ String type; String field; } // Insert huge records to a table public class HugeTbBatchInserter { private final int BatchSize=250;// Batch insert size,可以根据机器性能提高 private final int Total_Record_Count=100000000;// 最好是BatchSize的整倍数 // 如果是多个表,扩充数组即可 // PK:主键 CH:文字 DT:Datetime,RND:百以内随机数 还可以根据需要扩充代号,在getInsertSql函数中则根据代号来设置值 private final String[][] tableArray= { {"score:"+Total_Record_Count,"PK:ID","RND:stuid","RND:courseid","RND:score"}, }; /** * 批量插值 */ public void batchInsert() { Connection conn = null; Statement stmt = null; try{ Class.forName(DBParam.Driver).newInstance(); conn = DriverManager.getConnection(DBParam.DbUrl, DBParam.User, DBParam.Pswd); stmt = conn.createStatement(); System.out.println("Begin to access "+DBParam.DbUrl+" as "+DBParam.User+"..."); for(String[] innerArr:tableArray) { String tableName=innerArr[0].split(":")[0]; System.out.println("Table:"+tableName); int existCount=fetchExistCount(tableName,stmt); System.out.println("Exist record count:"+existCount); int maxId=fetchMaxId(tableName,stmt); System.out.println("Max id:"+maxId); int count=Integer.parseInt(innerArr[0].split(":")[1])-existCount; System.out.println("准备向表"+tableName+"插入"+count+"条记录."); // 是否需要插值前先清空,自行判断再放开 //truncateTable(tableName,stmt); // 真正插入数据 insertTestDataTo(tableName,maxId+1,count,innerArr,stmt); } } catch (Exception e) { System.out.print(e.getMessage()); } finally { try { stmt.close(); conn.close(); } catch (SQLException e) { System.out.print("Can't close stmt/conn because of " + e.getMessage()); } } } /** * 以当前时间为基准减去数十秒 * @param n * @return */ private static String getDatetimeBefore(int n) { try { Calendar now = Calendar.getInstance(); now.add(Calendar.SECOND,-n*10);//日期减去n*10秒 Date newDate=now.getTime(); SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); String retval = sdf.format(newDate); return retval; } catch(Exception ex) { ex.printStackTrace(); return null; } } /** * 清空一个表的数据,注意此功能有破坏性,不可恢复,注意备份好数据 * @param tableName * @param conn * @param stmt * @throws SQLException */ private void truncateTable(String tableName,Statement stmt) throws SQLException{ String sql="truncate table "+tableName; stmt.execute(sql); System.out.println("truncated table:"+tableName); } /** * 得到表中已有的最大ID值 * @param tableName * @param conn * @param stmt * @return * @throws SQLException */ private int fetchMaxId(String tableName,Statement stmt) throws SQLException{ String sql="select max(id) as max from "+tableName+""; ResultSet rs = stmt.executeQuery(sql); while (rs.next()) { int max = rs.getInt("max"); return max; } return 0; } /** * 得到表中现存数量 * @param tableName * @param conn * @param stmt * @return * @throws SQLException */ private int fetchExistCount(String tableName,Statement stmt) throws SQLException{ String sql="select count(*) as cnt from "+tableName+""; ResultSet rs = stmt.executeQuery(sql); while (rs.next()) { int cnt = rs.getInt("cnt"); return cnt; } return 0; } /** * 向一个表插入数据 * @param tableName * @param count * @param innerArr * @param conn * @param stmt * @throws SQLException */ private void insertTestDataTo(String tableName,int startId,int count,String[] innerArr,Statement stmt) throws SQLException{ // 得到字段名和字段类型 List<TypeField> typefields=new ArrayList<TypeField>(); for(int i=1;i<innerArr.length;i++) { String temp=innerArr[i]; String[] arrTmp=temp.split(":"); TypeField tf=new TypeField(); tf.type=arrTmp[0]; tf.field=arrTmp[1]; typefields.add(tf); } List<String> fields=new ArrayList<String>(); List<String> values=new ArrayList<String>(); int index=0; for(TypeField tf:typefields) { fields.add(tf.field); values.add("''{"+index+"}''"); index++; } index=0; int times=count/BatchSize; for(int i=0;i<times;i++) { long startTime = System.currentTimeMillis(); StringBuilder sb=new StringBuilder(); sb.append("INSERT ALL "); for(int j=0;j<BatchSize;j++) { index=i*BatchSize+j+startId; sb.append(getInsertSql(tableName,typefields,index)); } sb.append(" select * from dual"); String sql = sb.toString(); stmt.executeUpdate(sql); long endTime = System.currentTimeMillis(); System.out.println("#"+i+"/"+times+" "+BatchSize+" records inserted to Table:'"+tableName+"',time elapsed:"+(endTime-startTime)+"ms."); } } /** * 得到批量插入语句 * @param tableName * @param typefields * @param index * @return */ private String getInsertSql(String tableName,List<TypeField> typefields,int index) { String currTime=getDatetimeBefore(index); StringBuilder sb=new StringBuilder(); sb.append(" INTO "+tableName+"("); List<String> fields=new ArrayList<String>(); for(TypeField tf:typefields) { fields.add(tf.field); } sb.append(String.join(",",fields)); sb.append(") values("); List<String> values=new ArrayList<String>(); for(TypeField tf:typefields) { if(tf.type.equals("PK")) { values.add("'"+String.valueOf(index)+"'"); }else if(tf.type.equals("CH")) { values.add("'0'"); }else if(tf.type.equals("RND")) { values.add("'"+getRND()+"'"); }else if(tf.type.equals("DT")) { values.add("to_date('"+currTime+"','yyyy-MM-dd HH24:mi:ss')"); } } sb.append(String.join(",",values)); sb.append(")"); String insertSql=sb.toString(); return insertSql; } private static String getRND() { return getRandom(0,100); } private static String getRandom(int min, int max){ Random random = new Random(); int s = random.nextInt(max) % (max - min + 1) + min; return String.valueOf(s); } /** * 将秒转化为日时分秒 * @param secondCount * @return */ private static String sec2DHMS(long secondCount) { String retval = null; long days = secondCount / (60 * 60 * 24); long hours = (secondCount % (60 * 60 * 24)) / (60 * 60); long minutes = (secondCount % (60 * 60)) / 60; long seconds = secondCount % 60; String strSeconds=""; if(seconds!=0) { strSeconds=seconds + "s"; } if (days > 0) { retval = days + "d" + hours + "h" + minutes + "m" + strSeconds; } else if (hours > 0) { retval = hours + "h" + minutes + "m" + strSeconds; } else if (minutes > 0) { retval = minutes + "m" + strSeconds; } else { retval = strSeconds; } return retval; } public static void main(String[] args) { HugeTbBatchInserter mi=new HugeTbBatchInserter(); long startTime = System.currentTimeMillis(); mi.batchInsert(); long endTime = System.currentTimeMillis(); System.out.println("Time elapsed:" + sec2DHMS((endTime - startTime)/1000) ); } }
这个类运行起来是这样的:
Begin to access jdbc:oracle:thin:@127.0.0.1:1521:orcl as ufo... Table:score Exist record count:3351500 Max id:3351499 准备向表score插入96648500条记录. #0/386594 250 records inserted to Table:'score',time elapsed:284ms. #1/386594 250 records inserted to Table:'score',time elapsed:282ms. #2/386594 250 records inserted to Table:'score',time elapsed:324ms. #3/386594 250 records inserted to Table:'score',time elapsed:284ms. #4/386594 250 records inserted to Table:'score',time elapsed:302ms. #5/386594 250 records inserted to Table:'score',time elapsed:330ms. #6/386594 250 records inserted to Table:'score',time elapsed:291ms. #7/386594 250 records inserted to Table:'score',time elapsed:335ms. #8/386594 250 records inserted to Table:'score',time elapsed:372ms. #9/386594 250 records inserted to Table:'score',time elapsed:374ms.
下面这个类虽然更快些,但插入总量有限,需要改进,也留一个版本在这里吧:
import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; import java.sql.Statement; import java.text.DecimalFormat; class DBParam { public final static String Driver = "oracle.jdbc.driver.OracleDriver"; public final static String DbUrl = "jdbc:oracle:thin:@127.0.0.1:1521:orcl"; public final static String User = "ufo"; public final static String Pswd = "1234"; } // Insert records to srcore table public class ScoreInserter { private final String Table="score"; private final int Total=1000000; public boolean fillTable() { Connection conn = null; Statement stmt = null; try{ Class.forName(DBParam.Driver).newInstance(); conn = DriverManager.getConnection(DBParam.DbUrl, DBParam.User, DBParam.Pswd); conn.setAutoCommit(false); stmt = conn.createStatement(); long startMs = System.currentTimeMillis(); clearTable(stmt,conn); fillDataInTable(stmt,conn); long endMs = System.currentTimeMillis(); System.out.println("It takes "+ms2DHMS(startMs,endMs)+" to fill "+toEastNumFormat(Total)+" records to table:'"+Table+"'."); } catch (Exception e) { e.printStackTrace(); } finally { try { stmt.close(); conn.close(); } catch (SQLException e) { System.out.print("Can't close stmt/conn because of " + e.getMessage()); } } return false; } private void clearTable(Statement stmt,Connection conn) throws SQLException { stmt.executeUpdate("truncate table "+Table); conn.commit(); System.out.println("Cleared table:'"+Table+"'."); } private void fillDataInTable(Statement stmt,Connection conn) throws SQLException { StringBuilder sb=new StringBuilder(); sb.append(" Insert into "+Table); sb.append(" select dbms_random.value(0,200),dbms_random.value(1,10),dbms_random.value(0,101) from dual "); sb.append(" connect by level<="+Total); sb.append(" order by dbms_random.random"); String sql=sb.toString(); stmt.executeUpdate(sql); conn.commit(); } // 将整数在万分位以逗号分隔表示 public static String toEastNumFormat(long number) { DecimalFormat df = new DecimalFormat("#,####"); return df.format(number); } // change seconds to DayHourMinuteSecond format private static String ms2DHMS(long startMs, long endMs) { String retval = null; long secondCount = (endMs - startMs) / 1000; String ms = (endMs - startMs) % 1000 + "ms"; long days = secondCount / (60 * 60 * 24); long hours = (secondCount % (60 * 60 * 24)) / (60 * 60); long minutes = (secondCount % (60 * 60)) / 60; long seconds = secondCount % 60; if (days > 0) { retval = days + "d" + hours + "h" + minutes + "m" + seconds + "s"; } else if (hours > 0) { retval = hours + "h" + minutes + "m" + seconds + "s"; } else if (minutes > 0) { retval = minutes + "m" + seconds + "s"; } else { retval = seconds + "s"; } return retval + ms; } public static void main(String[] args) { ScoreInserter si=new ScoreInserter(); si.fillTable(); } }
--END-- 2020年1月4日16点57分