将大量数据批量插入Oracle表的类,支持停止续传

之前用create table select * from XXTable无疑是创建庞大表的最快方案之一,但是数据重复率是个问题,且数据难以操控。

于是我在之前批量插数据的基础上更新了一个类,让它具有了Resume的能力,这样可以利用碎片时间能插一点是一点。

以后此类还可能改进,先留一个版本在这里。

 

数据库连接参数类:

class DBParam {
    public final static String Driver = "oracle.jdbc.driver.OracleDriver";
    public final static String DbUrl = "jdbc:oracle:thin:@127.0.0.1:1521:orcl";
    public final static String User = "ufo";
    public final static String Pswd = "1234";
}

HugeTbBatchInserter类:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Random;

class TypeField{
    String type;
    String field;
}

// Insert huge records to a table
public class HugeTbBatchInserter {
    private final int BatchSize=250;// Batch insert size,可以根据机器性能提高
    private final int Total_Record_Count=100000000;// 最好是BatchSize的整倍数
    
    // 如果是多个表,扩充数组即可
    // PK:主键 CH:文字 DT:Datetime,RND:百以内随机数 还可以根据需要扩充代号,在getInsertSql函数中则根据代号来设置值
    private final String[][] tableArray= {
        {"score:"+Total_Record_Count,"PK:ID","RND:stuid","RND:courseid","RND:score"},
    };
    
    /**
     * 批量插值
     */
    public void batchInsert() {
        Connection conn = null;
        Statement stmt = null;
        
        try{
            Class.forName(DBParam.Driver).newInstance();
            conn = DriverManager.getConnection(DBParam.DbUrl, DBParam.User, DBParam.Pswd);
            stmt = conn.createStatement();
            System.out.println("Begin to access "+DBParam.DbUrl+" as "+DBParam.User+"...");
            
            for(String[] innerArr:tableArray) {
                String tableName=innerArr[0].split(":")[0];
                System.out.println("Table:"+tableName);
                
                int existCount=fetchExistCount(tableName,stmt);
                System.out.println("Exist record count:"+existCount);
                
                int maxId=fetchMaxId(tableName,stmt);
                System.out.println("Max id:"+maxId);
                
                int count=Integer.parseInt(innerArr[0].split(":")[1])-existCount;
                System.out.println("准备向表"+tableName+"插入"+count+"条记录.");
                
                // 是否需要插值前先清空,自行判断再放开
                //truncateTable(tableName,stmt);
                
                // 真正插入数据
                insertTestDataTo(tableName,maxId+1,count,innerArr,stmt);
            }
        } catch (Exception e) {
            System.out.print(e.getMessage());
        } finally {
            try {
                stmt.close();
                conn.close();
            } catch (SQLException e) {
                System.out.print("Can't close stmt/conn because of " + e.getMessage());
            }
        }
    }
    
    /**
     * 以当前时间为基准减去数十秒
     * @param n
     * @return
     */
    private static String getDatetimeBefore(int n) {
        try {
            Calendar now = Calendar.getInstance();
            now.add(Calendar.SECOND,-n*10);//日期减去n*10秒
            
            Date newDate=now.getTime();
            
            SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            String retval = sdf.format(newDate);
            return retval;
        }
        catch(Exception ex) {
            ex.printStackTrace();
            return null;
        }
    }
    
    /**
     * 清空一个表的数据,注意此功能有破坏性,不可恢复,注意备份好数据
     * @param tableName
     * @param conn
     * @param stmt
     * @throws SQLException
     */
    private void truncateTable(String tableName,Statement stmt) throws SQLException{
        String sql="truncate table "+tableName;
        stmt.execute(sql);
        System.out.println("truncated table:"+tableName);
    }
    
    /**
     * 得到表中已有的最大ID值
     * @param tableName
     * @param conn
     * @param stmt
     * @return
     * @throws SQLException
     */
    private int fetchMaxId(String tableName,Statement stmt)  throws SQLException{
        String sql="select max(id) as max from "+tableName+"";
        
        ResultSet rs = stmt.executeQuery(sql);
        
        while (rs.next()) {
            int max = rs.getInt("max");
            return max;
        }
        
        return 0;
    }
    
    /**
     * 得到表中现存数量
     * @param tableName
     * @param conn
     * @param stmt
     * @return
     * @throws SQLException
     */
    private int fetchExistCount(String tableName,Statement stmt)  throws SQLException{
        String sql="select count(*) as cnt from "+tableName+"";
        
        ResultSet rs = stmt.executeQuery(sql);
        
        while (rs.next()) {
            int cnt = rs.getInt("cnt");
            return cnt;
        }
        
        return 0;
    }
    
    /**
     * 向一个表插入数据
     * @param tableName
     * @param count
     * @param innerArr
     * @param conn
     * @param stmt
     * @throws SQLException
     */
    private void insertTestDataTo(String tableName,int startId,int count,String[] innerArr,Statement stmt) throws SQLException{
        // 得到字段名和字段类型
        List<TypeField> typefields=new ArrayList<TypeField>();
        for(int i=1;i<innerArr.length;i++) {
            String temp=innerArr[i];
            String[] arrTmp=temp.split(":");
            
            TypeField tf=new TypeField();
            tf.type=arrTmp[0];
            tf.field=arrTmp[1];
            typefields.add(tf);
        }
        
        List<String> fields=new ArrayList<String>();
        List<String> values=new ArrayList<String>();
        int index=0;
        for(TypeField tf:typefields) {
            fields.add(tf.field);
            values.add("''{"+index+"}''");
            index++;
        }
        
        index=0;
        int times=count/BatchSize;
        for(int i=0;i<times;i++) {
            long startTime = System.currentTimeMillis();
            StringBuilder sb=new StringBuilder();
            sb.append("INSERT ALL ");
            
            for(int j=0;j<BatchSize;j++) {
                index=i*BatchSize+j+startId;
                sb.append(getInsertSql(tableName,typefields,index));
            }
            
            sb.append(" select * from dual");
            String sql = sb.toString();
            stmt.executeUpdate(sql);
            
            long endTime = System.currentTimeMillis();
            System.out.println("#"+i+"/"+times+" "+BatchSize+" records inserted to Table:'"+tableName+"',time elapsed:"+(endTime-startTime)+"ms.");
        }
    }
    
    /**
     * 得到批量插入语句
     * @param tableName
     * @param typefields
     * @param index
     * @return
     */
    private String getInsertSql(String tableName,List<TypeField> typefields,int index) {
        String currTime=getDatetimeBefore(index);
        
        StringBuilder sb=new StringBuilder();
        sb.append(" INTO "+tableName+"(");
        List<String> fields=new ArrayList<String>();
        for(TypeField tf:typefields) {
            fields.add(tf.field);
        }
        sb.append(String.join(",",fields));
        
        sb.append(") values(");
        List<String> values=new ArrayList<String>();
        for(TypeField tf:typefields) {
            if(tf.type.equals("PK")) {
                values.add("'"+String.valueOf(index)+"'");
            }else if(tf.type.equals("CH")) {
                values.add("'0'");
            }else if(tf.type.equals("RND")) {
                values.add("'"+getRND()+"'");
            }else if(tf.type.equals("DT")) {
                values.add("to_date('"+currTime+"','yyyy-MM-dd HH24:mi:ss')");
            }
        }
        sb.append(String.join(",",values));
        sb.append(")");
        
        String insertSql=sb.toString();
        return insertSql;
    }
    
    private static String getRND() {
        return getRandom(0,100);
    }
    
    private static String getRandom(int min, int max){
        Random random = new Random();
        int s = random.nextInt(max) % (max - min + 1) + min;
        return String.valueOf(s);
    }
    

    
    /**
     * 将秒转化为日时分秒
     * @param secondCount
     * @return
     */
    private static String sec2DHMS(long secondCount) {
        String retval = null;
    
        long days = secondCount / (60 * 60 * 24);
        long hours = (secondCount % (60 * 60 * 24)) / (60 * 60);
        long minutes = (secondCount % (60 * 60)) / 60;
        long seconds = secondCount % 60;
        
        String strSeconds="";
        if(seconds!=0) {
            strSeconds=seconds + "s";
        }
    
        if (days > 0) {
            retval = days + "d" + hours + "h" + minutes + "m" + strSeconds;
        } else if (hours > 0) {
            retval = hours + "h" + minutes + "m" + strSeconds;
        } else if (minutes > 0) {
            retval = minutes + "m" + strSeconds;
        } else {
            retval = strSeconds;
        }
    
        return retval;
    }
    
    public static void main(String[] args) {
        HugeTbBatchInserter mi=new HugeTbBatchInserter();
        long startTime = System.currentTimeMillis();
        mi.batchInsert();
        long endTime = System.currentTimeMillis();
        
        System.out.println("Time elapsed:" + sec2DHMS((endTime - startTime)/1000) );
    }
}

这个类运行起来是这样的: 

Begin to access jdbc:oracle:thin:@127.0.0.1:1521:orcl as ufo...
Table:score
Exist record count:3351500
Max id:3351499
准备向表score插入96648500条记录.
#0/386594 250 records inserted to Table:'score',time elapsed:284ms.
#1/386594 250 records inserted to Table:'score',time elapsed:282ms.
#2/386594 250 records inserted to Table:'score',time elapsed:324ms.
#3/386594 250 records inserted to Table:'score',time elapsed:284ms.
#4/386594 250 records inserted to Table:'score',time elapsed:302ms.
#5/386594 250 records inserted to Table:'score',time elapsed:330ms.
#6/386594 250 records inserted to Table:'score',time elapsed:291ms.
#7/386594 250 records inserted to Table:'score',time elapsed:335ms.
#8/386594 250 records inserted to Table:'score',time elapsed:372ms.
#9/386594 250 records inserted to Table:'score',time elapsed:374ms.

 

下面这个类虽然更快些,但插入总量有限,需要改进,也留一个版本在这里吧:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.DecimalFormat;

class DBParam {
    public final static String Driver = "oracle.jdbc.driver.OracleDriver";
    public final static String DbUrl = "jdbc:oracle:thin:@127.0.0.1:1521:orcl";
    public final static String User = "ufo";
    public final static String Pswd = "1234";
}
// Insert records to srcore table
public class ScoreInserter {
    private final String Table="score";
    private final int Total=1000000;
    
    public boolean fillTable() {
        Connection conn = null;
        Statement stmt = null;
        
        try{
            Class.forName(DBParam.Driver).newInstance();
            conn = DriverManager.getConnection(DBParam.DbUrl, DBParam.User, DBParam.Pswd);
            conn.setAutoCommit(false);
            stmt = conn.createStatement();
            
            long startMs = System.currentTimeMillis();
            clearTable(stmt,conn);
            fillDataInTable(stmt,conn);
            
            
            long endMs = System.currentTimeMillis();
            System.out.println("It takes "+ms2DHMS(startMs,endMs)+" to fill "+toEastNumFormat(Total)+" records to table:'"+Table+"'.");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                stmt.close();
                conn.close();
            } catch (SQLException e) {
                System.out.print("Can't close stmt/conn because of " + e.getMessage());
            }
        }
        
        return false;
    }
    
    private void clearTable(Statement stmt,Connection conn) throws SQLException {
        stmt.executeUpdate("truncate table "+Table);
        conn.commit();
        System.out.println("Cleared table:'"+Table+"'.");
    }
    
    private void fillDataInTable(Statement stmt,Connection conn) throws SQLException {
        StringBuilder sb=new StringBuilder();
        sb.append(" Insert into "+Table);
        sb.append(" select dbms_random.value(0,200),dbms_random.value(1,10),dbms_random.value(0,101) from dual ");
        sb.append(" connect by level<="+Total);
        sb.append(" order by dbms_random.random");
        
        String sql=sb.toString();
        stmt.executeUpdate(sql);
        conn.commit();
        
    }
    
    // 将整数在万分位以逗号分隔表示
    public static String toEastNumFormat(long number) {
        DecimalFormat df = new DecimalFormat("#,####");
        return df.format(number);
    }
    
    // change seconds to DayHourMinuteSecond format
    private static String ms2DHMS(long startMs, long endMs) {
        String retval = null;
        long secondCount = (endMs - startMs) / 1000;
        String ms = (endMs - startMs) % 1000 + "ms";

        long days = secondCount / (60 * 60 * 24);
        long hours = (secondCount % (60 * 60 * 24)) / (60 * 60);
        long minutes = (secondCount % (60 * 60)) / 60;
        long seconds = secondCount % 60;

        if (days > 0) {
            retval = days + "d" + hours + "h" + minutes + "m" + seconds + "s";
        } else if (hours > 0) {
            retval = hours + "h" + minutes + "m" + seconds + "s";
        } else if (minutes > 0) {
            retval = minutes + "m" + seconds + "s";
        } else {
            retval = seconds + "s";
        }

        return retval + ms;
    }
    
    public static void main(String[] args) {
        ScoreInserter si=new ScoreInserter();
        si.fillTable();
    }
}

--END-- 2020年1月4日16点57分

posted @ 2020-01-04 16:57  逆火狂飙  阅读(362)  评论(0编辑  收藏  举报
生当作人杰 死亦为鬼雄 至今思项羽 不肯过江东