java读取文件批量插入记录

只是一个例子,方便以后查阅。

import ey.db.oracle.OracleHelper;
import ey.db.type.*;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
//import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URLDecoder;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.PreparedStatement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

import oracle.jdbc.OracleTypes;

public class ImpCNetData {
    static Configuration conf=new Configuration("/ProjectConfig.properties");
    static String db_host=conf.getValue("cdma_host");
    static String db_user=conf.getValue("cdma_user");
    static String db_passwd=conf.getValue("cdma_passwd");
    static String impFilePath=conf.getValue("ImpFilePath");
    static String impFileLog=conf.getValue("ImpFileLog");
    static String lastKeepString=conf.getValue("LastKeepString");
    static String lastFilterString=conf.getValue("LastFilterString");
    static String connStr="";
    static Pattern lastFilterRex=null;
    static Pattern lastKeepRex=null;
    private final String oracleDriverName = "oracle.jdbc.driver.OracleDriver";
    //批量插入数据
    public void InsertDataBatch(List<CNetData> cnds) throws Exception
    {        
        Connection conn=null;        
        try {  
              Class.forName(oracleDriverName);  
              conn = DriverManager.getConnection(db_host, db_user, db_passwd);  
              conn.setAutoCommit(false);  
              String sql = "insert into cn_visit(visitid, mobile, url, visittime, onlinetime, desip, desport, mobileip, mobileport,urltypeid, comefrom, bsid, username,housecityid) VALUES(seq_cn_visitid.Nextval,?,?,?,?,?,?,?,?,?,?,?,?,?)";  
              PreparedStatement prest = conn.prepareStatement(sql,ResultSet.TYPE_SCROLL_SENSITIVE,ResultSet.CONCUR_READ_ONLY);
              
              for(int x = 0; x < cnds.size(); x++){
                 CNetData cnd=cnds.get(x);
                 prest.setString(1, cnd.getMobile());  
                 prest.setString(2, cnd.getUrl());  
                 prest.setString(3, cnd.getVisitTime());  
                 prest.setString(4, cnd.getOnlineTime());  
                 prest.setString(5,cnd.getDesIp());  
                 prest.setInt(6,cnd.getDesPort());
                 prest.setString(7,cnd.getMobileIp());
                 prest.setInt(8,cnd.getMobilePort());
                 prest.setInt(9,cnd.getFileUrlId());
                 prest.setInt(10,cnd.getComeFrom());
                 prest.setString(11,cnd.getBsid());
                 prest.setString(12,cnd.getUsername());
                 prest.setInt(13,cnd.getHouseCityId());
                 prest.addBatch();  
              }  
              prest.executeBatch();  
              conn.commit();  
              conn.close();  
        } catch (SQLException ex) {  
           ex.printStackTrace();
        } catch (ClassNotFoundException ex) {  
             ex.printStackTrace();  
        }
        catch(Exception ex)
        {
            ex.printStackTrace();
        }
        finally
        {
            if (conn!=null && !conn.isClosed())
                conn.close();
        }
    }    
    //读取网址类型
    public DataTable GetUrlType()
    {
        try
        {
            OracleConnection conn= new OracleConnection(db_host+";"+db_user+";"+db_passwd);
            String cmd="FX114V01_CN_VISIT.GetUrlList";          
            Parameter pds=new Parameter("out_data",OracleTypes.CURSOR,null, ParameterDirection.OUT);        
            Parameter pcode=new Parameter("out_code",OracleTypes.INTEGER,null, ParameterDirection.OUT);        
            DataSet ds=OracleHelper.ExecuteDataSet(conn,CommandType.StoreProcedure, cmd, pds,pcode);
            DataTable dt=ds.Tables[0];
            return dt;
        }
        catch(Exception ex)
        {
            ex.printStackTrace();
        }
        return null;
    }
    //导入数据
    public void ImpData() throws Exception
    {
        //StringBuffer sb=new StringBuffer();
        String tempstr=null;
        String url="";        
        DataTable dt=GetUrlType();//URL分类表
        if(lastFilterString.length()>0 && lastFilterRex==null)
            lastFilterRex=Pattern.compile(lastFilterString);
        if(lastKeepString.length()>0 && lastKeepRex==null)
            lastKeepRex=Pattern.compile(lastKeepString);
        try
        {
            //网址类型不能为空
            if(dt!=null)
            {
                //String path="/usr/hadoop/bigdata/filterurl/part/";            
                File dir=new File(impFilePath);
                File[] files=null;
                if(dir.isDirectory())
                    files=dir.listFiles(new MyFileFilter());            
                for(int i=0;i<files.length;i++)
                {
                    //System.out.println(files[i].getName());
                    List<CNetData> cnds=new ArrayList<CNetData>();
                    FileInputStream fis=new FileInputStream(files[i]);
                    BufferedReader br=new BufferedReader(new InputStreamReader(fis,"UTF-8"));                    
                    //文件逐行读取
                    while((tempstr=br.readLine())!=null)
                    {
                        try
                        {
                            StringTokenizer itr=new StringTokenizer(tempstr,"|");
                            if(itr.hasMoreTokens() && itr.countTokens()==11)
                            {
                                CNetData cnd=new CNetData();
                                String mobile=itr.nextToken().trim();
                                if(mobile.length()>11)
                                    cnd.setMobile(mobile.substring(0,11));
                                else 
                                    cnd.setMobile(mobile);
                                url=itr.nextToken().trim();    
                                
                                //替换URL特殊编码
                                if(url.indexOf("%2F")>=0 || url.indexOf("%3F")>=0)
                                {
                                    url = url.replaceAll("%2F","/");  
                                    url = url.replaceAll("%3A",":");  
                                    url = url.replaceAll("%20"," ");  
                                    url = url.replaceAll("%3F","?");
                                    url = url.replaceAll("%3D","=");
                                }
                                //保留需要的
                                if(lastKeepString.length()>0 && lastKeepRex!=null)
                                {
                                    Matcher lastKeepMatcher=null;
                                    //防止引用或者搜索过来的
                                    if(url.length()<=30)
                                        lastKeepMatcher=lastKeepRex.matcher(url);
                                    else 
                                        lastKeepMatcher=lastKeepRex.matcher(url.substring(0,30));
                                    
                                    //没找到的话跳出本次循环
                                    if(!lastKeepMatcher.find())
                                        continue; //跳出本次循环
                                }
                                //过滤不需要的
                                if(lastFilterString.length()>0 && lastFilterRex!=null)
                                {
                                    Matcher lastMatcher=lastFilterRex.matcher(url);
                                    //找到需要过滤掉的字符串
                                    if(lastMatcher.find())
                                        continue; //跳出本次循环
                                }                                
                                cnd.setUrl(url);
                                cnd.setVisitTime(itr.nextToken());
                                cnd.setMobileIp(itr.nextToken());
                                cnd.setDesIp(itr.nextToken());
                                cnd.setMobilePort(Integer.parseInt(itr.nextToken().trim()));
                                cnd.setDesPort(Integer.parseInt(itr.nextToken().trim()));
                                cnd.setOnlineTime(itr.nextToken());
                                cnd.setBsid(itr.nextToken());
                                cnd.setUsername(itr.nextToken());
                                cnd.setComeFrom(Integer.parseInt(itr.nextToken().trim()));
                                cnd.setFileUrlId(0);//默认值
                                //查找所属URL类型
                                  for(int k=0;k<dt.Rows.length;k++)
                                  {
                                      if(url.indexOf(dt.Rows[k].Columns[1].colValue.toString())>=0)
                                      {
                                          cnd.setFileUrlId(Integer.parseInt(dt.Rows[k].Columns[0].colValue.toString().trim()));
                                          break;
                                      }
                                  }
                                  //判断房源或者链接的城市
                                  /*
                                      21    北京
                                    51    东莞
                                    52    珠海
                                    55    佛山
                                    56    广州
                                    59    惠州
                                    68    深圳
                                    162    武汉
                                    174    长沙
                                    341    上海
                                   * */
                                if(url.indexOf("sz.")>=0||url.indexOf("sz/")>=0)
                                      cnd.setHouseCityId(68);
                                  else if(url.indexOf("gz.")>=0||url.indexOf("gz/")>=0||url.indexOf("gz_")>=0)
                                      cnd.setHouseCityId(56);
                                  else if(url.indexOf("dg.")>=0||url.indexOf("dg/")>=0||url.indexOf("dg_")>=0)
                                      cnd.setHouseCityId(51);
                                  else if(url.indexOf("fs.")>=0||url.indexOf("fs/")>=0||url.indexOf("fs_")>=0)
                                      cnd.setHouseCityId(55);
                                  else if(url.indexOf("zh.")>=0||url.indexOf("zh/")>=0||url.indexOf("zh_")>=0)
                                      cnd.setHouseCityId(52);
                                  else if(url.indexOf("hz.")>=0||url.indexOf("hz/")>=0||url.indexOf("hz_")>=0)
                                      cnd.setHouseCityId(59);
                                 else if(url.indexOf("zs.")>=0||url.indexOf("zs/")>=0||url.indexOf("zs_")>=0)
                                      cnd.setHouseCityId(73);
                                  else if(url.indexOf("bj.")>=0||url.indexOf("bj/")>=0||url.indexOf("bj_")>=0)
                                      cnd.setHouseCityId(21);
                                  else if(url.indexOf("sh.")>=0||url.indexOf("sh/")>=0||url.indexOf("sh_")>=0)
                                      cnd.setHouseCityId(341);
                                 else if(url.indexOf("cs.")>=0||url.indexOf("cs/")>=0||url.indexOf("cs_")>=0)
                                      cnd.setHouseCityId(174);
                                 else if(url.indexOf("wh.")>=0||url.indexOf("wh/")>=0||url.indexOf("wh_")>=0)
                                      cnd.setHouseCityId(162);
                                  else 
                                      cnd.setHouseCityId(0);
                                
                                //上面的方式没找到的情况下才用下面的模式查找
                                if(cnd.getHouseCityId()<=0)
                                {
                                  if(url.contains("sz")||url.contains("shenzhen"))
                                      cnd.setHouseCityId(68);
                                  else if(url.contains("guangzhou"))
                                      cnd.setHouseCityId(56);
                                  else if(url.contains("dg")||url.contains("dongguan"))
                                      cnd.setHouseCityId(51);
                                  else if(url.contains("fs")||url.contains("foshan"))
                                      cnd.setHouseCityId(55);
                                  else if(url.contains("zhuhai"))
                                      cnd.setHouseCityId(52);
                                  else if(url.contains("hz")||url.contains("huizhou"))
                                      cnd.setHouseCityId(59);
                                  else if(url.indexOf("zs")>=0||url.indexOf("zhongshan")>=0)
                                      cnd.setHouseCityId(73);
                                  else if(url.indexOf("bj")>=0||url.indexOf("beijing")>=0)
                                      cnd.setHouseCityId(21);
                                  else if(url.indexOf("shanghai")>=0)
                                      cnd.setHouseCityId(341);
                                 else if(url.indexOf("cs")>=0||url.indexOf("changsha")>=0)
                                      cnd.setHouseCityId(174);
                                 else if(url.indexOf("wh")>=0||url.indexOf("wuhan")>=0)
                                      cnd.setHouseCityId(162);               
                                 else 
                                     cnd.setHouseCityId(0);
                                }
                                cnds.add(cnd);
                            }
                        }
                        catch(Exception ex)
                        {
                            ex.printStackTrace();
                        }
                    }
                    //System.out.println(cnds.size());
                  //插入数据
                    if(cnds.size()>0)
                    {
                        InsertDataBatch(cnds);
                        writeLog(files[i].getName()+"    " +cnds.size());
                        files[i].delete();
                    }
                    br.close();
                    fis.close();                    
                    //break;
                }
            }
        }
        catch(IOException ex)
        {
            System.out.println(ex.getStackTrace());
        }        
    }
     public static void writeLog(String str)
        {
            try
            {
            //String path="/usr/hadoop/bigdata/filterurl/importfile.log";
            File file=new File(impFileLog);
            if(!file.exists())
                file.createNewFile();
            SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            FileOutputStream out=new FileOutputStream(file,true); //如果追加方式用true        
            StringBuffer sb=new StringBuffer();
            //sb.append("-----------"+sdf.format(new Date())+"------------\n");
            sb.append(sdf.format(new Date())+"    "+str+"\n");
            out.write(sb.toString().getBytes("utf-8"));//注意需要转换对应的字符集
            out.close();
            }
            catch(IOException ex)
            {
                System.out.println(ex.getStackTrace());
            }
        }    
    public static void main(String[] args) throws Exception {
        try
        {
        ImpCNetData icd=new ImpCNetData();
        icd.ImpData();
        }
        catch(Exception ex)
        {
            ex.printStackTrace();            
        }
    }
}

记录类:

public class CNetData {
    private String mobile;
    public String getMobile() {
        return mobile;
    }
    public void setMobile(String mobile) {
        this.mobile = mobile;
    }
    public String getUrl() {
        return url;
    }
    public void setUrl(String url) {
        this.url = url;
    }
    public String getVisitTime() {
        return visitTime;
    }
    public void setVisitTime(String visitTime) {
        this.visitTime = visitTime;
    }
    public String getOnlineTime() {
        return onlineTime;
    }
    public void setOnlineTime(String onlineTime) {
        this.onlineTime = onlineTime;
    }
    public String getDesIp() {
        return desIp;
    }
    public void setDesIp(String desIp) {
        this.desIp = desIp;
    }
    public int getDesPort() {
        return desPort;
    }
    public void setDesPort(int desPort) {
        this.desPort = desPort;
    }
    public String getMobileIp() {
        return mobileIp;
    }
    public void setMobileIp(String mobileIp) {
        this.mobileIp = mobileIp;
    }
    public int getMobilePort() {
        return mobilePort;
    }
    public void setMobilePort(int mobilePort) {
        this.mobilePort = mobilePort;
    }
    public int getComeFrom() {
        return comeFrom;
    }
    public void setComeFrom(int comeFrom) {
        this.comeFrom = comeFrom;
    }
    public String getBsid() {
        return bsid;
    }
    public void setBsid(String bsid) {
        this.bsid = bsid;
    }
    public String getUsername() {
        return username;
    }
    public void setUsername(String username) {
        this.username = username;
    }
    private String url;
    private String visitTime;
    private String onlineTime;
    private String desIp;
    private int desPort;
    private String mobileIp;
    private int mobilePort;
    private int comeFrom;
    private String bsid;
    private String username;
    private int fileUrlId;
    private int houseCityId;
    public int getHouseCityId() {
        return houseCityId;
    }
    public void setHouseCityId(int houseCityId) {
        this.houseCityId = houseCityId;
    }
    public int getFileUrlId() {
        return fileUrlId;
    }
    public void setFileUrlId(int fileUrlId) {
        this.fileUrlId = fileUrlId;
    }
    
}

获取配置文件类:

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
/** 
 * 读取properties文件
 *
 */
public class Configuration
{
 private Properties propertie;
 private InputStream in;
 
 
 /** *//**
 * 初始化Configuration类
 */
 public Configuration()
 {
     propertie = new Properties();
 }
 
 /** *//**
 * 初始化Configuration类
 * @param filePath 要读取的配置文件的路径+名称
 */
 public Configuration(String filePath)
 {
     propertie = new Properties();
     try{
         in =Object.class.getResourceAsStream(filePath);
         propertie.load(in);
         in.close();
     } catch (FileNotFoundException ex){
         System.out.println("读取属性文件--->失败!- 原因:文件路径错误或者文件不存在");
         ex.printStackTrace();
     } catch (IOException ex){
         System.out.println("装载文件--->失败!");
         ex.printStackTrace();
     }
 }//end ReadConfigInfo(...)
 
 /** *//**
 * 重载函数,得到key的值
 * @param key 取得其值的键
 * @return key的值
 */
 public String getValue(String key)
 {
     if(propertie.containsKey(key)){
         String value = propertie.getProperty(key);//得到某一属性的值
         return value;
     }
     else 
     return "";
 }//end getValue(...)
 
 /** *//**
 * 重载函数,得到key的值
 * @param fileName properties文件的路径+文件名
 * @param key 取得其值的键
 * @return key的值
 */
 public String getValue(String fileName, String key)
 {
     try{
         String value = "";
          in = Object.class.getResourceAsStream(fileName);
         propertie.load(in);
         in.close();
     if(propertie.containsKey(key)){
         value = propertie.getProperty(key);
         return value;
     }else
         return value;
     } catch (FileNotFoundException e){
         e.printStackTrace();
         return "";
     } catch (IOException e){
         e.printStackTrace();
         return "";
     } catch (Exception ex){
         ex.printStackTrace();
         return "";
     }
 }//end getValue(...)
}

 

 oraclehelper.jar

ojdbc6

 

posted on 2014-06-12 09:36  ringwang  阅读(7107)  评论(0编辑  收藏  举报