读取日志并存入数据库

//逐行读取日志记录
package com.expai.test;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.expai.admin.util.logIn.InsertToDB;
import com.expai.admin.util.logIn.ParseString;

/**
 * Reads a GBK-encoded log file line by line, turns each line into a row map via
 * {@link ParseString#everyLogRead}, derives a {@code shortUrl} value from the row's
 * {@code imageUrl}, and hands the rows to {@link InsertToDB#insertInto} in batches.
 */
public class ReadData {

    /** Number of rows accumulated before a batch is handed to the inserter. */
    private static final int BATCH_SIZE = 1000;

    /** Target table for every insert issued by {@link #readFile}. */
    private static final String TABLE = "tb_advertise_appertime_detail";

    /** Comma-separated column list used for a batch whose deciding row has no {@code domainId}. */
    private static final String BASE_COLUMNS = "ip,address,network,imageUrl,urlno,visitTime,shortUrl";

    private InsertToDB it = new InsertToDB();
    private ParseString ps = new ParseString();

    /** Most recent row seen for each value of the {@code query} field; shared across instances. */
    private static Map<String, Map<String, Object>> hashmap = new HashMap<String, Map<String, Object>>();

    // NOTE(review): ".com" is tried before ".com.cn", so "x.com.cn" is matched as "x.com";
    // the '.' inside "\\.com.cn" is also unescaped. Left unchanged so stored shortUrl values
    // stay comparable with rows that were loaded earlier.
    private static String regex = "[0-9a-zA-Z]+((\\.com)|(\\.cn)|(\\.org)|(\\.net)|(\\.edu)|(\\.com.cn))";

    /** Compiled once instead of once per log line. */
    private static final Pattern DOMAIN_PATTERN = Pattern.compile(regex);

    /**
     * Ad-hoc entry point: loads one hard-coded log file, keyed by {@code urlno}.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws IOException {
        try {
            new ReadData().readFile("C:/Users/Administrator/Desktop/adverview.log.20140630", "urlno", null, null);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads {@code filePath} line by line and inserts the parsed rows in batches of
     * {@value #BATCH_SIZE}; a final partial batch is inserted after the last line.
     *
     * @param filePath path of the GBK-encoded log file
     * @param query    name of the row field whose value keys the shared row cache
     * @param parm     unused; kept for interface compatibility
     * @param map      optional field aliases: for each entry, the row value stored under the
     *                 entry's key is additionally stored under the entry's value; may be
     *                 {@code null} or empty
     * @throws IOException if the file cannot be opened or read
     */
    public void readFile(String filePath, String query, String[] parm, Map<String, String> map) throws IOException {
        String[] data = {"imageUrl", "urlno", "keyword", "shortUrl", "ip", "visitTime", "domainId"};
        List<Map<String, Object>> batch = new ArrayList<Map<String, Object>>();
        // Column list chosen by the most recently read row. It is deliberately NOT reset
        // before the post-loop flush, so the final partial batch follows the same rule
        // as full batches.
        String columns = BASE_COLUMNS;
        int count = 0;

        FileInputStream in = new FileInputStream(filePath);
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(in, "gbk"));
            long startTime = System.currentTimeMillis(); // start of timing

            String strLine;
            while ((strLine = br.readLine()) != null) {
                Map<String, Object> mapNew = ps.everyLogRead(strLine, data);

                columns = (mapNew.get("domainId") != null) ? BASE_COLUMNS + ",domainId" : BASE_COLUMNS;

                if (map != null && !map.isEmpty()) {
                    for (Map.Entry<String, String> alias : map.entrySet()) {
                        mapNew.put(alias.getValue(), mapNew.get(alias.getKey()));
                    }
                }

                String queryKey = (String) mapNew.get(query);
                hashmap.put(queryKey, mapNew);
                // NOTE(review): the lookup happens after the put above, so it returns the row
                // that was just stored and "flagId" is copied from the row onto itself. The
                // intent was presumably to inherit flagId from an earlier row with the same
                // key; behaviour is preserved here pending confirmation.
                if (hashmap.get(queryKey) != null) {
                    mapNew.put("flagId", hashmap.get(queryKey).get("flagId"));
                }

                mapNew.put("shortUrl", extractShortUrl((String) mapNew.get("imageUrl")));

                batch.add(mapNew);
                count++;
                if (count % BATCH_SIZE == 0) {
                    Integer n = it.insertInto(batch, TABLE, columns.split(","));
                    System.err.println(" excute sql " + n);
                    batch = new ArrayList<Map<String, Object>>();
                }
            }

            // Flush whatever is left over after the last full batch.
            if (!batch.isEmpty()) {
                it.insertInto(batch, TABLE, columns.split(","));
            }

            long endTime = System.currentTimeMillis(); // end of timing
            System.out.println("read line:" + count);
            System.out.println("cost time:" + (endTime - startTime) + "ms");
        } finally {
            // Close on every path, including failures while parsing or inserting.
            if (br != null) {
                br.close();
            } else {
                in.close();
            }
        }
    }

    /**
     * Collects every match of the domain pattern in {@code imageUrl} and joins them with
     * {@code ", "}.
     *
     * @param imageUrl the URL to scan; may be {@code null}
     * @return the joined matches, or an empty string when there are none or the input is {@code null}
     */
    private static String extractShortUrl(String imageUrl) {
        if (imageUrl == null) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        Matcher m = DOMAIN_PATTERN.matcher(imageUrl);
        while (m.find()) {
            if (joined.length() > 0) {
                joined.append(", ");
            }
            joined.append(m.group());
        }
        return joined.toString();
    }
}
//每1000条封装为一条sql插入数据库
public
Integer insertInto(List<Map<String,Object>> list,String tab,String[] parms){ int count=0; if(list!=null&&list.size()>0){ String sql=""; for (int i=0;i<list.size();i++) { StringBuffer insertSql=new StringBuffer(); Map<String,Object> map= list.get(i); if(null!=map){ insertSql.append(" insert into ").append(tab).append(" ( "); for (int a=0;a<parms.length;a++) { insertSql.append(parms[a]).append(" , "); } String insql=insertSql.substring(0,insertSql.lastIndexOf(","))+" ) values( "; insertSql=new StringBuffer(); for (int a=0;a<parms.length;a++) { insertSql.append("'").append(map.get(parms[a])).append("'").append(" , "); } insql=insql+insertSql.substring(0,insertSql.lastIndexOf(","))+" ) ;"; sql=sql+insql+"\n"; } } if(null!=sql){ // System.out.println(sql); count=dm.executUpdate(sql); } } log.info("插入行数:"+count); return count; }

 

posted @ 2014-07-01 20:05  箜篌  阅读(864)  评论(0编辑  收藏  举报