hadoop 数据抽取
#!/bin/bash
#
# Hadoop data extraction for PS Gn HTTP XDR files.
#
# Copies the .AVL data files of one time window from the FTP landing area
# into a private staging directory, uploads them to HDFS with a single
# `hadoop fs -put`, and then launches the Oozie job for that window.
#
# Usage:
#   <script> [START_TIME END_TIME]
#
#   START_TIME  inclusive lower bound of the window, YYYYMMDDHHMM
#   END_TIME    exclusive upper bound of the window, YYYYMMDDHHMM
#
# When END_TIME is not given, the window is derived from the system clock:
# START_TIME is (now - 15 min) and END_TIME is now, each rounded down to a
# quarter hour (minute 00, 15, 30 or 45).
#
# NOTE: only files in START_TIME's day directory whose name contains
# START_TIME's hour (YYYYMMDDHH) are examined, so an explicit window should
# stay within a single hour.
#
# Exit status: 0 on success, 2 on invalid arguments, 1 when staging or the
# upload fails; otherwise the exit status of the Oozie runner.

set -uo pipefail

readonly SOURCE_ROOT=/opt7/ftp/PS_Gn_HTTP_Event
readonly STAGING_ROOT=/opt8/gz_data_temp/http
readonly HDFS_ROOT=cache/O_RE_ST_XDR_PS_GN_HTTP
readonly RUNNER_DIR=/home/boco/program
readonly OOZIE_JOB=job_ods_a_xdr_ps_gn_http

# Offset of the 12-digit timestamp inside a file name, e.g.
#   351_TM07_PSGnHTTPEvent201410021235_3720202.CHK
#   351_TM07_PSGnHTTPEvent201410021235_3720202.AVL
readonly STAMP_OFFSET=22

# Per-run staging directory; global so the EXIT trap can still see it.
stage_dir=''

# Print a diagnostic on stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Print a diagnostic on stderr and abort with status 1.
die() {
  warn "$*"
  exit 1
}

# Remove the per-run staging directory, if one was created.
cleanup() {
  if [[ -n "$stage_dir" ]]; then
    rm -rf -- "$stage_dir"
    stage_dir=''
  fi
}

#######################################
# Round a YYYYMMDDHHMM stamp down to the quarter hour.
# Arguments: $1 - 12-digit timestamp
# Outputs:   the rounded timestamp on stdout
#######################################
quarter_floor() {
  local stamp=$1
  # 10# forces base 10 so that minutes "08" and "09" are not read as octal.
  printf '%s%02d\n' "${stamp:0:10}" "$(( 10#${stamp:10:2} / 15 * 15 ))"
}

main() {
  local start_time end_time

  if [[ -n "${2:-}" ]]; then
    start_time=$1
    end_time=$2
  else
    # Derive both bounds from ONE clock reading so they cannot straddle a
    # quarter-hour boundary and silently produce a 30-minute window.
    local now raw_start raw_end
    now=$(date +%s) || die "cannot read the system clock"
    raw_start=$(date -d "@$(( now - 900 ))" +%Y%m%d%H%M) \
      || die "cannot format the window start time"
    raw_end=$(date -d "@${now}" +%Y%m%d%H%M) \
      || die "cannot format the window end time"
    # Start time
    start_time=$(quarter_floor "$raw_start")
    # End time
    end_time=$(quarter_floor "$raw_end")
  fi

  if [[ ! "$start_time" =~ ^[0-9]{12}$ || ! "$end_time" =~ ^[0-9]{12}$ ]]; then
    warn "usage: ${0##*/} [START_TIME END_TIME]   (both as YYYYMMDDHHMM)"
    exit 2
  fi

  echo "数据时间为: $start_time--$end_time"

  # Create the target directory.
  # `hadoop fs -mkdir` also fails when the directory already exists (for
  # example on a re-run), so a failure is only reported here; a genuine HDFS
  # problem will make the upload below fail.
  local hdfs_dir="${HDFS_ROOT}/${start_time}"
  hadoop fs -mkdir "$hdfs_dir" \
    || warn "hadoop fs -mkdir ${hdfs_dir} failed (directory may already exist)"

  # Stage into a directory private to this run, so that overlapping runs or
  # leftovers of a crashed run can never be uploaded into the wrong window.
  trap cleanup EXIT
  stage_dir=$(mktemp -d "${STAGING_ROOT}/${start_time}.XXXXXX") \
    || die "cannot create a staging directory under ${STAGING_ROOT}"

  # Collect the data files that belong to the time window.
  shopt -s nullglob   # no matching .CHK file -> the loop body never runs
  local day_dir="${SOURCE_ROOT}/${start_time:0:4}-${start_time:4:2}-${start_time:6:2}"
  local chk_file base_name stamp avl_file
  local staged=0
  for chk_file in "$day_dir"/*"${start_time:0:10}"*.CHK; do
    base_name=${chk_file##*/}
    stamp=${base_name:${STAMP_OFFSET}:12}
    if [[ ! "$stamp" =~ ^[0-9]{12}$ ]]; then
      warn "skipping ${chk_file}: no timestamp at offset ${STAMP_OFFSET}"
      continue
    fi
    if (( 10#$stamp >= 10#$start_time && 10#$stamp < 10#$end_time )); then
      # The data file shares the .CHK file's name, with an .AVL extension.
      avl_file="${chk_file%.CHK}.AVL"
      printf '%s\n' "$avl_file"
      if cp -- "$avl_file" "$stage_dir"/; then
        staged=$(( staged + 1 ))
      else
        warn "could not stage ${avl_file}"
      fi
    fi
  done

  # Upload everything that was staged with a single `hadoop fs -put`.
  # (Earlier variants, previously kept here as commented-out code, uploaded
  # each file separately or concatenated and split the files first.)
  if (( staged > 0 )); then
    hadoop fs -put "$stage_dir"/*.AVL "${hdfs_dir}/" \
      || die "upload to ${hdfs_dir} failed; Oozie job not started"
  else
    warn "no .AVL files staged for ${start_time}--${end_time}; nothing uploaded"
  fi
  cleanup

  echo "oozie--job-http-start!"
  cd "$RUNNER_DIR" || die "cannot cd to ${RUNNER_DIR}"
  # The classpath is quoted so that java, not the shell, expands the '*'.
  # Running java directly (instead of echoing its captured output) keeps its
  # output intact and makes its exit status the exit status of this script.
  java -cp '.:../oozie/libserver/*' com.boco.BSSystem.schedule.OozieRunner \
    "$OOZIE_JOB" "${start_time}"
}

main "$@"