hadoop 数据抽取

#!/bin/bash

# Round a YYYYmmddHHMM timestamp down to the start of its quarter hour.
# Arguments: $1 - 12-digit timestamp, e.g. 201410021237
# Outputs:   the floored timestamp on stdout, e.g. 201410021230
floor_to_quarter() {
        local stamp=$1
        # 10# forces base 10 so minutes "08" and "09" are not parsed as octal.
        local minute=$(( 10#${stamp:10:2} / 15 * 15 ))
        # %02d keeps the minute two digits wide ("0" -> "00").
        printf '%s%02d\n' "${stamp:0:10}" "$minute"
}

# Determine the extraction window and publish it as start_time / end_time.
#   - Two arguments given: $1 and $2 are used verbatim (no validation here).
#   - Otherwise: the quarter hour containing "15 minutes ago" up to the
#     quarter hour containing "now", both as YYYYmmddHHMM.
if [[ -n "${2:-}" ]]; then
        start_time=$1
        end_time=$2
else
        # Read the clock once. Two independent `date` calls could straddle a
        # quarter-hour boundary and yield a 30-minute window.
        now_epoch=$(date +%s)
        start_time=$(floor_to_quarter "$(date -d "@$((now_epoch - 900))" +%Y%m%d%H%M)")
        end_time=$(floor_to_quarter "$(date -d "@${now_epoch}" +%Y%m%d%H%M)")
fi
echo "数据时间为: $start_time--$end_time"

# Print the 12-digit timestamp embedded in a .CHK file name.
# Arguments: $1 - file name or path, e.g.
#                 351_TM07_PSGnHTTPEvent201410021235_3720202.CHK
# Outputs:   the 12 characters at offset 22 of the base name, on stdout
# Returns:   1 (printing nothing) when that slice is not 12 digits
chk_event_time() {
        local name=${1##*/}
        local stamp=${name:22:12}
        [[ $stamp =~ ^[0-9]{12}$ ]] || return 1
        printf '%s\n' "$stamp"
}

# Create the HDFS directory for this window.
hadoop fs -mkdir "cache/O_RE_ST_XDR_PS_GN_HTTP/$start_time"

# Copy the .AVL file of every .CHK marker whose timestamp lies in
# [start_time, end_time) into the local staging directory.
# Candidates come from the day directory and hour of start_time.
source_dir="/opt7/ftp/PS_Gn_HTTP_Event/${start_time:0:4}-${start_time:4:2}-${start_time:6:2}"
for file in "$source_dir"/*"${start_time:0:10}"*.CHK; do
        # An unmatched glob stays literal; skip it instead of parsing `ls`.
        [[ -e "$file" ]] || continue
        # Sample names:
        #   351_TM07_PSGnHTTPEvent201410021235_3720202.CHK
        #   351_TM07_PSGnHTTPEvent201410021235_3720202.AVL
        stamp=$(chk_event_time "$file") || continue
        # `[` compares in base 10; `[[ ]]` would read leading zeros as octal.
        if [ "$stamp" -ge "$start_time" ] && [ "$stamp" -lt "$end_time" ]; then
                avlfile=${file/%CHK/AVL}
                echo "$avlfile"
                cp -- "$avlfile" /opt8/gz_data_temp/http/ \
                        || echo "failed to stage $avlfile" >&2
                #hadoop fs -put $avlfile cache/O_RE_ST_XDR_PS_GN_HTTP/${start_time}/
        fi
done
# Disabled alternative: merge the staged files into one file per window and
# split it into 512 MB parts before uploading.
#cat /opt8/gz_data_temp/http/*_*.AVL>/opt8/gz_data_temp/http/$start_time.AVL
#split -b 512m /opt8/gz_data_temp/http/$start_time.AVL /opt8/gz_data_temp/http/$start_time.AVL.
#echo `ls /opt8/gz_data_temp/http/$start_time.AVL`
#rm -f /opt8/gz_data_temp/http/*.AVL

# Upload the staged files into the window's HDFS directory.
# The command is run directly: wrapping it in bare backticks would execute
# whatever it prints on stdout as a shell command.
staged_files=(/opt8/gz_data_temp/http/*.AVL)
if [[ -e "${staged_files[0]}" ]]; then
        hadoop fs -put "${staged_files[@]}" "cache/O_RE_ST_XDR_PS_GN_HTTP/${start_time}/" \
                || echo "hadoop fs -put failed for window ${start_time}" >&2
else
        # The glob matched nothing, so there is nothing to upload.
        echo "no staged .AVL files for window ${start_time}" >&2
fi

# Always empty the staging directory so files from this window cannot be
# uploaded into the next window's directory. The staged files are copies.
rm -f /opt8/gz_data_temp/http/*.AVL

echo "oozie--job-http-start!"
# The classpath below is relative, so a failed cd must stop the script.
cd /home/boco/program || { echo "cannot cd to /home/boco/program" >&2; exit 1; }
# The classpath is quoted so the JVM, not the shell, expands the wildcard.
# Running java directly keeps its output line breaks and makes its exit
# status the exit status of the script.
java -cp ".:../oozie/libserver/*" com.boco.BSSystem.schedule.OozieRunner job_ods_a_xdr_ps_gn_http "${start_time}"

 

posted @ 2015-01-13 15:58  史红星-shihongxing  阅读(525)  评论(0)  编辑  收藏  举报