导航

sqoop job命令自动生成

Posted on 2016-02-27 10:13  ggzone  阅读(910)  评论(0)  编辑  收藏  举报

批量生成

#!/usr/bin/env python
import MySQLdb
import datetime
import time
import os


# Batch generator: prints one "sqoop job --create ..." shell command for every
# table found in the source MySQL schema DB. Nothing is executed here; the
# commands are only written to stdout.

#source database
HOST='192.168.2.229'
PORT=3306
USER='root'
PASSWD='root'
DB='db_1'

#sqoop
# '\\t' is the two-character text backslash + t; it is emitted verbatim
# (inside single quotes) into the generated command line.
FIELDS_TERMINATED_BY='\\t'
# Alibaba RDS hidden primary key column; replace with your own check column.
CHECK_COLUMN='__#alibaba_rds_row_id#__'

# Command template. Placeholders, in order: DB, table, HOST, PORT, DB, table,
# USER, PASSWD, FIELDS_TERMINATED_BY, DB (Hive database), CHECK_COLUMN.
# NOTE(review): the password ends up in plain text on the generated command
# line -- consider sqoop's -P / --password-file options; confirm against your
# sqoop version.
SQOOP_JOB_TEMPLATE = "sudo -u hdfs sqoop job --create %s_%s -- import --connect  jdbc:mysql://%s:%s/%s  --table %s \
         --username %s --password %s -m 1  --hive-import  --fields-terminated-by '%s' --hive-database %s \
         --incremental append --check-column %s --last-value 0;"

conn=MySQLdb.connect(host=HOST,port=PORT,user=USER,passwd=PASSWD,db=DB)
try:
    cur=conn.cursor()
    try:
        # Let the driver bind the schema name instead of interpolating it
        # into the SQL text.
        cur.execute("select table_name from information_schema.tables where TABLE_SCHEMA=%s",(DB,))
        ret=cur.fetchall()
    finally:
        cur.close()
finally:
    # Always release the connection, even when the query fails.
    conn.close()


for line in ret:
    tableName=line[0]

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(SQOOP_JOB_TEMPLATE%(DB,tableName,HOST,PORT,DB,tableName,USER,PASSWD,FIELDS_TERMINATED_BY,DB,CHECK_COLUMN))

生成一个表的job:


#!/bin/bash
# Create an incremental-append sqoop import job for a single table and, after
# confirmation, insert the matching "sqoop job --exec" line into the exec
# script at EXECFILE_PATH.
#
# Usage: <this script> <table_name>
set -e
#set -x

# A table name is required; without it the job would be created with an empty
# --table value.
if [ $# -lt 1 ] || [ -z "$1" ]; then
    echo "Usage: $0 <table_name>" >&2
    exit 1
fi

#file path
EXECFILE_PATH='/data/sqoop/sjob_exec.sh'

#source database
HOST='192.168.2.229'
PORT=3306
USER='root'
PASSWD='root'
DB='db_1'

# Kept in single quotes so sqoop receives the two characters backslash + t.
FIELDS_TERMINATED_BY='\t'
CHECK_COLUMN='__#alibaba_rds_row_id#__'
WORKER_NUM=1
tableName=$1

# Build the command as an array so every argument reaches sqoop exactly as
# written (no re-splitting, no literal quote characters, no trailing ';').
# NOTE(review): --password exposes the password on the command line; consider
# sqoop's -P / --password-file options -- confirm against your sqoop version.
sjob=(sudo -u hdfs sqoop job --create "${DB}_${tableName}" -- import
    --connect "jdbc:mysql://${HOST}:${PORT}/${DB}" --table "${tableName}"
    --username "${USER}" --password "${PASSWD}" -m "${WORKER_NUM}"
    --hive-import --fields-terminated-by "${FIELDS_TERMINATED_BY}"
    --hive-database "${DB}"
    --incremental append --check-column "${CHECK_COLUMN}" --last-value 0)

# Show the command in shell-quoted form so it can be reviewed or copy-pasted.
printf '%q ' "${sjob[@]}"
echo

echo "Add to sqoop?(y or n):"
read -r ret

# Quoted comparison: empty input simply counts as "no".
if [[ "$ret" == 'y' ]]; then
        # Run the job-creation command itself (not its output).
        "${sjob[@]}"
        exec_line="sqoop job --exec ${DB}_${tableName}"
        # Insert the exec line before each line matching "impala" in the exec file.
        sed -i '/impala/i\'"$exec_line" "$EXECFILE_PATH"
        echo "Add Completed!"
fi