Batch generation
The Python script below lists every table in the source database and prints a sqoop job --create command for each one:
#!/usr/bin/env python
import MySQLdb
import datetime
import time
import os
#source database
HOST='192.168.2.229'
PORT=3306
USER='root'
PASSWD='root'
DB='db_1'
#sqoop
FIELDS_TERMINATED_BY='\\t'
CHECK_COLUMN='__#alibaba_rds_row_id#__'
#Alibaba RDS hidden primary key; replace with your own check column if needed
conn=MySQLdb.connect(host=HOST,port=PORT,user=USER,passwd=PASSWD,db=DB)
cur=conn.cursor()
ret=cur.execute("select table_name from information_schema.tables where TABLE_SCHEMA='%s'"%(DB))
ret=cur.fetchall()
for line in ret:
    tableName=line[0]
    #print one incremental Hive-import job definition per table
    print "sudo -u hdfs sqoop job --create %s_%s -- import --connect jdbc:mysql://%s:%s/%s --table %s \
--username %s --password %s -m 1 --hive-import --fields-terminated-by '%s' --hive-database %s \
--incremental append --check-column %s --last-value 0;"%(DB,tableName,HOST,PORT,DB,tableName,USER,PASSWD,FIELDS_TERMINATED_BY,DB,CHECK_COLUMN)
cur.close()
conn.close()
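The generator only prints the commands. A simple way to use it (the file names here are illustrative, not from the original) is to save it as gen_sqoop_jobs.py, redirect the output into a shell script, review the generated commands, and then register all the jobs in one go:
python gen_sqoop_jobs.py > create_all_jobs.sh
#check the generated sqoop job --create commands before running them
bash create_all_jobs.sh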
Generate the job for a single table. The bash script below builds the same command for one table, asks for confirmation, registers the job, and appends a matching sqoop job --exec line to the execution script:
#!/bin/bash
set -e
#set -x
#file path
EXECFILE_PATH='/data/sqoop/sjob_exec.sh'
#source database
HOST='192.168.2.229'
PORT=3306
USER='root'
PASSWD='root'
DB='db_1'
FIELDS_TERMINATED_BY='\t'
CHECK_COLUMN='__#alibaba_rds_row_id#__'
WORKER_NUM=1
tableName=$1
sjob="sudo -u hdfs sqoop job --create ${DB}_${tableName} -- import --connect jdbc:mysql://${HOST}:${PORT}/${DB} --table ${tableName} \
--username ${USER} --password ${PASSWD} -m ${WORKER_NUM} --hive-import --fields-terminated-by '${FIELDS_TERMINATED_BY}' --hive-database ${DB} \
--incremental append --check-column ${CHECK_COLUMN} --last-value 0;"
echo "$sjob"
echo "Add to sqoop? (y or n):"
read ret
if [ "$ret" == 'y' ];
then
    #eval re-parses the string so the quoting around the field terminator survives
    eval "$sjob"
    sjob="sqoop job --exec ${DB}_${tableName}"
    #insert the exec command just before the existing impala line in the exec script
    sed -i '/impala/i\'"$sjob"'' "$EXECFILE_PATH"
    echo "Add Completed!"
fi
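Assuming the script above is saved as sjob_add.sh (an illustrative name, not from the original), adding the job for one table, say t_order, looks like this. Note that the sed line expects the file at ${EXECFILE_PATH} to already contain a line mentioning impala, so the new exec command lands in front of it:
./sjob_add.sh t_order
#a possible layout of sjob_exec.sh after the insert (assumed, for illustration only):
#sqoop job --exec db_1_t_order
#impala-shell -q 'invalidate metadata'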