姜小嫌

  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅  :: 管理
# _*_ coding=utf-8 _*_

import sys
import os

# Not referenced anywhere in the visible part of this file.
hosts = {}

# Shell template for step 1 of the upload: PUT an op=CREATE request to the
# WebHDFS endpoint on port 50070 and print the second whitespace-separated
# field of the "Location" response header line (the URL to write to).
# Placeholders, in order: host, absolute HDFS file path.
cmd1 = '''
curl -i -X PUT "http://%s:50070/webhdfs/v1%s?op=CREATE&user.name=op&overwrite=true&replication=3" | grep Location | awk -F ' ' '{print $2}'
'''

# Shell template for step 2 of the upload: PUT the local file (-T) to the URL
# obtained in step 1.
# Placeholders, in order: local file path, target URL.
# --fail makes curl exit non-zero on HTTP status >= 400; without it curl exits
# 0 even when the server rejects the upload, so an exit-status check would
# report success for a failed write.
cmd2 = '''
curl --fail -i -X PUT -T %s %s
'''


def _shell_quote(text):
    """Wrap *text* in single quotes so a POSIX shell sees one literal word."""
    # An embedded single quote is written as '"'"' (close the quoted run,
    # emit a double-quoted single quote, reopen the quoted run).
    return "'" + text.replace("'", "'\"'\"'") + "'"


def put_file_to_hdfs(hostname, local_file, hdfs_path):
    """Upload a local file into an HDFS directory using two curl commands.

    The first command (``cmd1``) is run against ``hostname`` and its output,
    the URL taken from the ``Location`` response header, is captured.  The
    second command (``cmd2``) PUTs the file to that URL.  The outcome is
    printed to stdout; nothing is returned and exceptions are reported
    rather than propagated.

    Args:
        hostname: Host that serves the WebHDFS endpoint.
        local_file: Path of the local file to upload ('/'-separated).
        hdfs_path: HDFS directory that receives the file; the file keeps the
            last path component of ``local_file`` as its name.
    """
    # Bound before the try block so the cleanup in ``finally`` is safe even
    # when something fails before the pipe is opened.
    process = None
    try:
        # Build the HDFS path of the file to write: <hdfs_path>/<file name>.
        file_name = local_file.split("/")[-1]
        target_path = hdfs_path + '/' + file_name

        # Step 1: run cmd1 to obtain the URL the data must be written to.
        # NOTE(review): hostname and target_path are interpolated into a
        # double-quoted URL inside the cmd1 shell template and are neither
        # shell- nor URL-escaped here; only pass trusted values.
        process = os.popen(cmd1 % (hostname, target_path))
        redirect_url = process.read().strip()
        if not redirect_url:
            # No Location header came back, so there is nowhere to upload to;
            # running the second curl with an empty URL could only fail.
            print('put %s fail.' % (local_file))
            return

        # Step 2: run cmd2 to write the file.  Both arguments are quoted so
        # that spaces or shell metacharacters cannot split or alter the
        # command line.
        status = os.system(
            cmd2 % (_shell_quote(local_file), _shell_quote(redirect_url)))
        if status == 0:
            print('put %s successfully.' % (local_file))
        else:
            print('put %s fail.' % (local_file))
    except Exception as e:
        # Best-effort reporting: show which file failed and why.
        print('%s %s' % (local_file, e))
    finally:
        if process is not None:
            process.close()


if __name__ == '__main__':
    # Usage: <script> LOCAL_FILE DATEKEY [WEBHDFS_HOST]
    # The file is uploaded to /user/log/<DATEKEY>/ on HDFS.
    if len(sys.argv) < 3:
        # Fail with a usage message instead of an IndexError traceback.
        sys.stderr.write(
            'usage: %s LOCAL_FILE DATEKEY [WEBHDFS_HOST]\n' % sys.argv[0])
        sys.exit(2)
    local_file = sys.argv[1]
    datekey = sys.argv[2]
    hdfs_path = '/user/log/' + datekey
    # An optional third argument overrides the default WebHDFS host.
    webhdfs_host = sys.argv[3] if len(sys.argv) > 3 else '192.168.254.43'
    put_file_to_hdfs(webhdfs_host, local_file, hdfs_path)

posted on 2018-11-20 10:17  姜小嫌  阅读(471)  评论(0)  编辑  收藏  举报