# _*_ coding=utf-8 _*_
import sys
import os
# WebHDFS shell-command templates used by put_file_to_hdfs below.
hosts = {}  # NOTE(review): appears unused in this file -- verify callers before removing
# Step 1: PUT to the namenode (WebHDFS, port 50070) to create the file; the
# response's "Location" header carries the datanode URL the body must be
# written to. awk extracts that URL from the curl header output.
cmd1 = '''
curl -i -X PUT "http://%s:50070/webhdfs/v1%s?op=CREATE&user.name=op&overwrite=true&replication=3" | grep Location | awk -F ' ' '{print $2}'
'''
# Step 2: PUT the local file contents (-T) to the datanode Location URL.
cmd2 = '''
curl -i -X PUT -T %s %s
'''
def put_file_to_hdfs(hostname, local_file, hdfs_path):
    """Upload a local file to HDFS through the WebHDFS REST API.

    Follows the two-step WebHDFS write protocol: first PUT to the namenode
    to obtain the datanode redirect URL (taken from the ``Location``
    response header via the ``cmd1`` template), then PUT the file body to
    that URL (``cmd2`` template).

    Args:
        hostname: namenode host serving WebHDFS on port 50070.
        local_file: path of the local file to upload.
        hdfs_path: target HDFS directory; the local file's basename is
            appended to form the destination path.

    Prints a success/failure line; swallows and prints any exception.
    """
    # NOTE(review): hostname and paths are interpolated directly into a
    # shell command line -- unsafe for untrusted input (shell injection).
    # Consider subprocess.run([...], shell=False) or urllib instead of curl.
    process = None  # ensure defined for finally even if the try body fails early
    try:
        # Append the local file's basename to the target HDFS directory.
        sub_local_file = local_file.split("/")[-1]
        hdfs_path = hdfs_path + '/' + sub_local_file
        # Step 1: ask the namenode for the datanode Location URL.
        cmd3 = cmd1 % (hostname, hdfs_path)
        process = os.popen(cmd3)
        http_sub = process.read().strip()
        # Step 2: upload the file contents to the datanode URL.
        cmd4 = cmd2 % (local_file, '"' + http_sub + '"')
        status = os.system(cmd4)
        if status == 0:
            print('put %s successfully.' % (local_file))
        else:
            print('put %s fail.' % (local_file))
    except Exception as e:
        print('put %s error: %s' % (local_file, e))
    finally:
        if process:
            process.close()
if __name__ == '__main__':
    # Usage: <script> <local_file> <datekey>
    # Validate argv up front instead of dying with a bare IndexError.
    if len(sys.argv) < 3:
        sys.exit('usage: %s <local_file> <datekey>' % sys.argv[0])
    local_file = sys.argv[1]
    datekey = sys.argv[2]  # date partition key, e.g. 20240101 -- TODO confirm format
    hdfs_path = '/user/log/' + datekey
    # NOTE(review): namenode host is hard-coded; consider making it configurable.
    webhdfs_host = '192.168.254.43'
    put_file_to_hdfs(webhdfs_host, local_file, hdfs_path)