hdfs文件传输到ods层的脚本

 

#!/usr/bin/python3
# coding=utf-8
import sys

from base import get_yesterday, APP
import subprocess

date = get_yesterday()

tables = ['ods_log_inc', 'ods_activity_info_full', 'ods_activity_rule_full', 'ods_base_category1_full',
          'ods_base_category2_full', 'ods_base_category3_full', 'ods_base_dic_full', 'ods_base_province_full',
          'ods_base_region_full', 'ods_base_trademark_full', 'ods_cart_info_full', 'ods_cart_info_inc',
          'ods_comment_info_inc', 'ods_coupon_info_full', 'ods_coupon_use_inc', 'ods_favor_info_inc',
          'ods_order_detail_activity_inc', 'ods_order_detail_coupon_inc', 'ods_order_detail_inc', 'ods_order_info_inc',
          'ods_order_refund_info_inc', 'ods_order_status_log_inc', 'ods_payment_info_inc', 'ods_refund_payment_inc',
          'ods_sku_attr_value_full', 'ods_sku_info_full', 'ods_sku_sale_attr_value_full', 'ods_spu_info_full',
          'ods_user_info_inc']


def get_sql(table):
    if table != 'ods_log_inc':
        sql = f"load data inpath '/origin_data/gmall/db/{table[4:]}/{date}' into table {APP}.{table} partition " \
              f"(dt='{date}'); "
    else:
        sql = f"load data inpath '/origin_data/gmall/log/topic_log/{date}' into table {APP}.ods_log_inc partition (" \
              f"dt='{date}'); "
    return sql


def import_data(table):
    sql = get_sql(table)
    print("正在传输 {:<15s}表的数据".format(table))
    subprocess.Popen(rf'hive -e "{sql}"', shell=True).communicate()
    print("传输完成 {:<15s}表的数据".format(table))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("参数过少,请重新调用")
        exit(0)
    if len(sys.argv) == 3:
        date = sys.argv[2]
    argc_table = sys.argv[1]
    for table in tables:
        if argc_table == table or argc_table == 'all':
            import_data(table)

调用形式
hdfs_to_ods.py all 2020-06-15

posted @ 2024-02-28 22:07  啦啦啦one  阅读(19)  评论(0编辑  收藏  举报