Python读取Excel批量自动创建Hive数据表SQL - 灰色...天空

需要使用请自行下载使用说明和Excel
百度网盘链接地址：
链接：https://pan.baidu.com/s/1lCWMK81jQHDOCOJqSy9T2w?pwd=tyoo
提取码：tyoo
import xlrd


# 打开文件
data = xlrd.open_workbook("Create.xlsx")
# 获取第一个sheet内容
table = data.sheet_by_index(0)

tabNameList = []  # 表名称
tabColList = []  # 表列
tabTypeList = []  # 列类型
tabColCommList = []  # 列描述
tabCommList = []  # 表描述
tabParColList = []  # 分区字段
tabParTypeList = []  # 分区字段属性
tabParCommList = []  # 分区字段描述
index = [1]#下标值
result=[]#最终结果

def getIndex():
    number = 0
    name = table.col_values(0)[1:]  # 表名称
    for item in range(0, len(name)):
        if item + 1 == len(name):
            number = item + 2
            index.append(number)
        else:
            if name[item] != name[item + 1]:
                number = item + 2
                index.append(number)


def Create_table():
        creat = ("\nCREATE TABLE IF NOT EXISTS %s ( \n" % str(tabNameList[0]))  # 添加表名称
        col = []  # 存储列名和属性
        for it in range(0, len(tabTypeList)):
            col.append(tabColList[it] + " " + tabTypeList[it] + " COMMENT \'" + tabColCommList[it] + "\'")

        for item in range(0, len(col)):
            creat += col[item]
            if item != len(tabTypeList) - 1:
                creat += ",\n"
        creat = (creat + "\n) COMMENT \'%s\'\n" % str(tabCommList[0]))
        par = []  # 存储分区列名和属性和备注
        if len(tabParColList) > 0:
            for it in range(0, len(tabParColList)):
                par.append(tabParColList[it] + " " + tabParTypeList[it] + " COMMENT \'" + tabParCommList[it] + "\'")

            creat = creat + "PARTITIONED BY (\n"
            for item in range(0, len(par)):
                creat += par[item]
                if item != len(tabParColList) - 1:
                    creat += ",\n"
            creat = creat + ");\n" #stored as parquet可以添加压缩属性
        result.append(creat)
        print(result)

def getInFo():
    getIndex()
    for item in range(0, len(index)-1):
        tabNameList.clear()
        tabColList.clear()
        tabTypeList.clear()
        tabColCommList.clear()
        tabCommList.clear()
        tabParColList.clear()
        tabParTypeList.clear()
        tabParCommList.clear()

        if len(index)==2:
            left = index[0]
            right = index[1]
        else:
            left = index[item]
            right = index[item + 1]
        name = table.col_values(0)[left:right]  # 表名称
        for item in name:
            if (item not in tabNameList):
                tabNameList.append(item)
        col = table.col_values(1)[left:right]  # 表列
        for item in col:
            tabColList.append(item)
        type = table.col_values(2)[left:right]  # 列类型
        for item in type:
            if item != "":
                tabTypeList.append(item)
            else:
                tabTypeList.append("string")
        colcomm = table.col_values(3)[left:right]  # 列描述
        for item in colcomm:
            if item != "":
                tabColCommList.append(item)
            else:
                tabColCommList.append("")
        comm = table.col_values(4)[left:right]  # 表描述
        for item in comm:
            if item != "":
                tabCommList.append(item)
                break
            else:
                tabCommList.append("")
                break
        parcol = table.col_values(5)[left:right]  # 分区字段
        for item in parcol:
            if item != "":
                tabParColList.append(item)
        partype = table.col_values(6)[left:right]  # 分区字段属性
        for item in partype:
            if item != "":
                tabParTypeList.append(item)
            else:
                tabParTypeList.append("string")
        parcomm = table.col_values(7)[left:right]  # 分区字段描述
        for item in parcomm:
            if item != "":
                tabParCommList.append(item)
            else:
                tabParCommList.append("")
        Create_table()

getInFo()



# 输出文件设置
fname = "建表.sql"


for item in result:
    print(item)
    outFile = open(fname, 'a+',encoding = 'utf-8')
    outFile.write(item)
outFile.close()
posted on 2020-10-26 18:56 灰色...天空阅读(992) 评论(2) 收藏举报
刷新页面返回顶部