excelLoader.py

from DataCleaning.library.functions.floatFormatter import *
from DataCleaning.library.functions.list2SQL import *
from DataCleaning.library.functions.excelDateFormatter import *
from DataCleaning.library.functions.getStartAndEndLine import *
from DataCleaning.library.functions.processBar import *


def excelLoader(sh, cursor, titleList, typeList, tempTableName ="tempTableName", colsKeep = [], firstCell = "", appendOnly = False, firstCol = 0):
print("generating table " + tempTableName)
logging.info("generating table " + tempTableName)
start = datetime.datetime.now()
if firstCell == "":
firstCell = str(sh.cell(0, firstCol).value)
firstLine, lastLine = getStartAndEndLine(sh, firstCell, firstCol)
if len(colsKeep) == 0:
rounds = list(range(firstCol, sh.ncols))
else:
rounds = colsKeep
SQLtempList = []
processBar(context='preparing data...')
for i in range(firstLine, lastLine):
row = [""] * len(rounds)
for j in range(len(rounds)):
k = rounds[j]
if "DATETIME" in typeList[j]:
if str(sh.cell(i, k).value) != "":
row[j] = str(sh.cell(i, k).value)
else:
row[j] = "NULL"
elif "DATE" in typeList[j]:
row[j] = excelDateFormatter(sh, i, k, sep="-")
elif "TIME" in typeList[j]:
row[j] = excelTimeFormatter(sh, i, k, sep=":")
elif "FLOAT" in typeList[j]:
if str(sh.cell(i, k).value) != "":
row[j] = str(float(sh.cell(i, k).value))
elif "DECIMAL" in typeList[j]:
digits = int(typeList[j].split(",")[1][0])
try:
row[j] = str(round(float(sh.cell(i, k).value), digits))
except:
row[j] = "NULL"
elif "INT" in typeList[j]:
row[j] = floatFormatter(sh.cell(i, k).value)
else:
row[j] = floatFormatter(sh.cell(i, k).value).replace(".", " ").replace("'", "''").replace(",", " ").strip()
if row[j] == "" or sh.cell(i, k).ctype == 5:
row[j] = "NULL"
if not emptyLine(row):
SQLtempList.append(row)
processBar(i, lastLine, firstLine)
# write 2 sqlserver
list2SQL(cursor, SQLtempList, tempTableName, titleList, typeList, appendOnly)
print("successfully generated table " + tempTableName)
logging.info("successfully generated table " + tempTableName)
end = datetime.datetime.now()
logging.info("time spent on loading and inserting " + tempTableName + ": " + str(end - start))



def emptyLine(row):
sum = 0
for ele in row:
if ele != "NULL":
sum += len(ele)
if sum == 0:
return True
else:
return False



from DataCleaning.library.functions.floatFormatter import *
from DataCleaning.library.functions.list2SQL import *
from DataCleaning.library.functions.excelDateFormatter import *
from DataCleaning.library.functions.getStartAndEndLine import *


def excelLoaderExchangeRate(sh, cursor, titleList, typeList, tempTableName ="tempTableName", colsKeep = [], firstCell = "", appendOnly = False):
print("generating table " + tempTableName)
logging.info("generating table " + tempTableName)
start = datetime.datetime.now()
if firstCell == "":
firstCell = str(sh.cell(0, 0).value)
firstLine, lastLine = getStartAndEndLine(sh, firstCell)
if len(colsKeep) == 0:
rounds = list(range(sh.ncols))
else:
rounds = colsKeep
SQLtempList = []
processBar(context='preparing data...')
for i in range(firstLine, lastLine):
row = [""] * len(rounds)
for j in range(len(rounds)):
k = rounds[j]
if "DATE" in typeList[j]:
if sh.cell(i, j).value != "":
tempDate = str(sh.cell(i, j).value).replace(".", "")
row[j] = "-".join([tempDate[-4:], tempDate[2:4], tempDate[:2]])
elif "TIME" in typeList[j]:
row[j] = excelTimeFormatter(sh, i, k, sep=":")
elif "DECIMAL" in typeList[j]:
digits = int(typeList[j].split(",")[1][0])
try:
row[j] = str(round(float(sh.cell(i, k).value.replace("/", "").replace(".", "").replace(",", ".")), digits))
except:
row[j] = "NULL"
elif "INT" in typeList[j]:
row[j] = floatFormatter(sh.cell(i, k).value)
else:
row[j] = floatFormatter(sh.cell(i, k).value).replace(".", ":").replace("'", "''").strip()
if row[j] == "" or sh.cell(i, k).ctype == 5:
row[j] = "NULL"
if not emptyLine(row):
SQLtempList.append(row)
processBar(i, lastLine, firstLine)
# write 2 sqlserver
list2SQL(cursor, SQLtempList, tempTableName, titleList, typeList)
print("successfully generated table " + tempTableName)
logging.info("successfully generated table " + tempTableName)
end = datetime.datetime.now()
logging.info("time spent on loading and inserting " + tempTableName + ": " + str(end - start))


def excelLoaderPN_TOPMOST_MAPPING(sh, cursor, titleList, typeList, tempTableName ="tempTableName", colsKeep = [], firstCell = "", appendOnly = False):
print("generating table " + tempTableName)
logging.info("generating table " + tempTableName)
start = datetime.datetime.now()
if firstCell == "":
firstCell = str(sh.cell(0, 0).value)
firstLine, lastLine = getStartAndEndLine(sh, firstCell)
if len(colsKeep) == 0:
rounds = list(range(sh.ncols))
else:
rounds = colsKeep
PN2TOPMOST_Dic = {}
SQLtempList = []
processBar(context='preparing data...')
for i in range(firstLine, lastLine):
row = [""] * len(rounds)
for j in range(len(rounds)):
k = rounds[j]
if "DATETIME" in typeList[j]:
if str(sh.cell(i, k).value) != "":
row[j] = str(sh.cell(i, k).value)
else:
row[j] = "NULL"
elif "DATE" in typeList[j]:
row[j] = excelDateFormatter(sh, i, k, sep="-")
elif "TIME" in typeList[j]:
row[j] = excelTimeFormatter(sh, i, k, sep=":")
elif "FLOAT" in typeList[j]:
if str(sh.cell(i, k).value) != "":
row[j] = str(float(sh.cell(i, k).value))
elif "DECIMAL" in typeList[j]:
digits = int(typeList[j].split(",")[1][0])
try:
row[j] = str(round(float(sh.cell(i, k).value), digits))
except:
row[j] = "NULL"
elif "INT" in typeList[j]:
row[j] = floatFormatter(sh.cell(i, k).value)
else:
row[j] = floatFormatter(sh.cell(i, k).value).replace(".", " ").replace("'", "''").replace(",", " ").strip()
if row[j] == "" or sh.cell(i, k).ctype == 5:
row[j] = "NULL"
if row[1] in PN2TOPMOST_Dic.keys():
PN2TOPMOST_Dic[row[1]].append(row[0])
else:
PN2TOPMOST_Dic[row[1]] = [row[0]]

if not emptyLine(row):
SQLtempList.append(row)
processBar(i, lastLine, firstLine)
for i in range(len(SQLtempList)):
SQLtempList[i].append(";".join(PN2TOPMOST_Dic[SQLtempList[i][1]]))
# write 2 sqlserver
list2SQL(cursor, SQLtempList, tempTableName, titleList, typeList, appendOnly)
print("successfully generated table " + tempTableName)
logging.info("successfully generated table " + tempTableName)
end = datetime.datetime.now()
logging.info("time spent on loading and inserting " + tempTableName + ": " + str(end - start))  
posted @ 2021-10-05 20:25  石棠  阅读(58)  评论(0编辑  收藏  举报