python处理数据的导出到Excel
import datetime import json import os import pandas as pd from sqlalchemy import create_engine from sqlalchemy.sql import text # 数据库连接配置,请根据你的实际情况修改 db_config = { 'host': 'your_database_host', 'user': 'your_database_user', 'password': 'your_database_password', 'database': 'your_database_name' } # 使用 SQLAlchemy 创建数据库连接,并获取 Connection 对象 engine = create_engine ( f"mysql+mysqlconnector://{db_config['user']}:{db_config['password']}@{db_config['host']}/{db_config['database']}") connection = engine.connect () # 查询onboard_companies表中status等于1的数据 companies_query = text ("SELECT id,ap_cid,company_name FROM onboard_companies WHERE status = 1") # 尝试读取数据到DataFrame companies_df = pd.read_sql (companies_query, connection) # print(companies_df) # 查询labels表中status等于2的数据 labels_query = text ("SELECT id,name FROM labels WHERE status = 2 AND id IN (4,9)") # 尝试读取数据到DataFrame labels_df = pd.read_sql (labels_query, connection) # 查询label_fields表中与labels表关联的数据 label_fields_query = text (""" SELECT lf.id, lf.label_id, lf.name, lf.type, lf.field_type,lf.is_required, lf.options FROM label_fields lf INNER JOIN labels l ON lf.label_id = l.id WHERE l.status = 2 AND l.id IN (4,9) AND lf.status = 1 AND lf.type != 7 """) # 尝试读取数据到DataFrame label_fields_df = pd.read_sql (label_fields_query, connection) # 关闭数据库连接 connection.close () # 转换type和is_required的值 type_mapping = { 1: "Single-select dropdown list (Self-defined)", 2: "Single-select dropdown list (Company ID)", 3: "Multi-select dropdown list (Self-defined)", 4: "Multi-select dropdown list (Company ID)", 5: "Multi-select dropdown list (Trade Code)", 6: "Calendar", 8: "Free text", } field_type_mapping = { 1: "dropdown", 2: "checkbox", 3: "textarea", 4: "date", 5: "file", 6: "multidropdow", } is_required_mapping = { 0: "可选", 1: "必填", } # 处理options列 def process_options(options): if pd.notna (options): options_list = json.loads (options) options_str = ",".join (options_list) return options_str return None label_fields_df['type'] = label_fields_df['type'].map (type_mapping) label_fields_df['field_type'] = label_fields_df['field_type'].map (field_type_mapping) label_fields_df['is_required'] = label_fields_df['is_required'].map (is_required_mapping) label_fields_df['options'] = label_fields_df['options'].apply (process_options) # 合并数据 merged_df = pd.merge (labels_df, label_fields_df, left_on='id', right_on='label_id') # 选择需要的列 result_df = merged_df[['id_x', 'name_x', 'id_y', 'name_y', 'type', 'field_type', 'is_required', 'options']] # print(result_df) # 创建一个新的DataFrame来存储组合后的结果 combined_data = pd.DataFrame () # 使用嵌套的for循环组合数据 for _, company_row in companies_df.iterrows (): for _, result_row in result_df.iterrows (): # 创建一个新的行,包含公司数据和结果数据 new_row = { 'company_id': company_row['id'], 'company_ap_cid': company_row['ap_cid'], 'company_name': company_row['company_name'], 'result_id_x': result_row['id_x'], 'result_name_x': result_row['name_x'], 'result_id_y': result_row['id_y'], 'result_name_y': result_row['name_y'], 'result_type': result_row['type'], 'result_field_type': result_row['field_type'], 'result_is_required': result_row['is_required'], 'result_options': result_row['options'], 'input_options': '' } # 将新行添加到组合后的数据框中 combined_data = pd.concat ([combined_data, pd.DataFrame ([new_row])], ignore_index=True) # 打印结果 # print(combined_data) combined_data.columns = ['Company Id', 'Company AP ID', 'Company Name', 'Label Id', 'Label Name', 'Label Field Id', 'Label Field Name', 'Label Field Type', 'Label Field Val Type', 'is Required', 'Optional options', 'Input options'] # print(combined_data) # 获取当前日期和时间 current_datetime = datetime.datetime.now () # 把日期和事件格式化为字符串 formatted_datetime = current_datetime.strftime ("%Y%m%d_%H%M%S") # 在当前文件夹下的 "outFiles" 文件夹中保存 Excel 文件 output_folder = "outFiles" # 将格式化的日期时间与所需的文件名连接起来 output_file_name = f"outPutLabel_{formatted_datetime}.xlsx" # 如果 "outFiles" 文件夹不存在,则创建它 if not os.path.exists(output_folder): os.makedirs(output_folder) # 拼接输出文件的完整路径 output_file_path = os.path.join(output_folder, output_file_name) # 将数据框架写入 Excel 文件 combined_data.to_excel (output_file_path, index=False) print(f"Excel 文件已保存到 {output_file_path}")