python处理数据的导出到Excel
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 | import datetime import json import os import pandas as pd from sqlalchemy import create_engine from sqlalchemy.sql import text # 数据库连接配置,请根据你的实际情况修改 db_config = { 'host' : 'your_database_host' , 'user' : 'your_database_user' , 'password' : 'your_database_password' , 'database' : 'your_database_name' } # 使用 SQLAlchemy 创建数据库连接,并获取 Connection 对象 engine = create_engine ( f "mysql+mysqlconnector://{db_config['user']}:{db_config['password']}@{db_config['host']}/{db_config['database']}" ) connection = engine.connect () # 查询onboard_companies表中status等于1的数据 companies_query = text ( "SELECT id,ap_cid,company_name FROM onboard_companies WHERE status = 1" ) # 尝试读取数据到DataFrame companies_df = pd.read_sql (companies_query, connection) # print(companies_df) # 查询labels表中status等于2的数据 labels_query = text ( "SELECT id,name FROM labels WHERE status = 2 AND id IN (4,9)" ) # 尝试读取数据到DataFrame labels_df = pd.read_sql (labels_query, connection) # 查询label_fields表中与labels表关联的数据 label_fields_query = text ( """ SELECT lf.id, lf.label_id, lf.name, lf.type, lf.field_type,lf.is_required, lf.options FROM label_fields lf INNER JOIN labels l ON lf.label_id = l.id WHERE l.status = 2 AND l.id IN (4,9) AND lf.status = 1 AND lf.type != 7 """ ) # 尝试读取数据到DataFrame label_fields_df = pd.read_sql (label_fields_query, connection) # 关闭数据库连接 connection.close () # 转换type和is_required的值 type_mapping = { 1 : "Single-select dropdown list (Self-defined)" , 2 : "Single-select dropdown list (Company ID)" , 3 : "Multi-select dropdown list (Self-defined)" , 4 : "Multi-select dropdown list (Company ID)" , 5 : "Multi-select dropdown list (Trade Code)" , 6 : "Calendar" , 8 : "Free text" , } field_type_mapping = { 1 : "dropdown" , 2 : "checkbox" , 3 : "textarea" , 4 : "date" , 5 : "file" , 6 : "multidropdow" , } is_required_mapping = { 0 : "可选" , 1 : "必填" , } # 处理options列 def process_options(options): if pd.notna (options): options_list = json.loads (options) options_str = "," .join (options_list) return options_str return None label_fields_df[ 'type' ] = label_fields_df[ 'type' ]. map (type_mapping) label_fields_df[ 'field_type' ] = label_fields_df[ 'field_type' ]. map (field_type_mapping) label_fields_df[ 'is_required' ] = label_fields_df[ 'is_required' ]. map (is_required_mapping) label_fields_df[ 'options' ] = label_fields_df[ 'options' ]. apply (process_options) # 合并数据 merged_df = pd.merge (labels_df, label_fields_df, left_on = 'id' , right_on = 'label_id' ) # 选择需要的列 result_df = merged_df[[ 'id_x' , 'name_x' , 'id_y' , 'name_y' , 'type' , 'field_type' , 'is_required' , 'options' ]] # print(result_df) # 创建一个新的DataFrame来存储组合后的结果 combined_data = pd.DataFrame () # 使用嵌套的for循环组合数据 for _, company_row in companies_df.iterrows (): for _, result_row in result_df.iterrows (): # 创建一个新的行,包含公司数据和结果数据 new_row = { 'company_id' : company_row[ 'id' ], 'company_ap_cid' : company_row[ 'ap_cid' ], 'company_name' : company_row[ 'company_name' ], 'result_id_x' : result_row[ 'id_x' ], 'result_name_x' : result_row[ 'name_x' ], 'result_id_y' : result_row[ 'id_y' ], 'result_name_y' : result_row[ 'name_y' ], 'result_type' : result_row[ 'type' ], 'result_field_type' : result_row[ 'field_type' ], 'result_is_required' : result_row[ 'is_required' ], 'result_options' : result_row[ 'options' ], 'input_options' : '' } # 将新行添加到组合后的数据框中 combined_data = pd.concat ([combined_data, pd.DataFrame ([new_row])], ignore_index = True ) # 打印结果 # print(combined_data) combined_data.columns = [ 'Company Id' , 'Company AP ID' , 'Company Name' , 'Label Id' , 'Label Name' , 'Label Field Id' , 'Label Field Name' , 'Label Field Type' , 'Label Field Val Type' , 'is Required' , 'Optional options' , 'Input options' ] # print(combined_data) # 获取当前日期和时间 current_datetime = datetime.datetime.now () # 把日期和事件格式化为字符串 formatted_datetime = current_datetime.strftime ( "%Y%m%d_%H%M%S" ) # 在当前文件夹下的 "outFiles" 文件夹中保存 Excel 文件 output_folder = "outFiles" # 将格式化的日期时间与所需的文件名连接起来 output_file_name = f "outPutLabel_{formatted_datetime}.xlsx" # 如果 "outFiles" 文件夹不存在,则创建它 if not os.path.exists(output_folder): os.makedirs(output_folder) # 拼接输出文件的完整路径 output_file_path = os.path.join(output_folder, output_file_name) # 将数据框架写入 Excel 文件 combined_data.to_excel (output_file_path, index = False ) print (f "Excel 文件已保存到 {output_file_path}" ) |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· winform 绘制太阳,地球,月球 运作规律
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 上周热点回顾(3.3-3.9)
· AI 智能体引爆开源社区「GitHub 热点速览」