python处理数据的导出到Excel

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import datetime
import json
import os
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text
 
# 数据库连接配置,请根据你的实际情况修改
db_config = {
    'host': 'your_database_host',
    'user': 'your_database_user',
    'password': 'your_database_password',
    'database': 'your_database_name'
}
 
# 使用 SQLAlchemy 创建数据库连接,并获取 Connection 对象
engine = create_engine (
    f"mysql+mysqlconnector://{db_config['user']}:{db_config['password']}@{db_config['host']}/{db_config['database']}")
connection = engine.connect ()
 
# 查询onboard_companies表中status等于1的数据
companies_query = text ("SELECT id,ap_cid,company_name FROM onboard_companies WHERE status = 1")
# 尝试读取数据到DataFrame
companies_df = pd.read_sql (companies_query, connection)
# print(companies_df)
 
# 查询labels表中status等于2的数据
labels_query = text ("SELECT id,name FROM labels WHERE status = 2 AND id IN (4,9)")
# 尝试读取数据到DataFrame
labels_df = pd.read_sql (labels_query, connection)
 
# 查询label_fields表中与labels表关联的数据
label_fields_query = text ("""
    SELECT lf.id, lf.label_id, lf.name, lf.type, lf.field_type,lf.is_required, lf.options
    FROM label_fields lf
    INNER JOIN labels l ON lf.label_id = l.id
    WHERE l.status = 2 AND l.id IN (4,9) AND lf.status = 1 AND lf.type != 7
""")
# 尝试读取数据到DataFrame
label_fields_df = pd.read_sql (label_fields_query, connection)
 
# 关闭数据库连接
connection.close ()
 
# 转换type和is_required的值
type_mapping = {
    1: "Single-select dropdown list (Self-defined)",
    2: "Single-select dropdown list (Company ID)",
    3: "Multi-select dropdown list (Self-defined)",
    4: "Multi-select dropdown list (Company ID)",
    5: "Multi-select dropdown list (Trade Code)",
    6: "Calendar",
    8: "Free text",
}
 
field_type_mapping = {
    1: "dropdown",
    2: "checkbox",
    3: "textarea",
    4: "date",
    5: "file",
    6: "multidropdow",
}
 
is_required_mapping = {
    0: "可选",
    1: "必填",
}
 
 
# 处理options列
def process_options(options):
    if pd.notna (options):
        options_list = json.loads (options)
        options_str = ",".join (options_list)
        return options_str
    return None
 
 
label_fields_df['type'] = label_fields_df['type'].map (type_mapping)
label_fields_df['field_type'] = label_fields_df['field_type'].map (field_type_mapping)
label_fields_df['is_required'] = label_fields_df['is_required'].map (is_required_mapping)
label_fields_df['options'] = label_fields_df['options'].apply (process_options)
 
# 合并数据
merged_df = pd.merge (labels_df, label_fields_df, left_on='id', right_on='label_id')
 
# 选择需要的列
result_df = merged_df[['id_x', 'name_x', 'id_y', 'name_y', 'type', 'field_type', 'is_required', 'options']]
# print(result_df)
 
# 创建一个新的DataFrame来存储组合后的结果
combined_data = pd.DataFrame ()
# 使用嵌套的for循环组合数据
for _, company_row in companies_df.iterrows ():
    for _, result_row in result_df.iterrows ():
        # 创建一个新的行,包含公司数据和结果数据
        new_row = {
            'company_id': company_row['id'],
            'company_ap_cid': company_row['ap_cid'],
            'company_name': company_row['company_name'],
            'result_id_x': result_row['id_x'],
            'result_name_x': result_row['name_x'],
            'result_id_y': result_row['id_y'],
            'result_name_y': result_row['name_y'],
            'result_type': result_row['type'],
            'result_field_type': result_row['field_type'],
            'result_is_required': result_row['is_required'],
            'result_options': result_row['options'],
            'input_options': ''
        }
        # 将新行添加到组合后的数据框中
        combined_data = pd.concat ([combined_data, pd.DataFrame ([new_row])], ignore_index=True)
 
# 打印结果
# print(combined_data)
 
combined_data.columns = ['Company Id', 'Company AP ID', 'Company Name', 'Label Id', 'Label Name', 'Label Field Id',
                         'Label Field Name', 'Label Field Type', 'Label Field Val Type', 'is Required', 'Optional options', 'Input options']
# print(combined_data)
 
# 获取当前日期和时间
current_datetime = datetime.datetime.now ()
 
# 把日期和事件格式化为字符串
formatted_datetime = current_datetime.strftime ("%Y%m%d_%H%M%S")
 
# 在当前文件夹下的 "outFiles" 文件夹中保存 Excel 文件
output_folder = "outFiles"
 
# 将格式化的日期时间与所需的文件名连接起来
output_file_name = f"outPutLabel_{formatted_datetime}.xlsx"
 
# 如果 "outFiles" 文件夹不存在,则创建它
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
 
# 拼接输出文件的完整路径
output_file_path = os.path.join(output_folder, output_file_name)
 
# 将数据框架写入 Excel 文件
combined_data.to_excel (output_file_path, index=False)
 
print(f"Excel 文件已保存到 {output_file_path}")

  

posted @   爱家家的卡卡  阅读(99)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 上周热点回顾(3.3-3.9)
· AI 智能体引爆开源社区「GitHub 热点速览」
点击右上角即可分享
微信分享提示