| |
| |
| |
| |
| |
| |
| |
# People the source rows are shared out to; order fixes each person's slice.
user_names = [
    'xiao_shuai',
    'xiao_wang',
    'xiaoming',
    'xiao_lei',
    'xiao_bo',
    'xiao_hong',
]
| |
| |
# Rows per person: total_row_count divided by the number of users, rounded up
# so that the slices still cover every row when the division is not exact.
split_size = total_row_count // len(user_names) + (
    1 if total_row_count % len(user_names) else 0
)
# Bare expression: presumably a notebook cell echo (no effect in a plain script).
split_size
| |
| |
# Cut df_source into consecutive row ranges of split_size rows, one per user,
# keeping the user's position and name alongside each slice.
df_subs = [
    (idx, user_name, df_source.iloc[idx * split_size:(idx + 1) * split_size])
    for idx, user_name in enumerate(user_names)
]

# Write each slice to its own workbook, named after the position and the user.
for position, owner, frame in df_subs:
    frame.to_excel(f'{splits_dir}/split_{position}_{owner}.xlsx', index=False)
| |
| |
| |
# Collect the split workbooks that are to be merged back together.
# os.listdir() returns entries in arbitrary order and lists everything in the
# directory, so keep only names shaped like split_*.xlsx (this also skips
# Excel's "~$..." lock files and other stray entries) and sort them so the
# merge order is the same on every run.
excel_names = sorted(
    name
    for name in os.listdir(splits_dir)
    if name.startswith('split_') and name.endswith('.xlsx')
)
| |
| |
# Load every split workbook and tag its rows with the owner's name, recovered
# from the file-name pattern split_<index>_<user_name>.xlsx.
df_list = []
for excel_name in excel_names:
    excel_path = f'{splits_dir}/{excel_name}'
    df_split = pd.read_excel(excel_path)
    # Strip only a trailing ".xlsx", then cut at the first two underscores:
    # underscores inside the user name are kept, and an index of any width
    # works (slicing off a fixed two characters breaks from split_10 onward).
    if excel_name.endswith('.xlsx'):
        stem = excel_name[:-len('.xlsx')]
    else:
        stem = excel_name
    user_name = stem.split('_', 2)[-1]
    print(excel_name, user_name)
    df_split['username'] = user_name
    df_list.append(df_split)
| |
| |
# Stack the per-user frames into one table. ignore_index=True renumbers the
# rows 0..n-1; without it each split keeps its own row labels and the merged
# frame ends up with repeated index values.
df_merged = pd.concat(df_list, ignore_index=True)
# Rows per user; presumably a notebook cell echo (no effect in a plain script).
df_merged['username'].value_counts()


# Save the merged table; the row index is not written to the sheet.
df_merged.to_excel(f'{work_dir}/merged_data.xlsx', index=False)
| |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 全程不用写代码,我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!