根据后缀名把Excel文件转换成可以插入MongoDB数据库的数据

import pandas as pd
import os

def convert_file_to_json(file_path):
    # 检查文件扩展名并读取文件
    _, file_extension = os.path.splitext(file_path)
    if file_extension.lower() == '.csv':
        df = pd.read_csv(file_path)
    elif file_extension.lower() in ['.xlsx', '.xls']:
        df = pd.read_excel(file_path)
    else:
        raise ValueError("Unsupported file format. Only CSV and XLSX files are supported.")

    # 将\N、NaN值、空字符串、换行符等转换为空字符串
    df = df.replace(r'\N', '', regex=False)  # 替换\N为空字符串
    df = df.fillna('')  # 替换NaN值为空字符串
    df = df.replace('\n', '', regex=True)  # 替换换行符为空字符串
    df = df.replace('undefined', '', regex=True)  # 替换换行符为空字符串

    # 将DataFrame转换为JSON格式的字符串
    json_str = df.to_json(orient='records', lines=False, force_ascii=False)

    # 保存为JSON文件,文件名与原文件相同,但扩展名为.json
    json_file_path = os.path.splitext(file_path)[0] + '.json'
    with open(json_file_path, 'w', encoding='utf-8') as json_file:
        json_file.write(json_str)

    print(f"File {file_path} has been successfully converted to JSON and saved as {json_file_path}")
# 使用示例
file_path = '装修.xlsx'
convert_file_to_json(file_path)

 

posted @ 2024-11-13 15:45  布都御魂  阅读(1)  评论(0编辑  收藏  举报