根据后缀名把Excel文件转换成可以插入MongoDB数据库的数据
import pandas as pd
import os
def convert_file_to_json(file_path):
# 检查文件扩展名并读取文件
_, file_extension = os.path.splitext(file_path)
if file_extension.lower() == '.csv':
df = pd.read_csv(file_path)
elif file_extension.lower() in ['.xlsx', '.xls']:
df = pd.read_excel(file_path)
else:
raise ValueError("Unsupported file format. Only CSV and XLSX files are supported.")
# 将\N、NaN值、空字符串、换行符等转换为空字符串
df = df.replace(r'\N', '', regex=False) # 替换\N为空字符串
df = df.fillna('') # 替换NaN值为空字符串
df = df.replace('\n', '', regex=True) # 替换换行符为空字符串
df = df.replace('undefined', '', regex=True) # 替换换行符为空字符串
# 将DataFrame转换为JSON格式的字符串
json_str = df.to_json(orient='records', lines=False, force_ascii=False)
# 保存为JSON文件,文件名与原文件相同,但扩展名为.json
json_file_path = os.path.splitext(file_path)[0] + '.json'
with open(json_file_path, 'w', encoding='utf-8') as json_file:
json_file.write(json_str)
print(f"File {file_path} has been successfully converted to JSON and saved as {json_file_path}")
# 使用示例
file_path = '装修.xlsx'
convert_file_to_json(file_path)