python从旧库中导出csv并导入新库
在线的游戏, 迁移数据库, 数据比较大, 游戏不能停很久, 先使用sqldump导入不变的表, 再使用python导出可变的表到csv文件, 导入到新库. 找出各表中csv中最大的id, 然后停服, 然后根据各表的id, 从id位置开始再导出新增数据, 再导入到新库.
export.py
""" 导出msql表格 """ import csv import os import shutil import pymysql #删除文件夹 def deleteFolder(path): if not os.path.isdir(path): return try: shutil.rmtree(path) print(f"成功删除文件夹 {path}") except OSError as e: print(f"删除文件夹 {path} 失败: {e}") def clearDir(path): if not os.path.isdir(path): return try: files = os.listdir(path) for cur in files: fullPath = os.path.join(path, cur) if os.path.isfile(fullPath): os.remove(fullPath) elif os.path.isdir(fullPath): deleteFolder(fullPath) print(f"成功清空文件夹 {path}") except OSError as e: print(f"清空文件夹 {path} 失败: {e}") def createEmptyFolder(folderName): """ 创建空文件夹 """ if os.path.exists(folderName): clearDir(folderName) else: os.makedirs(folderName, exist_ok=True) def export_table(db_config, table_name, id_col, export_start_id, chunk_size = 100000): # 创建数据库连接 connection = pymysql.connect(**db_config) cursor = connection.cursor() print(f"Exporting {table_name}...") folder_dir = f'export/{table_name}' createEmptyFolder(folder_dir) # 导出数据 page_num = 1 while True: cursor.execute(f"SELECT * FROM {table_name} where {id_col} > {export_start_id} order by {id_col} asc limit {chunk_size}") with open(f"{folder_dir}/{table_name + '_' + str(page_num)}.csv", 'w', newline='', encoding='utf-8') as f: # 获取查询结果 results = cursor.fetchall() if results and len(results) > 0: export_start_id = results[len(results) - 1][id_col] print(f"{table_name} Last id: {export_start_id}") # 写入CSV文件 writer = csv.DictWriter(f, fieldnames=results[0].keys()) # 取第一行数据的键作为列名 writer.writeheader() for row in results: writer.writerow(row) page_num = page_num + 1 if len(results) < chunk_size: break else: break print(f"Exported {table_name} to CSV.") # 关闭数据库连接 cursor.close() connection.close() if __name__ == "__main__": db_config = { 'host': '127.0.0.1', 'user': 'game', 'password': '123', 'database': 'game', 'port': 3306, # 默认MySQL端口是3306 'charset': 'utf8mb4', 'cursorclass': pymysql.cursors.DictCursor } export_table(db_config, 'd_user', 'uid', 265219, 10 * 10000) export_table(db_config, 'd_user_bind', 'uid', 265219, 10 * 10000) # 从2024-11-05 00:00:00 第一条id开始 export_table(db_config, 'd_desk_user', 'id', 13379855, 10 * 10000) export_table(db_config, 'coin_log', 'id', 40797349, 10 * 10000)
import.py
""" 导入mysql表格 """ import pandas as pd import os from datetime import datetime import pymysql import csv def now_time(): # 获取当前时间 now = datetime.now() # 使用f-string格式化时间输出 formatted_now = f"[{now:%Y-%m-%d %H:%M:%S}]" return formatted_now def get_mysql_connection(database_username, database_password, database_ip, database_name): # 数据库连接参数 db_config = { 'host': database_ip, 'user': database_username, 'password': database_password, 'database': database_name, 'charset': 'utf8mb4', 'cursorclass': pymysql.cursors.DictCursor } # 连接到MySQL数据库 connection = pymysql.connect(**db_config) return connection def import_one_csv(connection, csv_file, table_name): # 创建目标数据库连接 target_cursor = connection.cursor() # 导入CSV文件 print(f"Importing {csv_file}...") with open(csv_file, 'r', newline='', encoding='utf-8') as file: reader = csv.reader(file) headers = next(reader) # 读取表头 placeholders = ', '.join(['%s'] * len(headers)) # 创建占位符 data = list(reader) # 读取所有数据行 # 插入数据 for row in data: target_cursor.execute(f"INSERT INTO {table_name} ({', '.join(headers)}) VALUES ({placeholders})", row) connection.commit() # 提交事务 print(f"Imported {csv_file} ") # 关闭目标数据库连接 target_cursor.close() def import_one_csv_with_null_values(connection, csv_file, table_name): # 创建目标数据库连接 cursor = connection.cursor() # 获取表结构 cursor.execute(f"DESCRIBE {table_name}") columns = cursor.fetchall() # 创建一个字典来存储列名和对应的数据类型 column_types = {column['Field']: column['Type'] for column in columns} # 读取 CSV 文件 df = pd.read_csv(csv_file) # 遍历 DataFrame 中的每一行 for index, row in df.iterrows(): # 构建 SQL 插入语句的参数列表 params = [] # 构建 SQL 插入语句的列名和值部分 columns_part = [] values_part = [] for column, value in row.items(): # 检查列的数据类型 if column_types[column].startswith('int') and value == '': # 整数列,空字符串转换为 None params.append(None) elif pd.isnull(value): # 处理 NaN 值 params.append(None) else: # 其他列(如 varchar),保留空字符串 params.append(value if value != '' else None) columns_part.append(f"`{column}`") values_part.append('%s') # 构建完整的 SQL 插入语句 sql = f"INSERT INTO {table_name} ({', '.join(columns_part)}) VALUES ({', '.join(values_part)})" # 执行 SQL 语句 cursor.execute(sql, params) # 提交事务 connection.commit() print(f"Imported {csv_file} ") def import_table(connection, table_name, folder_path): num = 0 for filename in os.listdir(folder_path): if filename.endswith(".csv") and table_name in filename: num = num + 1 for i in range(1, num + 1): filename = f"{table_name}_{i}.csv" file_path = os.path.join(folder_path, filename) print(now_time(), f"开始导入 csv文件:{filename}") if table_name == 'd_user' or table_name == 'd_user_bind' or table_name == 'd_desk_user': import_one_csv_with_null_values(connection, file_path, table_name) else: import_one_csv(connection, file_path, table_name) def import_game_data(database_ip, database_username, database_password): database_name = 'game' connection = get_mysql_connection(database_username, database_password, database_ip, database_name) tables = ['d_user', 'd_user_bind', 'd_desk_user', 'coin_log'] for table in tables: import_table(connection, table, table) connection.close() if __name__ == "__main__": database_username = 'root' database_password = '123' database_ip = '127.0.0.1' now = datetime.now() print(now_time(), f"开始导入hub数据") import_game_data(database_ip, database_username, database_password) print(now_time(), f"导入完成,耗时:{end - now}")