Using Python to export CSVs from the old database and import them into the new one

An online game needs its database migrated. The data set is fairly large and the game cannot stay offline for long, so the migration is done in two passes: first use mysqldump to move the tables that do not change, then use Python to export the tables that keep changing to CSV files and import them into the new database. Next, find the largest id in each table's exported CSVs, stop the game server, export only the rows added after each table's recorded id, and import that delta into the new database as well.
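export.py below already prints the last id of every exported chunk, but the largest exported id per table can also be recovered from the CSV files themselves. A minimal sketch, assuming the export/&lt;table&gt;/&lt;table&gt;_&lt;n&gt;.csv layout written by export.py and that the id column name is known (find_max_exported_id is a hypothetical helper, not part of the scripts below):

import csv
import glob
import os

def find_max_exported_id(table_name, id_col):
    # Hypothetical helper: scan every exported chunk of a table and
    # return the largest value seen in its id column.
    max_id = 0
    for path in glob.glob(os.path.join('export', table_name, f'{table_name}_*.csv')):
        with open(path, 'r', newline='', encoding='utf-8') as f:
            for row in csv.DictReader(f):
                max_id = max(max_id, int(row[id_col]))
    return max_id

# These ids are then used as export_start_id for the delta run after the server stops.
print(find_max_exported_id('d_user', 'uid'))
print(find_max_exported_id('coin_log', 'id'))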

export.py

"""
Export MySQL tables
"""
import csv
import os
import shutil

import pymysql


# Delete a folder and everything in it
def deleteFolder(path):
    if not os.path.isdir(path):
        return
    try:
        shutil.rmtree(path)
        print(f"Deleted folder {path}")
    except OSError as e:
        print(f"Failed to delete folder {path}: {e}")

# Remove all files and subfolders inside a folder, keeping the folder itself
def clearDir(path):
    if not os.path.isdir(path):
        return
    try:
        files = os.listdir(path)
        for cur in files:
            fullPath = os.path.join(path, cur)
            if os.path.isfile(fullPath):
                os.remove(fullPath)
            elif os.path.isdir(fullPath):
                deleteFolder(fullPath)
        print(f"Cleared folder {path}")
    except OSError as e:
        print(f"Failed to clear folder {path}: {e}")

def createEmptyFolder(folderName):
    """
    Create an empty folder; clear its contents if it already exists
    """
    if os.path.exists(folderName):
        clearDir(folderName)
    else:
        os.makedirs(folderName, exist_ok=True)

def export_table(db_config, table_name, id_col, export_start_id, chunk_size=100000):
    # Open a connection to the source database
    connection = pymysql.connect(**db_config)
    cursor = connection.cursor()

    print(f"Exporting {table_name}...")

    folder_dir = f'export/{table_name}'
    createEmptyFolder(folder_dir)
    # Export the data in chunks, paging forward by the id column
    page_num = 1
    while True:
        cursor.execute(f"SELECT * FROM {table_name} where {id_col} > {export_start_id} order by {id_col} asc limit {chunk_size}")
        results = cursor.fetchall()
        if not results:
            break
        # Remember the last id of this chunk; the next query starts after it
        export_start_id = results[-1][id_col]
        print(f"{table_name} Last id: {export_start_id}")
        with open(f"{folder_dir}/{table_name}_{page_num}.csv", 'w', newline='', encoding='utf-8') as f:
            # Column names come from the keys of the first row (DictCursor)
            writer = csv.DictWriter(f, fieldnames=results[0].keys())
            writer.writeheader()
            writer.writerows(results)
        page_num = page_num + 1
        if len(results) < chunk_size:
            break

    print(f"Exported {table_name} to CSV.")

    # Close the database connection
    cursor.close()
    connection.close()

if __name__ == "__main__":
    db_config = {
        'host': '127.0.0.1',
        'user': 'game',
        'password': '123',
        'database': 'game',
        'port': 3306,  # MySQL's default port is 3306
        'charset': 'utf8mb4',
        'cursorclass': pymysql.cursors.DictCursor
    }
    export_table(db_config, 'd_user', 'uid', 265219, 10 * 10000)
    export_table(db_config, 'd_user_bind', 'uid', 265219, 10 * 10000)

    # Start from the first id recorded after 2024-11-05 00:00:00
    export_table(db_config, 'd_desk_user', 'id', 13379855, 10 * 10000)
    export_table(db_config, 'coin_log', 'id', 40797349, 10 * 10000)


import.py

"""
Import MySQL tables
"""
import pandas as pd
import os
from datetime import datetime
import pymysql
import csv

def now_time():
    # Get the current time
    now = datetime.now()
    # Format it with an f-string
    formatted_now = f"[{now:%Y-%m-%d %H:%M:%S}]"
    return formatted_now
def get_mysql_connection(database_username, database_password, database_ip, database_name):
    # Database connection parameters
    db_config = {
        'host': database_ip,
        'user': database_username,
        'password': database_password,
        'database': database_name,
        'charset': 'utf8mb4',
        'cursorclass': pymysql.cursors.DictCursor
    }
    # Connect to MySQL
    connection = pymysql.connect(**db_config)
    return connection

def import_one_csv(connection, csv_file, table_name):
    # Cursor on the target database
    target_cursor = connection.cursor()

    # Import the CSV file
    print(f"Importing {csv_file}...")
    with open(csv_file, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        headers = next(reader)  # Read the header row
        placeholders = ', '.join(['%s'] * len(headers))  # Build the placeholder list
        data = list(reader)  # Read all remaining data rows

        # Insert the rows
        for row in data:
            target_cursor.execute(f"INSERT INTO {table_name} ({', '.join(headers)}) VALUES ({placeholders})", row)
        connection.commit()  # Commit the transaction

    print(f"Imported {csv_file}")

    # Close the cursor on the target database
    target_cursor.close()

def import_one_csv_with_null_values(connection, csv_file, table_name):
    # Cursor on the target database
    cursor = connection.cursor()
    # Read the table structure
    cursor.execute(f"DESCRIBE {table_name}")
    columns = cursor.fetchall()

    # Map each column name to its MySQL data type
    column_types = {column['Field']: column['Type'] for column in columns}

    # Read the CSV file
    df = pd.read_csv(csv_file)
    # Walk every row of the DataFrame
    for index, row in df.iterrows():
        # Parameter list for the INSERT statement
        params = []
        # Column-name and placeholder parts of the INSERT statement
        columns_part = []
        values_part = []
        for column, value in row.items():
            # Check the column's data type
            if column_types[column].startswith('int') and value == '':
                # Integer column: an empty string becomes None (NULL)
                params.append(None)
            elif pd.isnull(value):
                # NaN values become None (NULL)
                params.append(None)
            else:
                # Other columns (e.g. varchar): keep the value, mapping '' to None
                params.append(value if value != '' else None)
            columns_part.append(f"`{column}`")
            values_part.append('%s')

        # Build the full INSERT statement
        sql = f"INSERT INTO {table_name} ({', '.join(columns_part)}) VALUES ({', '.join(values_part)})"
        # Execute it
        cursor.execute(sql, params)
    # Commit the transaction
    connection.commit()
    cursor.close()
    print(f"Imported {csv_file}")

def import_table(connection, table_name, folder_path):
    # Count how many CSV chunks exist for this table
    num = 0
    for filename in os.listdir(folder_path):
        if filename.endswith(".csv") and table_name in filename:
            num = num + 1
    # Import the chunks in order: <table>_1.csv, <table>_2.csv, ...
    for i in range(1, num + 1):
        filename = f"{table_name}_{i}.csv"
        file_path = os.path.join(folder_path, filename)
        print(now_time(), f"Importing CSV file: {filename}")
        if table_name == 'd_user' or table_name == 'd_user_bind' or table_name == 'd_desk_user':
            import_one_csv_with_null_values(connection, file_path, table_name)
        else:
            import_one_csv(connection, file_path, table_name)


def import_game_data(database_ip, database_username, database_password):
    database_name = 'game'
    connection = get_mysql_connection(database_username, database_password, database_ip, database_name)
    tables = ['d_user', 'd_user_bind', 'd_desk_user', 'coin_log']
    for table in tables:
        import_table(connection, table, table)
    connection.close()

if __name__ == "__main__":
    database_username = 'root'
    database_password = '123'
    database_ip = '127.0.0.1'

    start = datetime.now()
    print(now_time(), "Start importing hub data")
    import_game_data(database_ip, database_username, database_password)
    end = datetime.now()
    print(now_time(), f"Import finished, elapsed: {end - start}")


posted on 2024-11-15 10:43  悉野