NETCORE - 大数据操作 csv 文件方式

NETCORE - 大数据操作 csv 文件方式

 1. List 转换类

  \\N 为 null

        public static List<string> TrunsToList<T>(this List<T> listModel, string splitChar = ",")
        {
            var props = typeof(T).GetProperties();

            Func<T, List<string>> propFun = (p) =>
            {
                var temp = props.Select(prop =>
                {
                    var tt = prop.GetValue(p)?.ToString() ?? "\\N";

                    return $"\"{tt}\"";
                }).ToList();

                return temp;
            };

            List<string> result = listModel.Select(p =>
            {
                var str = string.Join(splitChar, propFun(p));

                return str;
            }).ToList();

            return result;

        }
    }

 

 

 2. 临时文件地址 ,使用 Path.GetTempPath()

    public class ProjectDirectiory
    {
        public static string ProjectRoot
        {
            get
            {
                var currentDir = Path.Combine(AppContext.BaseDirectory, "TestFile"); // Path.GetTempPath()
                return currentDir;
            }
        }
    }

 

 

        public static async Task<string> WriteToFileAsync(List<string> list, string filePath)
        {
            //System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);

            if (System.IO.File.Exists(filePath)) File.Delete(filePath);

            FileStream sfile = new FileStream(filePath, FileMode.Append, FileAccess.Write);
            StreamWriter myfile = new StreamWriter(sfile);
            try
            {
                for (int i = 0; i < list.Count; i++)
                {
                    myfile.Write(list[i] + "\n");
                }
                myfile.Flush();
            }
            catch (Exception ex)
            {

            }
            finally
            {
                myfile.Close();
                sfile.Close();
            }

            //await System.IO.File.WriteAllLinesAsync(filePath, list, new UTF8Encoding(false));

            return filePath;
        }

 

 

 3. 调用方式

    List<string> rows = list.ToList().TrunsToList<TtTrainLine>();

    var tempFileAllName = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".csv");

    await FileOperation.WriteToFile(string.Join("\n", rows), tempFileAllName);

    ConsoleTime.WriteLine(" tt_train_line 开始写入");

    string DbConnectionString_RailRunPlan = configuration.GetSection("ConnectionStrings:RailRunPlan").Value;

    await (new LagerDataExecute()).OptimizedBulkLoadAsync(DbConnectionString_RailRunPlan, tempFileAllName, "tt_train_line");

    if (File.Exists(tempFileAllName)) File.Delete(tempFileAllName);

    ConsoleTime.WriteLine(" tt_train_line 数据写入完成");

 

 

 

4. 大数据写入类

using MySqlConnector;
using Rail.Medium.Common;
using Rail.Medium.Config;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Rail.Service.RunPlan.MysqlExtend
{
    public class LagerDataExecute
    {
        public async Task OptimizedBulkLoadAsync(string connectionString, string filePath, string tableName)
        {
            await using var connection = new MySqlConnection(connectionString + ";AllowLoadLocalInfile=true;DefaultCommandTimeout=0");

            await connection.OpenAsync();

            await using var transacation = await connection.BeginTransactionAsync();
            try
            {
                // 1. 禁用索引和外键检查
                await DisableTableIndexesAsync(connection, tableName, transacation);

                var bulkLoader = new MySqlBulkLoader(connection)
                {
                    TableName = tableName,
                    FileName = filePath,
                    FieldTerminator = ",",
                    LineTerminator = "\n",
                    NumberOfLinesToSkip = 0,
                    FieldQuotationCharacter = '"',
                    EscapeCharacter = '\\',
                    Local = true
                    //Columns = {"Id","" }
                };

                // 2. 执行导入
                var stopwatch = Stopwatch.StartNew();
                var rowCount = await bulkLoader.LoadAsync();
                stopwatch.Stop();

                Console.WriteLine($"导入 {rowCount} 行数据,耗时 {stopwatch.Elapsed.TotalSeconds:F2}秒");
            }
            catch (Exception ex)
            {
                await transacation.RollbackAsync();
                ConsoleTime.WriteLine("导入失败");
            }
            finally
            {
                // 3. 重新启用约束
                await EnableTableIndexesAsync(connection, tableName, transacation);
                await connection.CloseAsync();
            }
        }

        public static async Task Insert(IEnumerable<string> contents, string tablename, string connectionString)
        {
            // 先将数据写入CSV文件
            //var csvPath = Path.GetTempFileName();

            var csvPath = Path.GetTempPath() + Path.GetFileNameWithoutExtension(Path.GetTempFileName()) + ".csv";

            await File.WriteAllLinesAsync(csvPath, contents);

            ConsoleTime.WriteLine("数据写入CSV完成");

            await using var connection = new MySqlConnection(connectionString);

            await connection.OpenAsync();


            // 3. 关键优化:禁用索引和约束(提升3-5倍速度)
            await DisableTableIndexesAsync(connection, tablename);

            try
            {
                // 4. 执行 LOAD DATA INFILE
                var loadCommand = connection.CreateCommand();
                loadCommand.CommandText = $"LOAD DATA LOCAL INFILE '{csvPath.Replace("\\", "/")}' INTO TABLE {tablename} FIELDS TERMINATED BY ','";
                await loadCommand.ExecuteNonQueryAsync();
            }
            catch (Exception ex)
            {
                ConsoleTime.WriteLine(ex.Message);
            }
            finally
            {
                // 5. 重新启用索引和约束
                await EnableTableIndexesAsync(connection, tablename);
                File.Delete(csvPath); // 清理临时文件
            }
        }

        public static async Task DisableTableIndexesAsync(MySqlConnection connection, string tableName, MySqlTransaction transaction = null)
        {
            var cmd = connection.CreateCommand();
            cmd.CommandText = $"ALTER TABLE {tableName} DISABLE KEYS;SET FOREIGN_KEY_CHECKS = 0;SET UNIQUE_CHECKS = 0;SET GLOBAL local_infile = 1;";

            if (transaction != null) cmd.Transaction = transaction;

            await cmd.ExecuteNonQueryAsync();

        }

        public static async Task EnableTableIndexesAsync(MySqlConnection connection, string tableName, MySqlTransaction transaction = null)
        {
            var cmd = connection.CreateCommand();
            cmd.CommandText = $"ALTER TABLE {tableName} ENABLE KEYS;SET FOREIGN_KEY_CHECKS = 1;SET UNIQUE_CHECKS = 1;";

            if (transaction != null) cmd.Transaction = transaction;

            await cmd.ExecuteNonQueryAsync();
        }
    }
}

 

 

 

 

 

 

 

 

MySQL InnoDB 关键参数配置建议

这三个参数对MySQL的写入性能有重大影响,特别是在批量导入数据时。以下是针对您50万数据批量导入场景的具体配置建议:

1. innodb_buffer_pool_size(缓冲池大小)

推荐值:服务器物理内存的50-80%

  • 作用:缓存表数据和索引,减少磁盘I/O

  • 建议

    • 4GB内存服务器:2G (50%)

    • 8GB内存服务器:4G-6G (50-75%)

    • 16GB内存服务器:12G (75%)

    • 32GB+内存服务器:24G (75%) 

完整配置示例(my.cnf/my.ini)

[mysqld]
# 缓冲池配置(16GB内存服务器示例)
innodb_buffer_pool_size =24G

 

测试情况

 100w 数据 40秒左右
 300w 数据 125秒左右
 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

end

posted @ 2025-04-28 13:33  无心々菜  阅读(12)  评论(0)    收藏  举报