NETCORE - Handling Large Data Sets via CSV Files
1. List conversion class

In the generated rows, \N represents a null value.
// NOTE: the containing static class is added here (name assumed) -- extension methods must live in a static class.
public static class ListTransformExtension
{
    public static List<string> TrunsToList<T>(this List<T> listModel, string splitChar = ",")
    {
        var props = typeof(T).GetProperties();

        // Turn one model instance into its quoted column values; a null property becomes \N.
        Func<T, List<string>> propFun = (p) =>
        {
            var temp = props.Select(prop =>
            {
                var tt = prop.GetValue(p)?.ToString() ?? "\\N";
                return $"\"{tt}\"";
            }).ToList();
            return temp;
        };

        // Join each row's column values with the field separator.
        List<string> result = listModel.Select(p =>
        {
            var str = string.Join(splitChar, propFun(p));
            return str;
        }).ToList();

        return result;
    }
}
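For illustration, a minimal sketch of what the conversion produces, using a hypothetical three-property TtTrainLine model (the real entity has the table's actual columns); a null property value is emitted as \N:

// Hypothetical model for illustration only.
public class TtTrainLine
{
    public int Id { get; set; }
    public string TrainCode { get; set; }
    public string Remark { get; set; }
}

var list = new List<TtTrainLine>
{
    new TtTrainLine { Id = 1, TrainCode = "G101", Remark = null }
};

List<string> rows = list.TrunsToList();
// rows[0] is "1","G101","\N" (columns follow the property reflection order)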
2. Temporary file location (Path.GetTempPath() can be used instead)
public class ProjectDirectiory
{
    public static string ProjectRoot
    {
        get
        {
            var currentDir = Path.Combine(AppContext.BaseDirectory, "TestFile"); // or Path.GetTempPath()
            return currentDir;
        }
    }
}
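A minimal usage sketch, assuming the directory may not exist yet and a unique file name is wanted:

// Make sure the target directory exists, then build a unique CSV path inside it.
Directory.CreateDirectory(ProjectDirectiory.ProjectRoot);
var csvPath = Path.Combine(ProjectDirectiory.ProjectRoot, Guid.NewGuid() + ".csv");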
public static async Task<string> WriteToFileAsync(List<string> list, string filePath)
{
    //System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
    if (System.IO.File.Exists(filePath))
        File.Delete(filePath);

    FileStream sfile = new FileStream(filePath, FileMode.Append, FileAccess.Write);
    StreamWriter myfile = new StreamWriter(sfile);
    try
    {
        // One CSV row per line; "\n" matches the LineTerminator configured on MySqlBulkLoader below.
        for (int i = 0; i < list.Count; i++)
        {
            myfile.Write(list[i] + "\n");
        }
        await myfile.FlushAsync();
    }
    catch (Exception)
    {
        // Errors are swallowed; the streams are still closed in finally.
    }
    finally
    {
        myfile.Close();
        sfile.Close();
    }
    //await System.IO.File.WriteAllLinesAsync(filePath, list, new UTF8Encoding(false));
    return filePath;
}
3. Usage
List<string> rows = list.ToList().TrunsToList<TtTrainLine>();
var tempFileAllName = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".csv");
await FileOperation.WriteToFileAsync(rows, tempFileAllName);

ConsoleTime.WriteLine("tt_train_line write starting");
string DbConnectionString_RailRunPlan = configuration.GetSection("ConnectionStrings:RailRunPlan").Value;
await (new LagerDataExecute()).OptimizedBulkLoadAsync(DbConnectionString_RailRunPlan, tempFileAllName, "tt_train_line");

if (File.Exists(tempFileAllName))
    File.Delete(tempFileAllName);
ConsoleTime.WriteLine("tt_train_line write complete");
4. Bulk data write class
using MySqlConnector;
using Rail.Medium.Common;
using Rail.Medium.Config;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Rail.Service.RunPlan.MysqlExtend
{
    public class LagerDataExecute
    {
        public async Task OptimizedBulkLoadAsync(string connectionString, string filePath, string tableName)
        {
            await using var connection = new MySqlConnection(connectionString + ";AllowLoadLocalInfile=true;DefaultCommandTimeout=0");
            await connection.OpenAsync();
            await using var transaction = await connection.BeginTransactionAsync();
            try
            {
                // 1. Disable indexes and foreign key checks
                await DisableTableIndexesAsync(connection, tableName, transaction);

                var bulkLoader = new MySqlBulkLoader(connection)
                {
                    TableName = tableName,
                    FileName = filePath,
                    FieldTerminator = ",",
                    LineTerminator = "\n",
                    NumberOfLinesToSkip = 0,
                    FieldQuotationCharacter = '"',
                    EscapeCharacter = '\\',
                    Local = true
                    //Columns = { "Id", "" }
                };

                // 2. Run the import
                var stopwatch = Stopwatch.StartNew();
                var rowCount = await bulkLoader.LoadAsync();
                stopwatch.Stop();

                // Commit the import before re-enabling the constraints.
                await transaction.CommitAsync();
                Console.WriteLine($"Imported {rowCount} rows in {stopwatch.Elapsed.TotalSeconds:F2} s");
            }
            catch (Exception ex)
            {
                await transaction.RollbackAsync();
                ConsoleTime.WriteLine("Import failed: " + ex.Message);
            }
            finally
            {
                // 3. Re-enable constraints (the transaction has already completed at this point)
                await EnableTableIndexesAsync(connection, tableName);
                await connection.CloseAsync();
            }
        }

        public static async Task Insert(IEnumerable<string> contents, string tablename, string connectionString)
        {
            // Write the rows to a CSV file first
            //var csvPath = Path.GetTempFileName();
            var csvPath = Path.GetTempPath() + Path.GetFileNameWithoutExtension(Path.GetTempFileName()) + ".csv";
            await File.WriteAllLinesAsync(csvPath, contents);
            ConsoleTime.WriteLine("CSV file written");

            // NOTE: the connection string must include AllowLoadLocalInfile=true for LOAD DATA LOCAL INFILE to be allowed by MySqlConnector.
            await using var connection = new MySqlConnection(connectionString);
            await connection.OpenAsync();

            // 3. Key optimization: disable indexes and constraints (roughly 3-5x faster)
            await DisableTableIndexesAsync(connection, tablename);
            try
            {
                // 4. Execute LOAD DATA INFILE
                var loadCommand = connection.CreateCommand();
                loadCommand.CommandText = $"LOAD DATA LOCAL INFILE '{csvPath.Replace("\\", "/")}' INTO TABLE {tablename} FIELDS TERMINATED BY ','";
                await loadCommand.ExecuteNonQueryAsync();
            }
            catch (Exception ex)
            {
                ConsoleTime.WriteLine(ex.Message);
            }
            finally
            {
                // 5. Re-enable indexes and constraints
                await EnableTableIndexesAsync(connection, tablename);
                File.Delete(csvPath); // clean up the temporary file
            }
        }

        public static async Task DisableTableIndexesAsync(MySqlConnection connection, string tableName, MySqlTransaction transaction = null)
        {
            var cmd = connection.CreateCommand();
            cmd.CommandText = $"ALTER TABLE {tableName} DISABLE KEYS;SET FOREIGN_KEY_CHECKS = 0;SET UNIQUE_CHECKS = 0;SET GLOBAL local_infile = 1;";
            if (transaction != null)
                cmd.Transaction = transaction;
            await cmd.ExecuteNonQueryAsync();
        }

        public static async Task EnableTableIndexesAsync(MySqlConnection connection, string tableName, MySqlTransaction transaction = null)
        {
            var cmd = connection.CreateCommand();
            cmd.CommandText = $"ALTER TABLE {tableName} ENABLE KEYS;SET FOREIGN_KEY_CHECKS = 1;SET UNIQUE_CHECKS = 1;";
            if (transaction != null)
                cmd.Transaction = transaction;
            await cmd.ExecuteNonQueryAsync();
        }
    }
}
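A usage sketch for the static Insert variant (the rows and connection string below are placeholders; note that its LOAD DATA statement only declares FIELDS TERMINATED BY ',', so the rows passed in should be plain comma-separated values rather than the quoted output of TrunsToList):

// Hypothetical call site; values are illustrative only.
IEnumerable<string> rows = new[] { "1,G101,2024-05-01", "2,G102,\\N" };
string connStr = configuration.GetSection("ConnectionStrings:RailRunPlan").Value + ";AllowLoadLocalInfile=true";
await LagerDataExecute.Insert(rows, "tt_train_line", connStr);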
MySQL InnoDB key parameter configuration recommendations
These parameters have a major impact on MySQL write performance, especially when bulk-importing data. The recommendations below target the scenario above of bulk-importing 500,000 rows:
1. innodb_buffer_pool_size (buffer pool size)

Recommended value: 50-80% of the server's physical memory.

Purpose: caches table data and indexes, reducing disk I/O.

Suggested settings:

- 4 GB RAM server: 2G (50%)
- 8 GB RAM server: 4G-6G (50-75%)
- 16 GB RAM server: 12G (75%)
- 32 GB+ RAM server: 24G (75%)
Complete configuration example (my.cnf / my.ini)
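A minimal my.cnf sketch consistent with the sizing list above, assuming an 8 GB server; these are starting points, not tuned settings, and local_infile is only needed because the import code uses LOAD DATA LOCAL INFILE:

[mysqld]
# Buffer pool: 50-75% of physical RAM (example for an assumed 8 GB server)
innodb_buffer_pool_size = 4G
# Allow LOAD DATA LOCAL INFILE, which the bulk import code relies on
local_infile = 1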
Measured import times:

- 1,000,000 rows: about 40 seconds
- 3,000,000 rows: about 125 seconds
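For reference, both figures work out to roughly 24,000-25,000 rows per second (1,000,000 / 40 ≈ 25,000 and 3,000,000 / 125 = 24,000).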
end