Entity Framework vs. ADO.NET: Bulk Insert Performance Test
Entity Framework is an easy-to-use ORM framework for the .NET platform. It lends itself to object-oriented design of both the domain model and the persistence layer, and it improves code maintainability. In practice, however, there are a few kinds of workloads that EF does not handle well, such as writing large batches of rows of the same type. I therefore ran some comparative tests, which are shared here for reference.
Suppose we need to build a bulk user-import feature that loads 1,000 to 10,000 User rows from some source into a SQL Server database. The common approaches I have heard of are:
- Executing a single-row ADO.NET SqlCommand 10,000 times (common sense says this would not even qualify as an EF replacement performance-wise, so it is not tested here)
- Using a StringBuilder to concatenate the 10,000 INSERT statements into one or a few SqlCommand batches
- Inserting with Entity Framework's out-of-the-box functionality
- Bulk inserting with SqlBulkCopy
- Using stored procedures: two variants correspond to cases 1 and 2 above, plus a third that takes a table-valued parameter
Database preparation:
CREATE DATABASE BulkInsertTest
GO

USE BulkInsertTest
GO

CREATE TABLE [dbo].[User](
    [Id] [int] IDENTITY(1,1) NOT NULL,
    [Name] [nvarchar](50) NOT NULL,
    [Birthday] [date] NOT NULL,
    [Gender] [char](1) NOT NULL,
    [Email] [nvarchar](50) NOT NULL,
    [Deleted] [bit] NOT NULL,
    CONSTRAINT [PK_User] PRIMARY KEY CLUSTERED
    (
        [Id] ASC
    ) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO

CREATE PROCEDURE [dbo].[InsertUser]
    @Name nvarchar(50)
    ,@Birthday date
    ,@Gender char(1)
    ,@Email nvarchar(50)
    ,@Deleted bit
AS
BEGIN
    INSERT INTO [BulkInsertTest].[dbo].[User]
        ([Name]
        ,[Birthday]
        ,[Gender]
        ,[Email]
        ,[Deleted])
    VALUES
        (@Name, @Birthday, @Gender, @Email, @Deleted)
END
GO  -- end the batch here so that CREATE TYPE is not absorbed into the procedure body

/* Create a table type. */
CREATE TYPE LocationTableType AS TABLE
(
    Name nvarchar(50)
    ,Birthday date
    ,Gender char(1)
    ,Email nvarchar(50)
    ,Deleted bit
);
GO

/* Create a procedure to receive data for the table-valued parameter. */
CREATE PROCEDURE [dbo].[InsertUsers]
    @Users LocationTableType READONLY  -- table-valued parameters must be declared READONLY
AS
    SET NOCOUNT ON
    INSERT INTO [dbo].[User]
        ([Name]
        ,[Birthday]
        ,[Gender]
        ,[Email]
        ,[Deleted])
    SELECT * FROM @Users;
GO
C# code for the DbContext and the User entity:
using System;
using System.ComponentModel.DataAnnotations;
using System.ComponentModel.DataAnnotations.Schema;
using System.Data.Entity;

namespace ConsoleApplication5
{
    public class MyDbContext : DbContext
    {
        public MyDbContext()
            : base("MyDbContext")
        {
        }

        public MyDbContext(string connectionString)
            : base(connectionString)
        {
        }

        public DbSet<User> Users { get; set; }
    }

    [Table("User")]
    public class User
    {
        [Key]
        public int Id { get; set; }
        public string Name { get; set; }
        public DateTime Birthday { get; set; }
        public string Gender { get; set; }
        public string Email { get; set; }
        public bool Deleted { get; set; }
    }
}
C# code for the test program:
using System;
using System.Data;
using System.Data.SqlClient;
using System.Diagnostics;
using System.Linq;
using System.Text;

namespace ConsoleApplication5
{
    class Program
    {
        private const string ConnectionString = "Data Source=.;Initial Catalog=BulkInsertTest;User=sa;Password=IGTtest1";
        private const int Times = 10;
        private const int Entries = 10000;

        static void Main(string[] args)
        {
            long sumBulkCopyTime = 0, sumSqlCmdsTime = 0, sumMultiSpTime = 0, sumTableSpTime = 0, sumEfTime = 0;
            long maxBulkCopyTime = 0, maxSqlCmdsTime = 0, maxMultiSpTime = 0, maxTableSpTime = 0, maxEfTime = 0;

            for (int i = 0; i < Times; i++)
            {
                long bulkCopyTime = InsertBySqlBulkCopy();
                sumBulkCopyTime += bulkCopyTime;
                maxBulkCopyTime = Math.Max(maxBulkCopyTime, bulkCopyTime);

                long sqlCmdsTime = InsertBySqlCmds();
                sumSqlCmdsTime += sqlCmdsTime;
                maxSqlCmdsTime = Math.Max(maxSqlCmdsTime, sqlCmdsTime);

                long multiSpTime = InsertByMultiStoreProcedure();
                sumMultiSpTime += multiSpTime;
                maxMultiSpTime = Math.Max(maxMultiSpTime, multiSpTime);

                long tableSpTime = InsertByTableStoreProcedure();
                sumTableSpTime += tableSpTime;
                maxTableSpTime = Math.Max(maxTableSpTime, tableSpTime);

                long efTime = InsertByEntityFramework();
                sumEfTime += efTime;
                maxEfTime = Math.Max(maxEfTime, efTime);
            }

            Console.WriteLine(new string('-', 40));
            Console.WriteLine("Time Cost of SqlBulkCopy: avg:{0}ms, max:{1}ms", sumBulkCopyTime / Times, maxBulkCopyTime);
            Console.WriteLine("Time Cost of SqlCommands: avg:{0}ms, max:{1}ms", sumSqlCmdsTime / Times, maxSqlCmdsTime);
            Console.WriteLine("Time Cost of MultiStoreProcedure: avg:{0}ms, max:{1}ms", sumMultiSpTime / Times, maxMultiSpTime);
            Console.WriteLine("Time Cost of TableStoreProcedure: avg:{0}ms, max:{1}ms", sumTableSpTime / Times, maxTableSpTime);
            Console.WriteLine("Time Cost of EntityFramework: avg:{0}ms, max:{1}ms", sumEfTime / Times, maxEfTime);
            Console.ReadLine();
        }

        // Concatenate all INSERT statements into a single SqlCommand and execute it in one transaction.
        private static long InsertBySqlCmds()
        {
            Stopwatch stopwatch = Stopwatch.StartNew();
            using (var connection = new SqlConnection(ConnectionString))
            {
                SqlTransaction transaction = null;
                connection.Open();
                try
                {
                    transaction = connection.BeginTransaction();
                    StringBuilder sb = new StringBuilder();
                    for (int j = 0; j < Entries; j++)
                    {
                        sb.AppendFormat(
                            @"INSERT INTO dbo.[User] ([Name],[Birthday],[Gender],[Email],[Deleted]) VALUES('{0}','{1:yyyy-MM-dd}','{2}','{3}',{4});",
                            "name" + j, DateTime.Now.AddDays(j), 'M', "user" + j + "@abc.com", 0);
                    }
                    var sqlCmd = connection.CreateCommand();
                    sqlCmd.CommandText = sb.ToString();
                    sqlCmd.Transaction = transaction;
                    sqlCmd.ExecuteNonQuery();
                    transaction.Commit();
                }
                catch
                {
                    if (transaction != null)
                    {
                        transaction.Rollback();
                    }
                    throw;
                }
            }
            stopwatch.Stop();
            Console.WriteLine("SqlCommand time cost: {0}ms", stopwatch.ElapsedMilliseconds);
            return stopwatch.ElapsedMilliseconds;
        }

        // Call the single-row stored procedure in 10 batches, each containing Entries/10 EXECUTE statements.
        private static long InsertByMultiStoreProcedure()
        {
            Stopwatch stopwatch = Stopwatch.StartNew();
            using (var connection = new SqlConnection(ConnectionString))
            {
                SqlTransaction transaction = null;
                connection.Open();
                for (int i = 0; i < 10; i++)
                {
                    try
                    {
                        transaction = connection.BeginTransaction();
                        StringBuilder sb = new StringBuilder();
                        for (int j = 0; j < Entries / 10; j++)
                        {
                            sb.AppendFormat(
                                @"EXECUTE [dbo].[InsertUser] '{0}','{1:yyyy-MM-dd}','{2}','{3}',{4};",
                                "name" + j, DateTime.Now.AddDays(j), 'M', "user" + j + "@abc.com", 0);
                        }
                        var sqlCmd = connection.CreateCommand();
                        sqlCmd.CommandText = sb.ToString();
                        sqlCmd.Transaction = transaction;
                        sqlCmd.ExecuteNonQuery();
                        transaction.Commit();
                    }
                    catch
                    {
                        if (transaction != null)
                        {
                            transaction.Rollback();
                        }
                        throw;
                    }
                }
            }
            stopwatch.Stop();
            Console.WriteLine("MultiStoreProcedure time cost: {0}ms", stopwatch.ElapsedMilliseconds);
            return stopwatch.ElapsedMilliseconds;
        }

        // Pass all rows at once to the InsertUsers stored procedure through a table-valued parameter.
        private static long InsertByTableStoreProcedure()
        {
            Stopwatch stopwatch = Stopwatch.StartNew();
            var table = PrepareDataTable();
            using (var connection = new SqlConnection(ConnectionString))
            {
                SqlTransaction transaction = null;
                connection.Open();
                try
                {
                    transaction = connection.BeginTransaction();
                    var sqlCmd = connection.CreateCommand();
                    sqlCmd.CommandText = "InsertUsers";
                    sqlCmd.CommandType = CommandType.StoredProcedure;
                    sqlCmd.Parameters.Add(new SqlParameter("@Users", SqlDbType.Structured));
                    sqlCmd.Parameters["@Users"].Value = table;
                    sqlCmd.Transaction = transaction;
                    sqlCmd.ExecuteNonQuery();
                    transaction.Commit();
                }
                catch
                {
                    if (transaction != null)
                    {
                        transaction.Rollback();
                    }
                    throw;
                }
            }
            stopwatch.Stop();
            Console.WriteLine("TableStoreProcedure time cost: {0}ms", stopwatch.ElapsedMilliseconds);
            return stopwatch.ElapsedMilliseconds;
        }

        private static long InsertBySqlBulkCopy()
        {
            Stopwatch stopwatch = Stopwatch.StartNew();
            var table = PrepareDataTable();
            SqlBulkCopy(table);
            stopwatch.Stop();
            Console.WriteLine("SqlBulkCopy time cost: {0}ms", stopwatch.ElapsedMilliseconds);
            return stopwatch.ElapsedMilliseconds;
        }

        // Build an in-memory DataTable with the same schema as dbo.[User] (minus the identity column).
        private static DataTable PrepareDataTable()
        {
            DataTable table = new DataTable();
            table.Columns.Add("Name", typeof(string));
            table.Columns.Add("Birthday", typeof(DateTime));
            table.Columns.Add("Gender", typeof(char));
            table.Columns.Add("Email", typeof(string));
            table.Columns.Add("Deleted", typeof(bool));

            for (int i = 0; i < Entries; i++)
            {
                var row = table.NewRow();
                row["Name"] = "name" + i;
                row["Birthday"] = DateTime.Now.AddDays(i);
                row["Gender"] = 'M';
                row["Email"] = "user" + i + "@abc.com";
                row["Deleted"] = false;
                table.Rows.Add(row);
            }
            return table;
        }

        private static void SqlBulkCopy(DataTable dataTable)
        {
            using (var connection = new SqlConnection(ConnectionString))
            {
                SqlTransaction transaction = null;
                connection.Open();
                try
                {
                    transaction = connection.BeginTransaction();
                    using (var sqlBulkCopy = new SqlBulkCopy(connection, SqlBulkCopyOptions.Default, transaction))
                    {
                        sqlBulkCopy.BatchSize = dataTable.Rows.Count;
                        sqlBulkCopy.DestinationTableName = "[User]";
                        //sqlBulkCopy.ColumnMappings.Add("Id", "Id");
                        sqlBulkCopy.ColumnMappings.Add("Name", "Name");
                        sqlBulkCopy.ColumnMappings.Add("Birthday", "Birthday");
                        sqlBulkCopy.ColumnMappings.Add("Gender", "Gender");
                        sqlBulkCopy.ColumnMappings.Add("Email", "Email");
                        sqlBulkCopy.ColumnMappings.Add("Deleted", "Deleted");
                        sqlBulkCopy.WriteToServer(dataTable);
                    }
                    transaction.Commit();
                }
                catch
                {
                    if (transaction != null)
                    {
                        transaction.Rollback();
                    }
                    throw;
                }
            }
        }

        // Add all entities to the EF context and save once; change tracking and validation are disabled
        // to give EF its best chance.
        private static long InsertByEntityFramework()
        {
            Stopwatch stopwatch = Stopwatch.StartNew();
            using (MyDbContext context = new MyDbContext(ConnectionString))
            {
                context.Configuration.AutoDetectChangesEnabled = false;
                context.Configuration.ValidateOnSaveEnabled = false;
                for (int i = 0; i < Entries; i++)
                {
                    context.Users.Add(new User()
                    {
                        Name = "name" + i,
                        Birthday = DateTime.Now.AddDays(i),
                        Gender = "F",
                        Email = "user" + i + "@abc.com",
                        Deleted = false
                    });
                }
                context.SaveChanges();
            }
            stopwatch.Stop();
            Console.WriteLine("EntityFramework time cost: {0}ms", stopwatch.ElapsedMilliseconds);
            return stopwatch.ElapsedMilliseconds;
        }
    }
}
Test results for inserting 1,000 rows:
Test results for inserting 10,000 rows:
Analysis and conclusions: On raw performance alone, SqlBulkCopy and the table-valued-parameter stored procedure win, and both beat Entity Framework by a wide margin, so when EF genuinely cannot meet a performance requirement, either of them solves EF's bulk-insert problem nicely. Performance is not the only yardstick for a software product, though; we also have to weigh design elegance against speed. When the data volume is small, or the insert runs automatically during a low-traffic window, EF remains a perfectly good choice. In terms of maintainability, the ADO.NET implementations are less readable and less refactoring-friendly than the EF one, so for a domain model whose requirements change frequently, all of these solutions need extra design abstraction and unit tests to keep the product evolving (a sketch of such an abstraction follows below). In terms of deployment impact, the SqlBulkCopy and concatenated-SQL variants of the ADO.NET approach require no additional stored procedures, so they can be swapped in and out with the EF implementation without touching the database deployment.
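As an illustration of that kind of design abstraction, the fast ADO.NET paths and the EF path could sit behind a common interface so that callers never know which one is in use. This is only a minimal sketch: the IUserBulkInserter interface and the class name below are hypothetical and not part of the original test code, which only reuses the MyDbContext/User types defined earlier.

using System.Collections.Generic;

namespace ConsoleApplication5
{
    // Hypothetical abstraction: callers depend on this interface, not on EF or ADO.NET directly.
    public interface IUserBulkInserter
    {
        void Insert(IEnumerable<User> users);
    }

    // EF-backed implementation, reusing the MyDbContext and User types defined above.
    public class EfUserBulkInserter : IUserBulkInserter
    {
        private readonly string _connectionString;

        public EfUserBulkInserter(string connectionString)
        {
            _connectionString = connectionString;
        }

        public void Insert(IEnumerable<User> users)
        {
            using (var context = new MyDbContext(_connectionString))
            {
                context.Configuration.AutoDetectChangesEnabled = false;
                context.Configuration.ValidateOnSaveEnabled = false;
                foreach (var user in users)
                {
                    context.Users.Add(user);
                }
                context.SaveChanges();
            }
        }
    }

    // A SqlBulkCopy-backed class would implement the same interface by copying the users
    // into a DataTable (as PrepareDataTable does in the test program) and calling
    // SqlBulkCopy.WriteToServer, so the two can be swapped without changing callers.
}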
For more on SqlBulkCopy, see: Bulk Copy Operations in SQL Server
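Beyond the defaults used in the test program, SqlBulkCopy exposes a few tuning knobs worth knowing about. The sketch below shows the commonly used ones; the specific values are arbitrary placeholders, not recommendations derived from this test.

using System;
using System.Data;
using System.Data.SqlClient;

class BulkCopyTuningExample
{
    static void BulkInsert(string connectionString, DataTable table)
    {
        // TableLock usually speeds up large loads by taking a bulk-update lock on the target table.
        var options = SqlBulkCopyOptions.TableLock;
        using (var bulkCopy = new SqlBulkCopy(connectionString, options))
        {
            bulkCopy.DestinationTableName = "[dbo].[User]";
            bulkCopy.BatchSize = 5000;        // rows sent per batch; 0 means "all rows in one batch"
            bulkCopy.BulkCopyTimeout = 60;    // seconds before the operation times out
            bulkCopy.NotifyAfter = 1000;      // raise SqlRowsCopied every 1000 rows
            bulkCopy.SqlRowsCopied += (sender, e) =>
                Console.WriteLine("{0} rows copied so far", e.RowsCopied);

            // Map source columns by name so the identity column Id is left to the server.
            foreach (DataColumn column in table.Columns)
            {
                bulkCopy.ColumnMappings.Add(column.ColumnName, column.ColumnName);
            }
            bulkCopy.WriteToServer(table);
        }
    }
}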
To use SqlBulkCopy a bit more elegantly, someone has written an AsDataReader extension method; see: LinqEntityDataReader
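The idea is to stream entities straight into SqlBulkCopy without materializing a DataTable first. A rough usage sketch, assuming the AsDataReader() extension from the LinqEntityDataReader sample is referenced in the project (its exact namespace and signature should be checked against that download; it is not part of the code above):

using System;
using System.Data.SqlClient;
using System.Linq;

class DataReaderBulkCopyExample
{
    static void BulkInsert(string connectionString)
    {
        var users = Enumerable.Range(0, 10000).Select(i => new User
        {
            Name = "name" + i,
            Birthday = DateTime.Now.AddDays(i),
            Gender = "M",
            Email = "user" + i + "@abc.com",
            Deleted = false
        });

        using (var bulkCopy = new SqlBulkCopy(connectionString))
        {
            bulkCopy.DestinationTableName = "[dbo].[User]";
            bulkCopy.ColumnMappings.Add("Name", "Name");
            bulkCopy.ColumnMappings.Add("Birthday", "Birthday");
            bulkCopy.ColumnMappings.Add("Gender", "Gender");
            bulkCopy.ColumnMappings.Add("Email", "Email");
            bulkCopy.ColumnMappings.Add("Deleted", "Deleted");

            // AsDataReader() comes from the external LinqEntityDataReader sample referenced above;
            // WriteToServer has an IDataReader overload that consumes it lazily.
            bulkCopy.WriteToServer(users.AsDataReader());
        }
    }
}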
According to MSDN, a table-valued-parameter stored procedure has lower start-up overhead, so below roughly 1,000 rows (an empirical threshold) it outperforms SqlBulkCopy, while SqlBulkCopy's advantage grows as the row count increases; the two approaches also differ in some other respects. The actual results of this test bear that out: SqlBulkCopy did take slightly longer on the first 1,000-row insert. For details see: Table-Valued Parameters vs. BULK INSERT Operations
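On the table-valued-parameter side, the DataTable in the test program can also be replaced by a stream of SqlDataRecord objects, which avoids building the whole table in memory before the call. A minimal sketch, reusing the LocationTableType and InsertUsers objects created earlier (the method and class names here are illustrative only):

using System;
using System.Collections.Generic;
using System.Data;
using System.Data.SqlClient;
using Microsoft.SqlServer.Server;

class TvpStreamingExample
{
    // Yields one SqlDataRecord per user; rows are produced lazily as ADO.NET reads them.
    static IEnumerable<SqlDataRecord> UserRecords(int count)
    {
        var metaData = new[]
        {
            new SqlMetaData("Name", SqlDbType.NVarChar, 50),
            new SqlMetaData("Birthday", SqlDbType.Date),
            new SqlMetaData("Gender", SqlDbType.Char, 1),
            new SqlMetaData("Email", SqlDbType.NVarChar, 50),
            new SqlMetaData("Deleted", SqlDbType.Bit)
        };
        for (int i = 0; i < count; i++)
        {
            var record = new SqlDataRecord(metaData);
            record.SetString(0, "name" + i);
            record.SetDateTime(1, DateTime.Now.AddDays(i));
            record.SetString(2, "M");
            record.SetString(3, "user" + i + "@abc.com");
            record.SetBoolean(4, false);
            yield return record;
        }
    }

    static void InsertUsers(string connectionString, int count)
    {
        using (var connection = new SqlConnection(connectionString))
        {
            connection.Open();
            var cmd = connection.CreateCommand();
            cmd.CommandText = "InsertUsers";
            cmd.CommandType = CommandType.StoredProcedure;

            // A Structured parameter accepts an IEnumerable<SqlDataRecord> as well as a DataTable.
            var parameter = new SqlParameter("@Users", SqlDbType.Structured);
            parameter.Value = UserRecords(count);
            cmd.Parameters.Add(parameter);
            cmd.ExecuteNonQuery();
        }
    }
}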
There is also a published comparison of SqlBulkCopy against SqlDataAdapter inserts: High performance bulk loading to SQL Server using SqlBulkCopy
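For context, the SqlDataAdapter approach compared in that article batches parameterized INSERTs through the adapter's UpdateBatchSize setting. A rough sketch of that technique (not taken from the article; it assumes a DataTable shaped like the one built by PrepareDataTable above, with its rows still in the Added state):

using System.Data;
using System.Data.SqlClient;

class DataAdapterExample
{
    static void BatchInsert(string connectionString, DataTable table)
    {
        using (var connection = new SqlConnection(connectionString))
        using (var adapter = new SqlDataAdapter())
        {
            var insert = new SqlCommand(
                "INSERT INTO dbo.[User] ([Name],[Birthday],[Gender],[Email],[Deleted]) " +
                "VALUES (@Name,@Birthday,@Gender,@Email,@Deleted)", connection);
            insert.Parameters.Add("@Name", SqlDbType.NVarChar, 50, "Name");
            insert.Parameters.Add("@Birthday", SqlDbType.Date, 0, "Birthday");
            insert.Parameters.Add("@Gender", SqlDbType.Char, 1, "Gender");
            insert.Parameters.Add("@Email", SqlDbType.NVarChar, 50, "Email");
            insert.Parameters.Add("@Deleted", SqlDbType.Bit, 0, "Deleted");
            // Batching requires the adapter not to expect per-row results back.
            insert.UpdatedRowSource = UpdateRowSource.None;

            adapter.InsertCommand = insert;
            adapter.UpdateBatchSize = 500; // arbitrary example value: rows per round trip

            // Update() issues the INSERT for every row whose RowState is Added.
            adapter.Update(table);
        }
    }
}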
---------- Reposted from 火星老蒋: http://www.cnblogs.com/jiangdaoli/p/3297007.html