在实际项目开发过程中,经常有合并数据的需求。这里合并数据的意思是,对于源表A,目标表B,如果A中存在B中不存在则插入记录,如果A中存在B中也存在则更新记录,如果A中不存在B中存在则删除记录。

  为了实现这一需求,我们有两种解决方案,一是传统的处理方法,即使用EXISTS谓词,更新和新增分开处理的方式。另一种是使用MERGE语句(SQL Server 2008中新增的功能)。为了演示这一功能,首先我们需要准备测试数据,我们在tempdb临时数据库中新建两个表,源表Customers和目标表CustomersStage,然后向这两个表中插入测试数据,如下代码。

USE tempdb;
GO

-- 合并数据
-- 对于源表A,目标表B,如果A中存在B中不存在则插入记录,如果A中存在B中也存在则更新记录,如果A中不存在B中存在则删除记录。
-- 准备测试数据
IF OBJECT_ID('dbo.Customers','U') IS NOT NULL DROP TABLE dbo.Customers;
GO

CREATE TABLE dbo.Customers
(
    custid INT NOT NULL,
    companyname NVARCHAR(30) NOT NULL,
    phone VARCHAR(30) NOT NULL,
    ADDRESS NVARCHAR(50) NOT NULL,
    CONSTRAINT PK_Customers PRIMARY KEY(custid)        
);

INSERT INTO dbo.Customers
        ( custid, companyname, phone, ADDRESS )
VALUES  (1,N'cust 1','(111)111-111',N'address 1'),
        (2,N'cust 2','(222)222-222',N'address 2'),
        (3,N'cust 3','(333)333-333',N'address 3'),
        (4,N'cust 4','(444)444-444',N'address 4'),
        (5,N'cust 5','(555)555-555',N'address 5');
        
IF OBJECT_ID('dbo.CustomersStage','U') IS NOT NULL DROP TABLE dbo.CustomersStage;
GO

CREATE TABLE dbo.CustomersStage
(
    custid INT NOT NULL,
    companyname NVARCHAR(30) NOT NULL,
    phone VARCHAR(30) NOT NULL,
    ADDRESS NVARCHAR(50) NOT NULL,
    CONSTRAINT PK_CustomersStage PRIMARY KEY(custid)    
);

INSERT INTO dbo.CustomersStage
        ( custid, companyname, phone, ADDRESS )
VALUES  (2,N'AAAAA','(222)222-222',N'address 2'),
        (3,N'cust 3','(333)333-333',N'address 3'),
        (5,N'BBBBB','CCCCC',N'DDDDD'),
        (6,N'cust 6(new)','(666)666-666',N'address 6'),
        (7,N'cust 7(new)','(777)777-777',N'address 7');

运行以下代码查看示例数据效果。

SELECT * FROM dbo.Customers;
SELECT * FROM dbo.CustomersStage;

查询结果如下。

基于以上测试数据,所以我们要做的是,更新客户2,3和5的信息,将源表中客户6和7插入到目标表中,并且删除目标表中客户1和4。

首先使用传统的处理方式,既使用临时表和EXISTS谓词,如下代码所示。

-- 方法二:使用临时表和EXISTS谓词

-- 将源表的主键custid插入到临时表中
BEGIN TRAN;
IF OBJECT_ID('tempdb.dbo.#CustomersStage','U') IS NOT NULL DROP TABLE dbo.#CustomersStage;
GO

SELECT custid 
INTO #CustomersStage 
FROM dbo.CustomersStage;

DECLARE @custid INT;

WHILE EXISTS (SELECT * FROM #CustomersStage)
BEGIN
    SET @custid= (SELECT TOP 1 custid FROM #CustomersStage ORDER BY custid ASC);
    
    -- 方法1,if row exists update,otherwise insert
    IF EXISTS (SELECT * FROM dbo.Customers WHERE custid= @custid)
    BEGIN
        -- 更新
        UPDATE customers SET companyname= customersStage.companyname,phone= customersStage.phone,ADDRESS= customersStage.ADDRESS
        FROM dbo.Customers AS customers
        LEFT JOIN dbo.CustomersStage AS customersStage ON customers.custid = customersStage.custid
        WHERE customersStage.custid= @custid;
    END
    ELSE
    BEGIN
        -- 插入
        INSERT INTO dbo.Customers
                ( custid, companyname, phone, ADDRESS )
        SELECT custid, companyname,phone,ADDRESS FROM dbo.CustomersStage
        WHERE custid=@custid;
    END
    
    -- 方法2,update,if @@ROWCOUNT=0 then insert
    --UPDATE customers SET companyname= customersStage.companyname,phone= customersStage.phone,ADDRESS= customersStage.ADDRESS
    --FROM dbo.Customers AS customers
    --LEFT JOIN dbo.CustomersStage AS customersStage ON customers.custid = customersStage.custid
    --WHERE customersStage.custid= @custid;
    
    --IF @@ROWCOUNT=0
    --BEGIN
    --    -- 插入
    --    INSERT INTO dbo.Customers
    --            ( custid, companyname, phone, ADDRESS )
    --    SELECT custid, companyname,phone,ADDRESS FROM dbo.CustomersStage
    --    WHERE custid=@custid;    
    --END
    
    DELETE #CustomersStage
    WHERE custid= @custid;
END
GO

-- 从目标表中删除在源表中不存在的行
IF OBJECT_ID('tempdb.dbo.#Customers','U') IS NOT NULL DROP TABLE dbo.#Customers;
GO

SELECT custid
INTO #Customers
FROM dbo.Customers;

DECLARE @custidTGT INT;

WHILE EXISTS (SELECT * FROM #Customers)
BEGIN
    SET @custidTGT= (SELECT TOP 1 custid FROM #Customers ORDER BY custid ASC);
    
    IF NOT EXISTS (SELECT * FROM dbo.CustomersStage WHERE custid= @custidTGT)
    BEGIN
        DELETE FROM dbo.Customers
        WHERE custid= @custidTGT;        
    END
    
    DELETE #Customers
    WHERE custid= @custidTGT;
END
GO

SELECT * FROM dbo.Customers;
ROLLBACK TRAN;

处理结果如下。

从目标表的查询结果可以看到,客户2,3和5的信息已被更新,新的客户6和7已经插入,在源表中不存在的客户1和4已经被删除了,所以已经实现了我们的需求。但是,传统的处理方法一是代码量大,而且每操作一行数据需要两次查询数据库,导致效率较低。为了解决这些问题,我们可以使用新引入的MERGE语句来实现这个功能,代码如下。

-- 方法一:使用MERGE语句
BEGIN TRAN;
MERGE INTO dbo.Customers AS tgt
USING dbo.CustomersStage AS src ON tgt.custid=src.custid
WHEN MATCHED AND 
    ( (tgt.companyname<>src.companyname 
        OR (tgt.companyname IS NOT NULL AND src.companyname IS NULL)
        OR (tgt.companyname IS NULL AND src.companyname IS NOT NULL))
    OR (tgt.phone<>src.phone
        OR (tgt.phone IS NOT NULL AND src.phone IS NULL)
        OR (tgt.phone IS NULL AND src.phone IS NOT NULL))
    OR (tgt.ADDRESS<>src.ADDRESS
        OR (tgt.ADDRESS IS NOT NULL AND src.ADDRESS IS NULL)
        OR (tgt.ADDRESS IS NULL AND src.ADDRESS IS NOT NULL)) ) THEN 
    UPDATE SET tgt.companyname= src.companyname,
                tgt.phone= src.phone,
                tgt.ADDRESS= src.ADDRESS
WHEN NOT MATCHED THEN 
    INSERT (custid,companyname,phone,ADDRESS)
    VALUES (src.custid,src.companyname,src.phone,src.ADDRESS)
WHEN NOT MATCHED BY SOURCE THEN 
    DELETE;
    
SELECT * FROM dbo.Customers;
ROLLBACK TRAN;

通过以上查询代码我们会发现,我们使用了更少的代码实现了相同的功能,而且逻辑更清晰易懂。

附:全部sql代码。

USE tempdb;
GO

-- 合并数据
-- 对于源表A,目标表B,如果A中存在B中不存在则插入记录,如果A中存在B中也存在则更新记录,如果A中不存在B中存在则删除记录。
-- 准备测试数据
IF OBJECT_ID('dbo.Customers','U') IS NOT NULL DROP TABLE dbo.Customers;
GO

CREATE TABLE dbo.Customers
(
    custid INT NOT NULL,
    companyname NVARCHAR(30) NOT NULL,
    phone VARCHAR(30) NOT NULL,
    ADDRESS NVARCHAR(50) NOT NULL,
    CONSTRAINT PK_Customers PRIMARY KEY(custid)        
);

INSERT INTO dbo.Customers
        ( custid, companyname, phone, ADDRESS )
VALUES  (1,N'cust 1','(111)111-111',N'address 1'),
        (2,N'cust 2','(222)222-222',N'address 2'),
        (3,N'cust 3','(333)333-333',N'address 3'),
        (4,N'cust 4','(444)444-444',N'address 4'),
        (5,N'cust 5','(555)555-555',N'address 5');
        
IF OBJECT_ID('dbo.CustomersStage','U') IS NOT NULL DROP TABLE dbo.CustomersStage;
GO

CREATE TABLE dbo.CustomersStage
(
    custid INT NOT NULL,
    companyname NVARCHAR(30) NOT NULL,
    phone VARCHAR(30) NOT NULL,
    ADDRESS NVARCHAR(50) NOT NULL,
    CONSTRAINT PK_CustomersStage PRIMARY KEY(custid)    
);

INSERT INTO dbo.CustomersStage
        ( custid, companyname, phone, ADDRESS )
VALUES  (2,N'AAAAA','(222)222-222',N'address 2'),
        (3,N'cust 3','(333)333-333',N'address 3'),
        (5,N'BBBBB','CCCCC',N'DDDDD'),
        (6,N'cust 6(new)','(666)666-666',N'address 6'),
        (7,N'cust 7(new)','(777)777-777',N'address 7');
        


-- 方法一:使用MERGE语句
BEGIN TRAN;
MERGE INTO dbo.Customers AS tgt
USING dbo.CustomersStage AS src ON tgt.custid=src.custid
WHEN MATCHED AND 
    ( (tgt.companyname<>src.companyname 
        OR (tgt.companyname IS NOT NULL AND src.companyname IS NULL)
        OR (tgt.companyname IS NULL AND src.companyname IS NOT NULL))
    OR (tgt.phone<>src.phone
        OR (tgt.phone IS NOT NULL AND src.phone IS NULL)
        OR (tgt.phone IS NULL AND src.phone IS NOT NULL))
    OR (tgt.ADDRESS<>src.ADDRESS
        OR (tgt.ADDRESS IS NOT NULL AND src.ADDRESS IS NULL)
        OR (tgt.ADDRESS IS NULL AND src.ADDRESS IS NOT NULL)) ) THEN 
    UPDATE SET tgt.companyname= src.companyname,
                tgt.phone= src.phone,
                tgt.ADDRESS= src.ADDRESS
WHEN NOT MATCHED THEN 
    INSERT (custid,companyname,phone,ADDRESS)
    VALUES (src.custid,src.companyname,src.phone,src.ADDRESS)
WHEN NOT MATCHED BY SOURCE THEN 
    DELETE;
    
SELECT * FROM dbo.Customers;
ROLLBACK TRAN;

-- 方法二:使用临时表和EXISTS谓词

-- 将源表的主键custid插入到临时表中
BEGIN TRAN;
IF OBJECT_ID('tempdb.dbo.#CustomersStage','U') IS NOT NULL DROP TABLE dbo.#CustomersStage;
GO

SELECT custid 
INTO #CustomersStage 
FROM dbo.CustomersStage;

DECLARE @custid INT;

WHILE EXISTS (SELECT * FROM #CustomersStage)
BEGIN
    SET @custid= (SELECT TOP 1 custid FROM #CustomersStage ORDER BY custid ASC);
    
    -- 方法1,if row exists update,otherwise insert
    IF EXISTS (SELECT * FROM dbo.Customers WHERE custid= @custid)
    BEGIN
        -- 更新
        UPDATE customers SET companyname= customersStage.companyname,phone= customersStage.phone,ADDRESS= customersStage.ADDRESS
        FROM dbo.Customers AS customers
        LEFT JOIN dbo.CustomersStage AS customersStage ON customers.custid = customersStage.custid
        WHERE customersStage.custid= @custid;
    END
    ELSE
    BEGIN
        -- 插入
        INSERT INTO dbo.Customers
                ( custid, companyname, phone, ADDRESS )
        SELECT custid, companyname,phone,ADDRESS FROM dbo.CustomersStage
        WHERE custid=@custid;
    END
    
    -- 方法2,update,if @@ROWCOUNT=0 then insert
    --UPDATE customers SET companyname= customersStage.companyname,phone= customersStage.phone,ADDRESS= customersStage.ADDRESS
    --FROM dbo.Customers AS customers
    --LEFT JOIN dbo.CustomersStage AS customersStage ON customers.custid = customersStage.custid
    --WHERE customersStage.custid= @custid;
    
    --IF @@ROWCOUNT=0
    --BEGIN
    --    -- 插入
    --    INSERT INTO dbo.Customers
    --            ( custid, companyname, phone, ADDRESS )
    --    SELECT custid, companyname,phone,ADDRESS FROM dbo.CustomersStage
    --    WHERE custid=@custid;    
    --END
    
    DELETE #CustomersStage
    WHERE custid= @custid;
END
GO

-- 从目标表中删除在源表中不存在的行
IF OBJECT_ID('tempdb.dbo.#Customers','U') IS NOT NULL DROP TABLE dbo.#Customers;
GO

SELECT custid
INTO #Customers
FROM dbo.Customers;

DECLARE @custidTGT INT;

WHILE EXISTS (SELECT * FROM #Customers)
BEGIN
    SET @custidTGT= (SELECT TOP 1 custid FROM #Customers ORDER BY custid ASC);
    
    IF NOT EXISTS (SELECT * FROM dbo.CustomersStage WHERE custid= @custidTGT)
    BEGIN
        DELETE FROM dbo.Customers
        WHERE custid= @custidTGT;        
    END
    
    DELETE #Customers
    WHERE custid= @custidTGT;
END
GO

SELECT * FROM dbo.Customers;
ROLLBACK TRAN;
View Code
posted on 2014-10-20 15:44  永远的麦子  阅读(1053)  评论(2编辑  收藏  举报