SQL进阶系列之2自连接

写在前面

一般地,SQL的连接运算根据其特征的不同,有着不同的名称,比如内连接、外连接、交叉连接等,这些连接大多是以不同的表或视图为对象进行的,针对相同的表进行的连接成为自连接。理解自连接有助于我们理解SQL面向集合的语言特性。

可重排列、排列、组合

CREATE TABLE Products
(name VARCHAR(16) PRIMARY KEY,
 price INTEGER NOT NULL);

--可重排列·排列·组合
INSERT INTO Products VALUES('苹果',	50);
INSERT INTO Products VALUES('橘子',	100);
INSERT INTO Products VALUES('香蕉',	80);

--排序
DELETE FROM Products;
INSERT INTO Products VALUES('苹果',	50);
INSERT INTO Products VALUES('橘子',	100);
INSERT INTO Products VALUES('葡萄',	50);
INSERT INTO Products VALUES('西瓜',	80);
INSERT INTO Products VALUES('柠檬',	30);
INSERT INTO Products VALUES('香蕉',	50);

--不聚合,查看集合的包含关系
DELETE FROM Products;
INSERT INTO Products VALUES('橘子',	100);
INSERT INTO Products VALUES('葡萄',	50);
INSERT INTO Products VALUES('西瓜',	80);
INSERT INTO Products VALUES('柠檬',	30);
-- 可重排列(3*3种结果)
SELECT P1.name AS name_1,P2.name AS name_2 FROM Products P1,Products P2;
-- 排列($P_3^2$种结果)
SELECT P1.name AS name_1,P2.name AS name_2 FROM Products P1,Products P2 WHERE P1.name <> P2.name;
-- 组合($C_3^2$种结果)
SELECT P1.name AS name_1,P2.name AS name_2 FROM Products P1,Products P2 WHERE P1.name > P2.name;
-- 组合 非等值自连接
SELECT P1.name AS name_1,P2.name AS name_2,P3.name AS name_3 FROM Products P1,Products P2,Products P3 WHERE P1.name > P2.name AND P2.name > P3.name;

删除重复行

-- 用于删除重复行的SQL语句(1):使用极值函数(仅支持Oracle)
DELETE FROM Products P1 WHERE rowid < (SELECT MAX(P2.rowid) FROM Product P2 WHERE P1.name = P2.name AND P1.price = P2.price)
-- 用于删除重复行的SQL语句(1):使用非等值连接
DELETE FROM Products P1 WHERE EXISTS (SELECT * FROM Products P2 WHERE P1.name = P2.name AND P1.price = P2.price AND P1.rowid < P2.rowid)

查找局部不一致的列

--查找局部不一致的列
CREATE TABLE Addresses
(name VARCHAR(32),
 family_id INTEGER,
 address VARCHAR(32),
 PRIMARY KEY(name, family_id));

INSERT INTO Addresses VALUES('前田义明', '100', '东京都港区虎之门3-2-29');
INSERT INTO Addresses VALUES('前田由美', '100', '东京都港区虎之门3-2-92');
INSERT INTO Addresses VALUES('加藤茶',   '200', '东京都新宿区西新宿2-8-1');
INSERT INTO Addresses VALUES('加藤胜',   '200', '东京都新宿区西新宿2-8-1');
INSERT INTO Addresses VALUES('福尔摩斯',  '300', '贝克街221B');
INSERT INTO Addresses VALUES('华生',  '400', '贝克街221B');
SELECT DISTINCT A1.name,A2.name FROM Addresses A1,Addresses A2 WHERE A1.family_id = A2.family_id AND A1.address <> A2.address;
DELETE FROM Products;
INSERT INTO Products VALUES('苹果',	50);
INSERT INTO Products VALUES('橘子',	100);
INSERT INTO Products VALUES('葡萄',	50);
INSERT INTO Products VALUES('西瓜',	80);
INSERT INTO Products VALUES('柠檬',	30);
INSERT INTO Products VALUES('草莓',	100);
INSERT INTO Products VALUES('香蕉',	100);
-- 用于查找价格相同但商品名称不同的记录的SQL语句
SELECT DISTINCT P1.name,P1.price FROM Products P1,Products P2 WHERE P1.price = P2.price AND P1.name <> P2.name;

排序

-- 窗口函数(OLAP函数/分析函数)依赖特定数据库的实现
SELECT name,price,
	   RANK() OVER (ORDER BY price DESC) AS rank_1, -- 跳过并列
	   DENSE_RANK() OVER (ORDER BY price DESC) AS rank_2 --不跳过并列
FROM Products;

1567513153150

SELECT P1.name,P1.price,
(SELECT COUNT(P2.price) 
 FROM Products P2 WHERE P1.price < P2.price) + 1 AS rank_1 
 FROM Products P1 ORDER BY rank_1;

1567854735137

-- 添加DISTINCT,类似于DENSE_RANK函数
SELECT P1.name,P1.price,
(SELECT COUNT(DISTINCT P2.price) 
 FROM Products P2 WHERE P1.price <  P2.price) + 1 AS rank_1 
 FROM Products P1 ORDER BY rank_1;

1567854852552

-- 排序 使用自连接
SELECT P1.name,MAX(P1.price) AS price,COUNT(P2.price)+1 AS rank_1 FROM Products AS P1 
LEFT JOIN Products AS P2 ON P1.price < P2.price GROUP BY P1.name ORDER BY rank_1;
-- 不聚合,查看结合的包含关系
SELECT P1.name,P2.name FROM Products AS P1 
LEFT JOIN Products AS P2 ON P1.price < P2.price;
-- 如果这里不是用LEFT JOIN而使用INNER JOIN?

小结

  • 自连接经常和非等值连接结合起来使用
  • 自连接和GROUP BY结合使用可以生成递归集合
  • 将自连接看做不同表直接的连接更容易理解
  • 应把表看做行的集合,用面向集合的方法来思考
  • 自连接的性能开销更大,应尽量给用于连接的列建立索引

练习题

-- 练习题 1-2-1 可重组合
-- 可重组合
SELECT P1.name,P2.name FROM Products AS P1,Products AS P2 WHERE P1.name >= P2.name;
-- 练习题 1-2-2 分地区排序
-- 方法一 窗口函数
SELECT  district,name,price,RANK () OVER (PARTITION BY district ORDER BY price DESC)
FROM DistrictProducts;
-- 自连接(关联子查询)
SELECT DP1.district,DP1.name,DP1.price,(SELECT COUNT(DP2.price)+1 FROM DistrictProducts DP2 WHERE DP1.district = DP2.district AND DP1.price < DP2.price) AS rank_1
FROM DistrictProducts DP1;
-- 更新位次
/* 练习题1-2-3:自连接 */
SELECT P1.district, P1.name,
       MAX(P1.price) AS price, 
       COUNT(P2.name) +1 AS rank_1
  FROM DistrictProducts P1 LEFT OUTER JOIN DistrictProducts P2
    ON  P1.district = P2.district
   AND P1.price < P2.price
 GROUP BY P1.district, P1.name;
posted @ 2019-09-07 20:11  Evian_Jeff  阅读(602)  评论(0编辑  收藏  举报