从产品表中归纳出同类前缀 再实验

表结构:

create table hy_product(
    id number(9,0) primary key,
    name nvarchar2(20) not null,
    price integer not null);

数据:

复制代码
insert into hy_product(id,name,price) values('1','AA','10');
insert into hy_product(id,name,price) values('2','AA款','10');
insert into hy_product(id,name,price) values('3','AA屏风','10');
insert into hy_product(id,name,price) values('4','PDST','20');
insert into hy_product(id,name,price) values('5','PDST款','20');
insert into hy_product(id,name,price) values('6','PDST-TJ','20');
insert into hy_product(id,name,price) values('7','ASDF','20');
insert into hy_product(id,name,price) values('8','ASDF款','20');
insert into hy_product(id,name,price) values('9','ASDF-TJ','20');
insert into hy_product(id,name,price) values('10','PDF','20');
insert into hy_product(id,name,price) values('11','PDF款','20');
insert into hy_product(id,name,price) values('12','PDF-TJ','20');
复制代码

需求:上表的name字段里,有AA款,AA,AA屏风等产品,实际上它们都是同一产品的不同表述。现在需要把前缀相同的归到最一类,举例来说AA款,AA,AA屏风等应该被归纳到AA类里。

期待值:按照需求,归类完的产品应该是AA,3;PDST,3;ASDF,3;PDF,3;四种。

 

昨天的文章 https://www.cnblogs.com/xiandedanteng/p/12709690.html 是边讲解边运行,SQL显得有点多,今天只把必要的列出。

创建辅助序列表:

create table tb_seq(
    id number(9,0) primary key);


insert into tb_seq
select rownum from dual
connect by level<16
order by dbms_random.random;

整理数据放到临时表里:

复制代码
create table tb_tmp1 as select d.sery,d.cnt from
(select c.sery,count(*) as cnt from 
(select b.sery from
(select (case when a.namelen<a.cutlen then 'extra' else to_char(substr(a.name,1,a.cutlen)) end) as sery from
(select p.name,length(p.name) as namelen,seq.id as cutlen from hy_product p,(select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from hy_product)) order by id) seq) a) b
where b.sery<>'extra') c
group by c.sery) d
where length(d.sery)>1 and d.cnt>1
order by d.cnt desc,d.sery
复制代码

最后从临时表里查询的SQL:

select *
from tb_tmp1
where LENGTH(REGEXP_REPLACE(REPLACE((select listagg(sery,',') within group(order by sery) from tb_tmp1 ), sery, '@'),  '[^@]+', ''))=1

 

查询结果:

可见结果与预期是完全相符的,昨晚今晨披星戴月写的SQL程序通过了考验。

 

以上使用的全部SQL:

复制代码
create table hy_product(
    id number(9,0) primary key,
    name nvarchar2(20) not null,
    price integer not null);
    
insert into hy_product(id,name,price) values('1','AA','10');
insert into hy_product(id,name,price) values('2','AA款','10');
insert into hy_product(id,name,price) values('3','AA屏风','10');
insert into hy_product(id,name,price) values('4','PDST','20');
insert into hy_product(id,name,price) values('5','PDST款','20');
insert into hy_product(id,name,price) values('6','PDST-TJ','20');
insert into hy_product(id,name,price) values('7','ASDF','20');
insert into hy_product(id,name,price) values('8','ASDF款','20');
insert into hy_product(id,name,price) values('9','ASDF-TJ','20');
insert into hy_product(id,name,price) values('10','PDF','20');
insert into hy_product(id,name,price) values('11','PDF款','20');
insert into hy_product(id,name,price) values('12','PDF-TJ','20');

commit;


create table tb_seq(
    id number(9,0) primary key);


insert into tb_seq
select rownum from dual
connect by level<16
order by dbms_random.random;

create table tb_tmp1 as select d.sery,d.cnt from
(select c.sery,count(*) as cnt from 
(select b.sery from
(select (case when a.namelen<a.cutlen then 'extra' else to_char(substr(a.name,1,a.cutlen)) end) as sery from
(select p.name,length(p.name) as namelen,seq.id as cutlen from hy_product p,(select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from hy_product)) order by id) seq) a) b
where b.sery<>'extra') c
group by c.sery) d
where length(d.sery)>1 and d.cnt>1
order by d.cnt desc,d.sery

select *
from tb_tmp1
where LENGTH(REGEXP_REPLACE(REPLACE((select listagg(sery,',') within group(order by sery) from tb_tmp1 ), sery, '@'),  '[^@]+', ''))=1


drop table hy_product
drop table tb_seq
drop table tb_tmp1
复制代码

--2020-04-16--

posted @   逆火狂飙  阅读(171)  评论(1编辑  收藏  举报
编辑推荐:
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 【杭电多校比赛记录】2025“钉耙编程”中国大学生算法设计春季联赛(1)
历史上的今天:
2018-04-16 【Canvas与徽章】金边红底赢字徽章
生当作人杰 死亦为鬼雄 至今思项羽 不肯过江东
点击右上角即可分享
微信分享提示