从产品表中归纳出同类前缀 再实验
表结构:
create table hy_product( id number(9,0) primary key, name nvarchar2(20) not null, price integer not null);
数据:
insert into hy_product(id,name,price) values('1','AA','10'); insert into hy_product(id,name,price) values('2','AA款','10'); insert into hy_product(id,name,price) values('3','AA屏风','10'); insert into hy_product(id,name,price) values('4','PDST','20'); insert into hy_product(id,name,price) values('5','PDST款','20'); insert into hy_product(id,name,price) values('6','PDST-TJ','20'); insert into hy_product(id,name,price) values('7','ASDF','20'); insert into hy_product(id,name,price) values('8','ASDF款','20'); insert into hy_product(id,name,price) values('9','ASDF-TJ','20'); insert into hy_product(id,name,price) values('10','PDF','20'); insert into hy_product(id,name,price) values('11','PDF款','20'); insert into hy_product(id,name,price) values('12','PDF-TJ','20');
需求:上表的name字段里,有AA款,AA,AA屏风等产品,实际上它们都是同一产品的不同表述。现在需要把前缀相同的归到最一类,举例来说AA款,AA,AA屏风等应该被归纳到AA类里。
期待值:按照需求,归类完的产品应该是AA,3;PDST,3;ASDF,3;PDF,3;四种。
昨天的文章 https://www.cnblogs.com/xiandedanteng/p/12709690.html 是边讲解边运行,SQL显得有点多,今天只把必要的列出。
创建辅助序列表:
create table tb_seq( id number(9,0) primary key); insert into tb_seq select rownum from dual connect by level<16 order by dbms_random.random;
整理数据放到临时表里:
create table tb_tmp1 as select d.sery,d.cnt from (select c.sery,count(*) as cnt from (select b.sery from (select (case when a.namelen<a.cutlen then 'extra' else to_char(substr(a.name,1,a.cutlen)) end) as sery from (select p.name,length(p.name) as namelen,seq.id as cutlen from hy_product p,(select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from hy_product)) order by id) seq) a) b where b.sery<>'extra') c group by c.sery) d where length(d.sery)>1 and d.cnt>1 order by d.cnt desc,d.sery
最后从临时表里查询的SQL:
select * from tb_tmp1 where LENGTH(REGEXP_REPLACE(REPLACE((select listagg(sery,',') within group(order by sery) from tb_tmp1 ), sery, '@'), '[^@]+', ''))=1
查询结果:
可见结果与预期是完全相符的,昨晚今晨披星戴月写的SQL程序通过了考验。
以上使用的全部SQL:
create table hy_product( id number(9,0) primary key, name nvarchar2(20) not null, price integer not null); insert into hy_product(id,name,price) values('1','AA','10'); insert into hy_product(id,name,price) values('2','AA款','10'); insert into hy_product(id,name,price) values('3','AA屏风','10'); insert into hy_product(id,name,price) values('4','PDST','20'); insert into hy_product(id,name,price) values('5','PDST款','20'); insert into hy_product(id,name,price) values('6','PDST-TJ','20'); insert into hy_product(id,name,price) values('7','ASDF','20'); insert into hy_product(id,name,price) values('8','ASDF款','20'); insert into hy_product(id,name,price) values('9','ASDF-TJ','20'); insert into hy_product(id,name,price) values('10','PDF','20'); insert into hy_product(id,name,price) values('11','PDF款','20'); insert into hy_product(id,name,price) values('12','PDF-TJ','20'); commit; create table tb_seq( id number(9,0) primary key); insert into tb_seq select rownum from dual connect by level<16 order by dbms_random.random; create table tb_tmp1 as select d.sery,d.cnt from (select c.sery,count(*) as cnt from (select b.sery from (select (case when a.namelen<a.cutlen then 'extra' else to_char(substr(a.name,1,a.cutlen)) end) as sery from (select p.name,length(p.name) as namelen,seq.id as cutlen from hy_product p,(select id from tb_seq where id<=(select max(len) as maxlen from (select name,length(name) as len from hy_product)) order by id) seq) a) b where b.sery<>'extra') c group by c.sery) d where length(d.sery)>1 and d.cnt>1 order by d.cnt desc,d.sery select * from tb_tmp1 where LENGTH(REGEXP_REPLACE(REPLACE((select listagg(sery,',') within group(order by sery) from tb_tmp1 ), sery, '@'), '[^@]+', ''))=1 drop table hy_product drop table tb_seq drop table tb_tmp1
--2020-04-16--
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 【杭电多校比赛记录】2025“钉耙编程”中国大学生算法设计春季联赛(1)
2018-04-16 【Canvas与徽章】金边红底赢字徽章