sql实现协同过滤2

 

 

 

 

准备数据

create table table_origin_data(
  users varchar(100),
  item varchar(100),
  score number(20,4)
);
insert into table_origin_data(users,item,score) VALUES ('张三','M1',5)
insert into table_origin_data(users,item,score) VALUES ('李四','M2',2)
insert into table_origin_data(users,item,score) VALUES ('王二','M1',4)
insert into table_origin_data(users,item,score) VALUES ('张三','M2',3)
insert into table_origin_data(users,item,score) VALUES ('张三','M3',1)

表table_origin_data

step1-1计算分子(乘积)

 在计算的这个数据集的基础之上按item1,item2进行分组,sum(score_prod),把计算的结果存入到table_cov表中

CREATE TABLE table_cov AS
SELECT item1, item2,sum(score_prod) AS prod
FROM
(SELECT t1.item AS item1, t2.item AS item2, t1.score*t2.score as score_prod
FROM table_origin_data t1
INNER JOIN
table_origin_data t2
ON t1.users=t2.users)
GROUP BY item1, item2;

 

step1_2:计算余弦相似度

CREATE TABLE table_corr AS
SELECT t1.item1, t1.item2, t1.prod/SQRT(t2.prod*t3.prod) AS corr 
FROM
table_cov t1
INNER JOIN table_cov t2
ON t1.item1=t2.item1 AND t1.item1=t2.item2
INNER JOIN table_cov t3
ON t1.item2=t3.item1 AND t1.item2=t3.item2;

 

table_cov表inner join关联自己两次,第一次关联条件分别用t1表的item1列=自己t2表的item1  and  item1列=自己t2表的item2

第二次关联条件t1表的item2列=自己t2表的item1  and  item1列=自己t2表的item2

select查询的列就是t1表的item1,item2,和t1表的prod/根号下t2的prod乘以t3的prod

 

第三步:协同过滤

CREATE TABLE table_unkown AS
SELECT t1.users, t2.item
FROM
(SELECT DISTINCT users, 1 AS joiner
FROM table_origin_data) t1
INNER JOIN
(SELECT DISTINCT item, 1 AS joiner
FROM table_origin_data) t2
ON t1.joiner=t2.joiner
LEFT JOIN
table_origin_data t3
ON t1.users=t3.users AND t2.item=t3.item
;

 

把计算出的结果存入table_unkown表中,算最终的结果

 

SELECT users, item, SUM(score) AS score
FROM (
    SELECT t1.users, t1.item, t2.score*t3.corr AS score
    FROM table_unkown t1
    INNER JOIN table_origin_data t2
    ON t1.users=t2.users
    INNER JOIN table_corr t3
    ON t1.item=t3.item1 AND t2.item=t3.item2
)
GROUP BY users, item;

 

 

 

https://blog.csdn.net/guofei9987/article/details/111993311  原博文

 

 

 

posted @ 2023-06-29 18:25  瘦多一点  阅读(61)  评论(0编辑  收藏  举报