推荐算法之协同过滤

代码下载

# coding:UTF-8
'''
Date:20160928
@author: zhaozhiyong
'''

import numpy as np
import pandas

def load_data(file_path):
    '''Load the user-item rating matrix from a tab-separated text file.

    Every non-blank line becomes one row of the matrix. Fields are split on
    tabs; a field equal to "-" marks a missing rating and is stored as 0,
    any other field is parsed with float().

    input:  file_path(string): path of the file holding the user-item data
    output: data(mat): user-item matrix (np.matrix), one row per line
    raises: ValueError if a field is neither "-" nor a valid float
    '''
    data = []
    # The context manager closes the file even when float() raises on a
    # malformed field.
    with open(file_path) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no ratings;
                # float("") would otherwise raise.
                continue
            data.append([0.0 if x == "-" else float(x)
                         for x in line.split("\t")])

    # np.asmatrix is the function np.mat used to alias; the np.mat alias
    # itself was removed in NumPy 2.0.
    return np.asmatrix(data)

def cos_sim(x, y):
    '''Cosine similarity between two vectors.

    input:  x(mat): a row vector (user or item); an np.matrix row, a 2-D
                    single-row array or a 1-D array are all accepted
            y(mat): a second vector with the same number of elements
    output: cosine similarity of x and y as a scalar; 0.0 when either
            vector has zero norm (the similarity is undefined there and
            would otherwise come out as NaN)
    '''
    # Flatten both inputs so the dot products below mean the same thing for
    # np.matrix rows and plain ndarrays (where `*` would be elementwise).
    a = np.asarray(x, dtype=float).ravel()
    b = np.asarray(y, dtype=float).ravel()

    denominator = np.sqrt(np.dot(a, a)) * np.sqrt(np.dot(b, b))
    if denominator == 0:
        # At least one vector is all zeros: avoid the 0/0 division.
        return np.float64(0.0)
    return np.float64(np.dot(a, b) / denominator)  # inner product / norms


def similarity(data):
    '''Compute the pairwise cosine similarity between the rows of a matrix.

    input:  data(mat): any matrix; each row is treated as one vector
    output: w(mat): m x m symmetric np.matrix where w[i, j] is
            cos_sim(row i, row j) and the diagonal is 0
    '''
    # Coerce to np.matrix so that data[i, :] is always a 1 x n row, whether
    # the caller passed a matrix or a plain 2-D array.
    data = np.asmatrix(data)
    m = np.shape(data)[0]  # number of rows (users)

    # Start from all zeros: the diagonal is meant to stay 0 so that a row
    # never counts as similar to itself. (np.asmatrix replaces the np.mat
    # alias, which was removed in NumPy 2.0.)
    w = np.asmatrix(np.zeros((m, m)))

    for i in range(m):
        # Only the upper triangle is computed; the result is mirrored.
        for j in range(i + 1, m):
            w[i, j] = w[j, i] = cos_sim(data[i, :], data[j, :])
    return w

def user_based_recommend(data, w, user):
    '''Score the items that `user` has not interacted with yet.

    input:  data(mat): user-item matrix; must keep a selected row 2-D
                       (as np.matrix does), since the row is indexed [0, i]
            w(mat): user-user similarity matrix
            user(int): row index of the target user
    output: predict(dict): maps each item index whose entry for `user` is 0
            to the dot product of column `user` of w with that item's column
            of data
    '''
    _, n_items = np.shape(data)
    user_row = data[user, :]  # ratings of the target user

    # Step 1: items with a 0 entry count as "not interacted with"
    unrated = [item for item in range(n_items) if user_row[0, item] == 0]

    # Step 2: predict a score for each of those items
    print('not_inter=',unrated)
    scores = {}
    ratings = np.array(data)
    sims = np.array(w)
    for item in unrated:
        # Similarity-weighted sum of every user's rating for this item
        scores[item] = sims[:, user] @ ratings[:, item].T
        print(scores)
    return scores



def top_k(predict, k):
    '''Return the k highest-scored items.

    input:  predict(dict): maps item index -> predicted score (anything
                           pandas.Series accepts works)
            k(int): number of items to recommend
    output: top_recom(Series): pandas Series indexed by item, sorted by
            score in descending order and cut to at most k entries; all
            entries are returned when k exceeds their number, none when
            k <= 0
    '''
    ranked = pandas.Series(predict).sort_values(ascending=False)
    # Slicing past the end is safe with iloc, so no length check is needed.
    # Clamp at 0 because a negative stop would drop items from the tail
    # instead of returning nothing.
    return ranked.iloc[:max(k, 0)]

if __name__ == "__main__":
    # Step 1: read the user-item ratings from disk
    print("------------ 1. load data ------------")
    ratings = load_data("data.txt")

    # Step 2: pairwise similarity between users (rows of the matrix)
    print("------------ 2. calculate similarity between users -------------")
    user_sim = similarity(ratings)

    # Step 3: score the items user 0 has not interacted with
    print("------------ 3. predict ------------")
    scores = user_based_recommend(ratings, user_sim, 0)

    # Step 4: keep the best-scored item(s)
    print("------------ 4. top_k recommendation ------------")
    best = top_k(scores, 1)
    print('top_recom=', best)

------------ 1. load data ------------
------------ 2. calculate similarity between users -------------
------------ 3. predict ------------
not_inter= [2, 4]
{2: 5.1030390226883604}
{2: 5.1030390226883604, 4: 2.2249110640673515}
------------ 4. top_k recommendation ------------
top_recom= 2    5.103039
4    2.224911
dtype: float64

posted @ 2022-08-19 22:58 luoganttcc 阅读(11) 评论(0) 编辑收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

相关博文：

· 推荐算法之协同过滤

· 一文深入理解协同过滤

· 2025.2.4（周二）

· 基于协同过滤算法的智能推荐系统基础介绍

· 协同过滤推荐算法

阅读排行：
· TypeScript + Deepseek 打造卜卦网站：技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· C#/.NET/.NET Core技术前沿周刊 | 第 29 期（2025年3.1-3.9）
· 从HTTP原因短语缺失研究HTTP/2和HTTP/3的设计差异

公告

昵称： luoganttcc
园龄： 8年1个月
粉丝： 8
关注： 1

+加关注

2025年3月

日

一

二

三

四

五

六

luoganttcc

推荐算法之协同过滤

公告

搜索

常用链接

随笔分类

随笔档案

阅读排行榜

评论排行榜

推荐排行榜

最新评论

luoganttcc

推荐算法之协同过滤

公告

搜索

常用链接

随笔分类

随笔档案

阅读排行榜

评论排行榜

推荐排行榜

最新评论

推荐算法之协同过滤