# User-based collaborative filtering (User-Based CF), using the Jaccard similarity (jaccard_score) as the similarity measure

import  pandas as pd
import numpy as np

# Row labels ('user1'..'user5') and column labels ('item A'..'item E') of the
# purchase matrix.
users = [f'user{number}' for number in range(1, 6)]
items = [f'item {letter}' for letter in 'ABCDE']

# Purchase records: one row per user, one column per item;
# 1 means the user bought the item, 0 means they did not.
datasets = [
    [1, 0, 1, 1, 0],
    [1, 0, 0, 1, 1],
    [1, 0, 1, 0, 0],
    [0, 1, 0, 1, 1],
    [1, 1, 1, 0, 1],
]

# Labelled user x item matrix used by every later step.
df = pd.DataFrame(data=datasets, index=users, columns=items)

print(df)

# Only jaccard_score is imported: jaccard_similarity_score was deprecated in
# scikit-learn 0.21 and removed in 0.23, so importing it raises ImportError on
# current releases.
from sklearn.metrics import jaccard_score

# Jaccard similarity between the 'item A' and 'item B' purchase columns:
# users who bought both, divided by users who bought at least one of them.
sim = jaccard_score(df['item A'], df['item B'])
print(sim)


from sklearn.metrics.pairwise import pairwise_distances

# 'jaccard' is a boolean metric, so the 0/1 purchase matrix is cast to bool
# explicitly; passing integers makes scikit-learn convert the data itself and
# emit a DataConversionWarning. Similarity is 1 - Jaccard distance.

# User-to-user similarity: each row of df is one user's purchase vector.
user_similar = pd.DataFrame(
    1 - pairwise_distances(df.values.astype(bool), metric='jaccard'),
    columns=users,
    index=users,
)
print(user_similar)

# Item-to-item similarity: the transposed matrix has one row per item.
items_similar = pd.DataFrame(
    1 - pairwise_distances(df.T.values.astype(bool), metric='jaccard'),
    columns=items,
    index=items,
)
print(items_similar)


# Map every user to the labels of the two other users most similar to them.
topN_users = {}
for i in user_similar.index:
    # Drop the user's own entry so a user is never picked as their own
    # neighbour.
    _df = user_similar.loc[i].drop([i])
    print(_df)
    # Highest similarity first; the first two labels are the neighbours.
    _df_sorted = _df.sort_values(ascending=False)
    top2 = list(_df_sorted.index[:2])
    topN_users[i] = top2

print('Top 2 相似用户:')
print(topN_users)


# Recommend to each user the items bought by their most similar users that
# the user has not bought yet. Rows are selected with .loc; the older .ix
# indexer has been removed from pandas.
rs_results = {}

for user, sim_users in topN_users.items():
    rs_result = set()
    for sim_user in sim_users:
        # Build the initial candidate set: every item a similar user bought.
        # Replacing 0 with NaN and dropping NaN leaves only the bought items.
        print('sim_users ', sim_users)
        print('sim_users items :', df.loc[sim_user])
        print('df loc :', df.loc[sim_user].replace(0, np.nan).dropna().index)
        rs_result = rs_result.union(set(df.loc[sim_user].replace(0, np.nan).dropna().index))
    # Filter out the items this user has already bought.
    rs_result -= set(df.loc[user].replace(0, np.nan).dropna().index)
    rs_results[user] = rs_result
print('最终推荐结果:')
print(rs_results)


# Expected output of the final step (set element order may vary between runs):
# 最终推荐结果:
# {'user1': {'item E'}, 'user2': {'item B', 'item C'}, 'user3': {'item D', 'item B', 'item E'}, 'user4': {'item A', 'item C'}, 'user5': {'item D'}}



# posted @ 2020-09-02 10:21  kpwong  阅读(596)  评论(0)  编辑  收藏  举报