from numpy import *
from numpy import linalg as la


def loadExData1():
    """Return the 11x11 example rating table as a list of rows.

    The recommender functions in this module index rows as users and
    columns as items, and treat a 0 entry as "not rated".
    """
    return [[2, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
            [0, 0, 0, 0, 0, 0, 0, 1, 0, 4, 0],
            [3, 3, 4, 0, 3, 0, 0, 2, 2, 0, 0],
            [5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
            [4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 5],
            [0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4],
            [0, 0, 0, 0, 0, 0, 5, 0, 0, 5, 0],
            [0, 0, 0, 3, 0, 0, 0, 0, 4, 5, 0],
            [1, 1, 2, 1, 1, 2, 1, 0, 4, 5, 0]]


# Similarity measures
def ecludSim(inA, inB):
    """Return a Euclidean-distance similarity: 1 / (1 + ||inA - inB||).

    Identical inputs give 1.0; the value shrinks towards 0 as the distance
    grows.
    """
    return 1.0 / (1.0 + la.norm(inA - inB))


def pearsSim(inA, inB):
    """Return the Pearson correlation of inA and inB rescaled to [0, 1].

    With fewer than three samples the function returns 1.0 instead of
    computing a correlation.
    """
    if len(inA) < 3:
        return 1.0
    return 0.5 + 0.5 * corrcoef(inA, inB, rowvar=0)[0][1]


def cosSim(inA, inB):
    """Return the cosine similarity of two vectors rescaled to [0, 1].

    :param inA: first vector (column matrix, row matrix or 1-D array)
    :param inB: second vector, same number of elements as inA
    :return: 0.5 + 0.5 * cos(angle); 0.0 when either vector has zero norm
    """
    # Flatten first so the dot product yields a true scalar; calling float()
    # on a 1x1 matrix is deprecated in recent NumPy releases.
    vecA = asarray(inA).ravel()
    vecB = asarray(inB).ravel()
    denom = la.norm(vecA) * la.norm(vecB)
    if denom == 0:
        # The cosine is undefined for a zero vector.  Report "no similarity",
        # as standEst does when there is nothing to compare, rather than
        # letting NaN leak into the weighted averages.
        return 0.0
    num = float(dot(vecA, vecB))
    return 0.5 + 0.5 * (num / denom)


# Item-similarity based recommendation engine
def standEst(dataMat, user, simMeas, item):
    """Estimate the rating of `user` for `item` from item-item similarity.

    :param dataMat: rating matrix (numpy matrix, e.g. built from
        loadExData1()); rows are users, columns are items
    :param user: row index of the user
    :param simMeas: similarity function taking two column vectors,
        e.g. cosSim
    :param item: column index of the item to estimate
    :return: similarity-weighted average of the user's existing ratings,
        or 0 when the total similarity is 0

    Prints each similarity as it is computed.
    """
    n = shape(dataMat)[1]  # number of items (columns)
    simTotal = 0.0
    ratSimTotal = 0.0
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0:
            # The user has not rated item j, so it cannot contribute.
            continue
        # Rows (users) that rated both `item` and item j.
        overLap = nonzero(logical_and(dataMat[:, item].A > 0,
                                      dataMat[:, j].A > 0))[0]
        if len(overLap) == 0:
            similarity = 0
        else:
            similarity = simMeas(dataMat[overLap, item],
                                 dataMat[overLap, j])
        print('the %d and %d similarity is: %f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal


# SVD-based rating estimate: items are compared in a reduced space spanned
# by the leading singular vectors (at most 4 of them).
def svdEst(dataMat, user, simMeas, item):
    """Estimate the rating of `user` for `item` in an SVD-reduced space.

    :param dataMat: rating matrix (numpy matrix); rows are users, columns
        are items
    :param user: row index of the user
    :param simMeas: similarity function taking two column vectors
    :param item: column index of the item to estimate
    :return: similarity-weighted average of the user's existing ratings,
        or 0 when the total similarity is 0

    Prints each similarity as it is computed.
    """
    n = shape(dataMat)[1]  # number of items
    simTotal = 0.0
    ratSimTotal = 0.0
    U, Sigma, VT = la.svd(dataMat)
    # Keep the 4 leading singular values, or fewer if the matrix is smaller.
    k = min(4, len(Sigma))
    SigK = asmatrix(diag(Sigma[:k]))
    # One row per item, expressed in the k-dimensional space.
    xformedItems = dataMat.T * U[:, :k] * SigK.I
    for j in range(n):
        userRating = dataMat[user, j]
        if userRating == 0 or j == item:
            continue
        similarity = simMeas(xformedItems[item, :].T,
                             xformedItems[j, :].T)
        print('the %d and %d similarity is:%f' % (item, j, similarity))
        simTotal += similarity
        ratSimTotal += similarity * userRating
    if simTotal == 0:
        return 0
    return ratSimTotal / simTotal


def recommend(dataMat, user, N=3, simMeas=cosSim, estMethod=standEst):
    """Return the N best-scoring unrated items for `user`.

    :param dataMat: rating matrix (numpy matrix), e.g. built from
        loadExData1()
    :param user: row index of the user
    :param N: number of recommendations to return (default 3)
    :param simMeas: similarity function handed to estMethod
        (default cosSim)
    :param estMethod: rating estimator called as
        estMethod(dataMat, user, simMeas, item) (default standEst)
    :return: list of (item index, estimated score) tuples, highest score
        first, or the string 'you rated everything' when the user's row
        has no 0 entries
    """
    # Column indices whose rating is 0 in the user's row.
    unratedItems = nonzero(dataMat[user, :].A == 0)[1]
    if len(unratedItems) == 0:
        return 'you rated everything'
    itemScores = [(item, estMethod(dataMat, user, simMeas, item))
                  for item in unratedItems]
    return sorted(itemScores, key=lambda jj: jj[1], reverse=True)[:N]


if __name__ == '__main__':
    data = asmatrix(loadExData1())
    recommendations = recommend(data, 1)
    print(recommendations)
import pandas as pd
from django.db import connection
# Rank the columns of the `model` table by their column totals and keep the
# labels of the highest-ranked ones.
select_sql = 'select * from model'
datas = pd.read_sql(select_sql, connection)  # whole table as a DataFrame

# Ignore the first two columns (presumably identifier fields -- confirm
# against the table schema) and total every remaining column.
temp = datas.iloc[:, 2:]
tp = temp.sum(axis=0)

# Largest totals first.
top_sorts = tp.sort_values(ascending=False)

# NOTE(review): despite the name `top3`, the slice keeps the first FOUR
# labels.  Behaviour is left unchanged; confirm which count is intended.
top3 = top_sorts.index[:4]
top_recommends = top3.values.tolist()