数据挖掘之余弦相似度

# coding:utf-8
__author__ = 'hdfs'
from math import sqrt

# Sample data set: user name -> {artist name -> rating}. A user's inner
# mapping only contains the artists that user has rated.
users = {
    "Angelica": {
        "Blues Traveler": 3.5,
        "Broken Bells": 2.0,
        "Norah Jones": 4.5,
        "Phoenix": 5.0,
        "Slightly Stoopid": 1.5,
        "The Strokes": 2.5,
        "Vampire Weekend": 2.0,
    },
    "Hailey": {
        "Broken Bells": 4.0,
        "Deadmau5": 1.0,
        "Norah Jones": 4.0,
        "The Strokes": 4.0,
        "Vampire Weekend": 1.0,
    },
    "Jordyn": {
        "Broken Bells": 4.5,
        "Deadmau5": 4.0,
        "Norah Jones": 5.0,
        "Phoenix": 5.0,
        "Slightly Stoopid": 4.5,
        "The Strokes": 4.0,
        "Vampire Weekend": 4.0,
    },
    "Sam": {
        "Blues Traveler": 5.0,
        "Broken Bells": 2.0,
        "Norah Jones": 3.0,
        "Phoenix": 5.0,
        "Slightly Stoopid": 4.0,
        "The Strokes": 5.0,
    },
    "Veronica": {
        "Blues Traveler": 3.0,
        "Norah Jones": 5.0,
        "Phoenix": 4.0,
        "Slightly Stoopid": 2.5,
        "The Strokes": 3.0,
    },
}


def coscoefficient(rating1, rating2):
    """Return the cosine similarity of two sparse rating vectors.

    Each argument maps an item name to a numeric rating. An item missing
    from a mapping is treated as a rating of 0: it adds nothing to the dot
    product or to that vector's norm.

    :param rating1: mapping of item -> rating for the first user.
    :param rating2: mapping of item -> rating for the second user.
    :return: dot(rating1, rating2) / (|rating1| * |rating2|), or 0.0 when
        either vector has zero length (empty or all-zero ratings), where
        the cosine is mathematically undefined.
    """
    # Squared Euclidean norms are taken over *all* of each user's ratings,
    # not only the items both users rated.
    sq_norm1 = sum(value * value for value in rating1.values())
    sq_norm2 = sum(value * value for value in rating2.values())

    denominator = sqrt(sq_norm1) * sqrt(sq_norm2)
    if denominator == 0:
        # Avoid ZeroDivisionError for empty / all-zero rating vectors.
        return 0.0

    # Only items present in both mappings contribute to the dot product.
    dot_product = sum(
        value * rating2[item]
        for item, value in rating1.items()
        if item in rating2
    )
    return dot_product / denominator


if __name__ == "__main__":
    # Demo: print the cosine similarity between two of the sample users.
    angelica_ratings = users['Angelica']
    veronica_ratings = users['Veronica']
    similarity = coscoefficient(angelica_ratings, veronica_ratings)
    print(similarity)

  

posted @ 2016-07-03 08:33  similarface  阅读(223)  评论(0)  编辑  收藏  举报