分享一段基于物品的协同过滤(ItemCF)推荐代码:根据用户读过的书推荐图书

import math
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from configparser import ConfigParser

import pymysql


def get_db_conn():
    """Open and return a new pymysql connection.

    The connection parameters come from the module-level names ``host``,
    ``user``, ``password``, ``database`` and ``port``.  The connection is
    created with autocommit enabled and the ``utf8mb4`` charset.  Closing
    it is the caller's responsibility.
    """
    settings = {
        "host": host,
        "user": user,
        "password": password,
        "database": database,
        "port": port,
        "autocommit": True,
        "charset": "utf8mb4",
    }
    return pymysql.connect(**settings)


def get_train():
    """Load the user -> books mapping used as training data.

    Selects ``(user_id, book_id)`` rows from
    ``kid_user_read_book_for_reading_recommend``; the module-level string
    ``limit`` is appended verbatim to the query.

    Returns:
        dict: ``{user_id: {book_id: 1, ...}, ...}``.  Duplicate rows for the
        same user/book pair collapse into a single entry.
    """
    print("获取训练数据")

    con = get_db_conn()
    try:
        cursor = con.cursor()
        # NOTE: `limit` is spliced into the SQL text, so it must only ever be
        # a trusted, module-defined fragment (never external input).
        cursor.execute("SELECT user_id, book_id FROM kid_user_read_book_for_reading_recommend %s" % limit)
        results = cursor.fetchall()
    finally:
        # Release the connection even when the query fails.
        con.close()

    ret = dict()
    for user_id, book_id in results:
        ret.setdefault(user_id, {})[book_id] = 1
    return ret


def take_second(elem):
    """Return the item at index 1 of ``elem`` (used as a sort key)."""
    second = elem[1]
    return second


def item_similarity():
    """Build the item co-occurrence data and publish the similarity table.

    Steps:
      1. Walk the module-level ``train`` mapping and fill the module-level
         dicts ``n`` (per item: number of users having it) and ``c``
         (per item pair: number of users having both).
      2. Recreate the staging table ``tmp_reading_recommend_for_you_by_read``
         with the structure of ``kid_reading_recommend_for_you_by_read``.
      3. Run ``save`` for every item on a thread pool to fill the staging
         table.
      4. Drop the live table and rename the staging table into its place.

    Raises:
        Exception: whatever a ``save`` worker raised.  In that case the live
        table is left untouched instead of being replaced by a partially
        filled staging table.
    """
    global total_count

    print("计算物品-物品的共同矩阵")
    count = len(train)
    for index, items in enumerate(train.values(), start=1):
        print("进度:%s/%s" % (index, count))

        for i in items:
            n[i] = n.get(i, 0) + 1
            co_counts = c.setdefault(i, {})
            for j in items:
                if i != j:
                    co_counts[j] = co_counts.get(j, 0) + 1

    # Start from an empty staging table with the live table's structure.
    con = get_db_conn()
    try:
        cursor = con.cursor()
        cursor.execute("DROP TABLE IF EXISTS tmp_reading_recommend_for_you_by_read")
        cursor.execute("CREATE TABLE tmp_reading_recommend_for_you_by_read LIKE kid_reading_recommend_for_you_by_read")
    finally:
        con.close()

    print("计算相似度矩阵")
    total_count = len(c)
    with ThreadPoolExecutor(300) as executor:
        futures = [
            executor.submit(save, book_id, related_items)
            for book_id, related_items in c.items()
        ]
    # Leaving the `with` block waits for every worker.  An exception raised
    # inside a worker is only stored on its future, so ask each one for its
    # result to re-raise the first failure before touching the live table.
    for future in futures:
        future.result()

    # Every item was saved: swap the staging table in for the live one.
    con = get_db_conn()
    try:
        cursor = con.cursor()
        cursor.execute("DROP TABLE kid_reading_recommend_for_you_by_read")
        cursor.execute("RENAME TABLE tmp_reading_recommend_for_you_by_read TO kid_reading_recommend_for_you_by_read")
    finally:
        con.close()


# Serialises updates to ``finish_count``: ``save`` runs on many worker threads
# and ``finish_count += 1`` is a non-atomic read-modify-write.
_finish_count_lock = threading.Lock()


def save(book_id, related_items):
    """Store the most similar items for one book in the staging table.

    For every related item ``j`` the similarity is
    ``cij / sqrt(n[book_id] * n[j])``, where ``cij`` is the co-occurrence
    count and ``n`` is the module-level per-item user count.  The
    ``save_top_count`` highest-scoring items are inserted into
    ``tmp_reading_recommend_for_you_by_read``.

    Args:
        book_id: the item whose neighbours are being stored.
        related_items: mapping ``{other_item: co-occurrence count}``.

    Side effects:
        Inserts rows through a fresh database connection, increments the
        module-level ``finish_count`` and prints a progress line.
    """
    global finish_count

    scored = [
        (j, cij / math.sqrt(n[book_id] * n[j]))
        for j, cij in related_items.items()
    ]
    # One stable descending sort followed by a single truncation selects the
    # same rows as re-sorting and truncating after every append.
    scored.sort(key=take_second, reverse=True)
    rows = [(book_id, j, value) for j, value in scored[:save_top_count]]

    # book_id is bound as a query parameter rather than spliced into the SQL.
    sql = "INSERT INTO tmp_reading_recommend_for_you_by_read(book_id,similar_book_id,value) VALUES(%s,%s,%s)"
    con = get_db_conn()
    try:
        con.cursor().executemany(sql, rows)
    finally:
        # Release the connection even when the insert fails.
        con.close()

    with _finish_count_lock:
        finish_count += 1
        finished = finish_count
    print("进度:%s/%s" % (finished, total_count))


# Script body: load settings, prepare the input data via a stored procedure,
# compute and store item similarities, then run the final stored procedure.
start_time = time.strftime("%H:%M:%S", time.localtime())

# Text appended verbatim to the training query in get_train().
limit = ""
# Number of most-similar items stored per book.
save_top_count = 30

# Progress counters used by save() / item_similarity().
finish_count = 0
total_count = 0

cf = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the
# list of files it did parse; fail with a clear error instead of a later
# NoSectionError.
if not cf.read("../py_config.ini"):
    raise FileNotFoundError("../py_config.ini")
host = cf.get("mysql", "host")
user = cf.get("mysql", "user")
password = cf.get("mysql", "password")
database = cf.get("mysql", "db")
port = cf.getint("mysql", "port")

conn = get_db_conn()
try:
    conn.cursor().callproc("计算用户读过的书(为你推荐)")
finally:
    conn.close()

# Item-item co-occurrence matrix: c[i][j] = number of users having both.
c = dict()
# Number of distinct users having each item: n[i].
n = dict()
train = get_train()
item_similarity()

conn = get_db_conn()
try:
    conn.cursor().callproc("calcReadingRecommendForYou", [1])
finally:
    conn.close()

end_time = time.strftime("%H:%M:%S", time.localtime())
print("运行结束,开始于 %s,结束于 %s" % (start_time, end_time))
posted @ 2020-03-14 11:15  荣神益人  阅读(189)  评论(0)  编辑  收藏  举报