from configparser import ConfigParser
from concurrent.futures import ThreadPoolExecutor
import pymysql
import math
import time
def get_db_conn():
    """Open and return a new pymysql connection.

    The connection parameters come from the module-level ``host``, ``user``,
    ``password``, ``database`` and ``port`` values. The connection is created
    with autocommit enabled and the ``utf8mb4`` character set. The caller is
    responsible for closing the returned connection.
    """
    settings = {
        "host": host,
        "user": user,
        "password": password,
        "database": database,
        "port": port,
        "autocommit": True,
        "charset": "utf8mb4",
    }
    connection = pymysql.connect(**settings)
    return connection
def get_train():
    """Load the training data as a nested user -> book mapping.

    Selects ``user_id, book_id`` pairs from
    ``kid_user_read_book_for_reading_recommend`` (with the module-level
    ``limit`` SQL fragment appended) and folds them into a dict.

    Returns:
        dict: ``{user_id: {book_id: 1, ...}, ...}``; each book a user appears
        with is recorded once with the value ``1``.
    """
    print("获取训练数据")
    con = get_db_conn()
    try:
        cursor = con.cursor()
        # ``limit`` is a module-level constant (not user input), so plain
        # string formatting is kept here.
        cursor.execute("SELECT user_id, book_id FROM kid_user_read_book_for_reading_recommend %s" % limit)
        results = cursor.fetchall()
    finally:
        # Always release the connection, even if the query fails.
        con.close()

    ret = {}
    for user_id, book_id in results:
        ret.setdefault(user_id, {})[book_id] = 1
    return ret
def take_second(elem):
    """Return the item at index 1 of ``elem``; used as a sort key."""
    second = elem[1]
    return second
def item_similarity():
    """Build item co-occurrence counts and persist the similarity table.

    Steps:
      1. For every user in the module-level ``train`` mapping, increment
         ``n[i]`` for each book ``i`` and ``c[i][j]`` for each ordered pair
         of distinct books ``(i, j)`` that user appears with.
      2. Recreate the staging table
         ``tmp_reading_recommend_for_you_by_read``.
      3. Submit one ``save`` task per book to a thread pool.
      4. Replace ``kid_reading_recommend_for_you_by_read`` with the staging
         table.

    Raises:
        Exception: the first exception raised by any ``save`` task is
        re-raised before step 4, so the live table is never replaced by an
        incompletely populated staging table.
    """
    global total_count

    print("计算物品-物品的共同矩阵")
    count = len(train)
    for index, items in enumerate(train.values(), start=1):
        print("进度:%s/%s" % (index, count))
        for i in items:
            n[i] = n.get(i, 0) + 1
            co_counts = c.setdefault(i, {})
            for j in items:
                if i == j:
                    continue
                co_counts[j] = co_counts.get(j, 0) + 1

    # Start from an empty staging table with the same schema as the live one.
    con = get_db_conn()
    try:
        cursor = con.cursor()
        cursor.execute("DROP TABLE IF EXISTS tmp_reading_recommend_for_you_by_read")
        cursor.execute("CREATE TABLE tmp_reading_recommend_for_you_by_read LIKE kid_reading_recommend_for_you_by_read")
    finally:
        con.close()

    # Compute the similarity matrix, one task per book.
    print("计算相似度矩阵")
    total_count = len(c)
    with ThreadPoolExecutor(300) as executor:
        futures = [
            executor.submit(save, book_id, related_items)
            for book_id, related_items in c.items()
        ]
    # Leaving the ``with`` block waits for all tasks, but exceptions stay
    # stored in the futures. Surface them here so a failed insert aborts the
    # run instead of silently producing a partial table.
    for future in futures:
        future.result()

    # Swap the fully populated staging table into place.
    con = get_db_conn()
    try:
        cursor = con.cursor()
        cursor.execute("DROP TABLE kid_reading_recommend_for_you_by_read")
        cursor.execute("RENAME TABLE tmp_reading_recommend_for_you_by_read TO kid_reading_recommend_for_you_by_read")
    finally:
        con.close()
def save(book_id, related_items):
    """Store the most similar books for ``book_id`` in the staging table.

    Each related book ``j`` is scored as
    ``cij / sqrt(n[book_id] * n[j])``. The scores are sorted in descending
    order and the first ``save_top_count`` are inserted into
    ``tmp_reading_recommend_for_you_by_read``.

    Args:
        book_id: book whose neighbours are stored; looked up in ``n``.
        related_items: mapping ``{other_book_id: co-occurrence count}``.

    Side effects:
        Inserts rows through a fresh connection, increments the module-level
        ``finish_count`` and prints a progress line.
    """
    global finish_count

    scored = [
        (j, cij / math.sqrt(n[book_id] * n[j]))
        for j, cij in related_items.items()
    ]
    scored.sort(key=take_second, reverse=True)
    rows = [(book_id, j, value) for j, value in scored[:save_top_count]]

    # Nothing to insert for a book with no co-occurring books, so skip the
    # connection entirely in that case.
    if rows:
        # ``book_id`` is passed as a bound parameter rather than concatenated
        # into the statement text.
        sql = "INSERT INTO tmp_reading_recommend_for_you_by_read(book_id,similar_book_id,value) VALUES(%s,%s,%s)"
        con = get_db_conn()
        try:
            con.cursor().executemany(sql, rows)
        finally:
            # Release the connection even when the insert fails.
            con.close()

    # NOTE(review): this function is submitted to a thread pool by
    # item_similarity(); the increment below is not atomic, so the printed
    # progress may occasionally be off. It only affects the progress output.
    finish_count += 1
    print("进度:%s/%s" % (finish_count, total_count))
# Wall-clock start time, reported when the run finishes.
start_time = time.strftime("%H:%M:%S", time.localtime())

# SQL fragment appended to the training query in get_train(); empty means
# nothing is appended (presumably a LIMIT clause for test runs - confirm).
limit = ""
# Maximum number of similar books stored per book by save().
save_top_count = 30
# Progress counters shared by item_similarity() and save().
finish_count = 0
total_count = 0

# Database settings are read from the [mysql] section of the config file.
cf = ConfigParser()
# ConfigParser.read() silently skips files it cannot open, which would only
# surface later as a confusing NoSectionError. Fail fast with a clear message.
if not cf.read("../py_config.ini"):
    raise FileNotFoundError("config file not found or unreadable: ../py_config.ini")
host = cf.get("mysql", "host")
user = cf.get("mysql", "user")
password = cf.get("mysql", "password")
database = cf.get("mysql", "db")
port = cf.getint("mysql", "port")

# Run the preparatory stored procedure before loading the training data
# (presumably it refreshes the table get_train() reads - confirm).
conn = get_db_conn()
try:
    conn.cursor().callproc("计算用户读过的书(为你推荐)")
finally:
    conn.close()

# Item-item co-occurrence matrix: c[i][j] counts users that have both i and j.
c = dict()
# Number of distinct users each item appears with in the training data.
n = dict()
train = get_train()
item_similarity()

# Post-processing stored procedure, run once the similarity table is rebuilt.
conn = get_db_conn()
try:
    conn.cursor().callproc("calcReadingRecommendForYou", [1])
finally:
    conn.close()

end_time = time.strftime("%H:%M:%S", time.localtime())
print("运行结束,开始于 %s,结束于 %s" % (start_time, end_time))