大数据之路【第十五篇】:数据挖掘--推荐算法
import web import sys reload(sys) sys.setdefaultencoding('utf-8') sys.path.append("./jieba/") import jieba import jieba.posseg import jieba.analyse urls = ( '/', 'index', '/test', 'test', ) app = web.application(urls, globals()) rec_map = {} with open('inverted.data', 'r') as fd: for line in fd: ss = line.strip().split('\t') if len(ss) != 2: continue token = ss[0].strip().encode('utf8') music_rec_list_str = ss[1].strip() for music_score in music_rec_list_str.split(''): name, score = music_score.strip().split('') if token not in rec_map: rec_map[token] = [] rec_map[token].append((name, round(float(score), 2))) print len(rec_map) class index: def GET(self): params = web.input() content = params.get('content', '') print 'content: ', content # for k, v in rec_map.items(): # if content == k: # print k # print v # print '====' # if content not in rec_map.keys(): # return 'no found!' # else: # tmp_list = [] # for tup in rec_map[content.encode('utf8')]: # name, score = tup # print name # tmp_list.append(name) # return '\n'.join(tmp_list) seg_list = jieba.cut(content, cut_all=False) result_map = {} for seg in seg_list: print 'seg: ', seg if seg in rec_map.keys(): print '1111111111' for name_score in rec_map[seg.encode('utf8')]: tmp_name, score = name_score name = tmp_name.encode('utf8') if name not in result_map: print '22222222' result_map[name] = score else: print '3333333' old_score = result_map[name] new_score = old_score + score result_map[name] = new_score rec_list = [] for k, v in result_map.items(): rec_list.append('\t'.join([k, str(v)])) return "\r\n".join(rec_list) class test: def GET(self): print web.input() return '222' if __name__ == "__main__": app.run()
搜索MV推荐
搜索周杰伦
########## 今天的苦逼是为了不这样一直苦逼下去!##########