# numpy初用 (first steps with NumPy)

import numpy as np

# Print summary statistics and a coarse histogram for every series in `stat`.
# `stat` is defined outside this snippet; the code only relies on it mapping
# a key to a mutable list of numbers.
for k, v in stat.items():
    print(k)

    if not v:
        # min()/max() of an empty array raise ValueError; nothing to report.
        continue

    # Sorted in place so that the strided sample printed at the end is an
    # ordered, evenly spaced pick through the values.
    v.sort()
    # Optional trimming of the lowest/highest 3% was left disabled:
    # v = v[len(v) * 3 // 100:len(v) * 97 // 100]

    data = np.array(v)
    lowest, highest = data.min(), data.max()

    # 20 evenly spaced edges between min and max give 19 equal-width bins
    # (np.histogram(data, bins=20) would give 20 bins instead).
    hist, bins = np.histogram(data, bins=np.linspace(lowest, highest, 20))

    print("%s %s %s" % (lowest, highest, np.average(data)))
    print(hist)
    print(bins)

    # Roughly every 20th-quantile value. The step is clamped to 1 because
    # len(v) // 20 is 0 for fewer than 20 values and a zero slice step raises.
    step = max(1, len(v) // 20)
    print(v[0::step])

# Build one unit-length, importance-weighted embedding per line of the
# 'candidate_words' file and add it to the index under the line number.
# query_vocab, vec_space, term_imp_dict and aindex are defined outside this
# snippet. NOTE(review): aindex.add_item(id, vector) looks like an Annoy
# index -- confirm.
with open('candidate_words') as f:
    for nline, line in enumerate(f):
        terms = line.strip().split(' ')

        # Recorded for every line, including lines skipped below.
        # NOTE(review): skipped lines get a vocab entry with no matching
        # index item -- confirm that is intended.
        query_vocab[''.join(terms)] = nline

        # Only terms that have an embedding contribute; filtering once keeps
        # the vectors and their weights aligned.
        known_terms = [term for term in terms if term in vec_space]
        if not known_terms:
            continue

        weights = [term_imp_dict.get(term, 0.0) for term in known_terms]
        # With no positive importance the weighted average is undefined.
        # (Assumes importances are non-negative -- TODO confirm.)
        if max(weights) == 0:
            continue

        # Weighted mean of the term vectors; this replaced an earlier
        # unweighted element-wise sum that was left commented out.
        vecs = np.array([vec_space[term] for term in known_terms])
        terms_vec = np.average(vecs, axis=0, weights=weights)

        terms_vec_len = np.linalg.norm(terms_vec)
        if terms_vec_len == 0:
            # A zero vector cannot be normalised; dividing would yield NaNs.
            continue

        # Normalise to unit length. The original stored this in a misspelt
        # name, so the un-normalised vector was what got indexed.
        terms_vec = terms_vec / terms_vec_len

        aindex.add_item(nline, terms_vec.tolist())

# posted on 2019-02-11 21:02 冰山上的博客 阅读(240) 评论(0) 编辑 收藏 举报