# faiss usage notes (faiss 使用记录)
#
# Minimal demo of exact (brute-force) faiss indexes:
#   1) IndexFlatL2 — Euclidean distance
#   2) IndexFlatIP — inner product (cosine similarity for normalized vectors)
import sys
import faiss
import numpy as np

d = 64    # vector dimension
nb = 100  # number of database vectors
nq = 10   # number of query vectors
np.random.seed(1234)  # deterministic data so repeated runs print the same output

xb = np.random.random((nb, d)).astype('float32')
print(xb[:2])
# Perturb the first component so vectors are ordered roughly by their row index.
xb[:, 0] += np.arange(nb).astype('float32') / 1000
print(xb[:2])
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq).astype('float32') / 1000

index = faiss.IndexFlatL2(d)  # build the index; flat indexes need no training
print(index.is_trained, "@@")
index.add(xb)
print(index.ntotal)  # how many vectors were added

k = 4  # neighbours to return per query
# Sanity check: search the first 5 database vectors against the index.
# Each vector's nearest neighbour should be itself with distance 0.
D, I = index.search(xb[:5], k)
print("IIIIIIIIIIII")
print(I)
print("ddddddddd")
print(D)
print("#########")

# Same sanity check with an inner-product index. Note: for IP, LARGER
# scores mean more similar, so self-matches have the highest score.
index = faiss.IndexFlatIP(d)
index.add(xb)
k = 4
D, I = index.search(xb[:5], k)
print(I)
print("ddddddddd")
print(D)
from pathlib import Path
import faiss
import numpy as np
# Model that maps sentences to dense vectors for semantic search.
from sentence_transformers import SentenceTransformer

print("开始加载模型")
model = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')
print("模型加载完毕")

sentences = ["abc", "abcd", "你好天气如何", "你好天气"]

save_file = Path("data.npy")
if save_file.exists():
    embeddings = np.load(save_file)
    # Guard against a stale cache: if the cached row count no longer matches
    # the sentence list, the search results would map to the wrong sentences —
    # recompute and overwrite instead of trusting the old file.
    if embeddings.shape[0] != len(sentences):
        embeddings = model.encode(sentences)
        np.save(save_file, embeddings)
else:
    # Get embeddings of sentences and cache them for the next run.
    print("文本转向量数据")
    embeddings = model.encode(sentences)
    print("文本转向量数据完毕,数据量", len(embeddings))
    np.save(save_file, embeddings)

dimension = embeddings.shape[1]
# Exact L2 index: no training phase needed (is_trained is True immediately).
index = faiss.IndexFlatL2(dimension)
nlist = 50  # cluster count if an IVF index were used instead (kept for reference)
# index = faiss.IndexIVFFlat(quantizer, dimension, nlist)
print(index.is_trained)
# index.train(embeddings)
index.add(embeddings)
print("建立向量索引完毕,数据量", index.ntotal)

topK = 2
search = model.encode(["今天天气如何"])
print(search)
# I holds, for each query row, the indices of the topK nearest sentences;
# D holds the corresponding squared L2 distances.
D, I = index.search(search, topK)
print(np.array(sentences)[I])

# 不论你在什么时候开始,重要的是开始之后就不要停止。
# 不论你在什么时候结束,重要的是结束之后就不要悔恨。