faiss 使用记录 (notes on using faiss)

import sys
import faiss
import numpy as np

# Demo of exact nearest-neighbour search with faiss flat indexes.
# Build a small random dataset: nb database vectors and nq query vectors,
# each of dimension d (mirrors the official faiss tutorial).
d = 64
nb = 100
nq = 10
np.random.seed(1234)  # fixed seed so the run is reproducible
xb = np.random.random((nb, d)).astype('float32')
print(xb[:2])
# Skew the first component by row index so the vectors are distinguishable.
xb[:, 0] += np.arange(nb).astype('float32') / 1000
print(xb[:2])
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq).astype('float32') / 1000

# Exact L2 (Euclidean) search: a flat index stores raw vectors and
# needs no training phase.
index = faiss.IndexFlatL2(d)  # build the index
print(index.is_trained, "@@")
index.add(xb)
print(index.ntotal)  # number of vectors stored in the index

k = 4  # retrieve the 4 nearest neighbours
# Sanity check: query with the first 5 database vectors; each row's
# first hit should be the vector itself at distance 0.
D, I = index.search(xb[:5], k)
print("IIIIIIIIIIII")
print(I)  # neighbour ids
print("ddddddddd")
print(D)  # squared L2 distances

print("#########")
# Same search but with inner-product similarity instead of L2 distance.
index = faiss.IndexFlatIP(d)
index.add(xb)
k = 4
D, I = index.search(xb[:5], k)
print(I)
print("ddddddddd")
print(D)
from pathlib import Path

import faiss
import numpy as np

# Load the embedding model and turn the sentences into dense vectors.
from sentence_transformers import SentenceTransformer

print("开始加载模型")
model = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')
print("模型加载完毕")

sentences = ["abc", "abcd", "你好天气如何", "你好天气"]


# Reuse cached embeddings when present, but only when the cache still
# matches the current sentence list — a stale cache would make the
# search result ids point at the wrong sentences.
embeddings = None
if Path("data.npy").exists():
    cached = np.load("data.npy")
    if len(cached) == len(sentences):
        embeddings = cached

if embeddings is None:
    # Get embeddings of sentences
    print("文本转向量数据")
    embeddings = model.encode(sentences)
    print("文本转向量数据完毕,数据量", len(embeddings))

    save_file = "data.npy"
    np.save(save_file, embeddings)

dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)  # exact L2 search, no training needed
nlist = 50
# index = faiss.IndexIVFFlat(quantizer, dimension, nlist)
print(index.is_trained)
# index.train(embeddings)
index.add(embeddings)
print("建立向量索引完毕,数据量", index.ntotal)


topK = 2
search = model.encode(["今天天气如何"])
print(search)
# D holds the topK distances, I the matching row ids into `sentences`.
D, I = index.search(search, topK)
print(np.array(sentences)[I])

posted @ 2023-07-05 15:28  不能说的秘密  阅读(64)  评论(0编辑  收藏  举报