Getting started with Milvus
The effect after the data has been inserted can be seen in the run output further below. The code is as follows:
import configparser
from pymilvus import connections, Collection, DataType, FieldSchema, CollectionSchema
import numpy as np


def create_collection():
    # Define the schema
    fields = [
        FieldSchema(name="sentence_id", dtype=DataType.INT64, is_primary=True, auto_id=True),
        FieldSchema(name="sentence", dtype=DataType.VARCHAR, max_length=512),
        FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128),
    ]
    schema = CollectionSchema(fields, description="Sentence collection")

    # Create the collection
    collection = Collection(name="sentence_collection", schema=schema)
    return collection


def insert_data(collection):
    sentences = ["这是第一句。", "这是第二句。", "这是第三句。"]
    embeddings = np.random.rand(len(sentences), 128).tolist()  # Generate 128-dimensional vectors
    entities = [sentences, embeddings]
    insert_result = collection.insert(entities)
    print(f"Inserted {len(insert_result.primary_keys)} records into collection.")


def create_index(collection):
    index_params = {
        "index_type": "IVF_FLAT",
        "params": {"nlist": 128},
        "metric_type": "L2",
    }
    collection.create_index(field_name="embedding", index_params=index_params)
    print("Index created.")


def search_data(collection, query_sentence):
    query_embedding = np.random.rand(1, 128).tolist()  # Generate a vector for the query sentence
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    results = collection.search(
        data=query_embedding,
        anns_field="embedding",
        param=search_params,
        limit=3,
        expr=None,
        output_fields=["sentence"],
    )
    for hits in results:
        for hit in hits:
            print(f"Match found: {hit.id} with distance: {hit.distance}, sentence: {hit.entity.get('sentence')}")


if __name__ == '__main__':
    # Connect to Milvus
    cfp = configparser.RawConfigParser()
    cfp.read('config.ini')
    milvus_uri = cfp.get('example', 'uri')
    token = cfp.get('example', 'token')
    connections.connect("default", uri=milvus_uri, token=token)
    print(f"Connecting to DB: {milvus_uri}")

    # Create collection
    collection = create_collection()

    # Insert data
    insert_data(collection)

    # Create index
    create_index(collection)

    # Load the collection into memory
    collection.load()

    # Search data
    search_data(collection, "这是一个查询句子。")
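The script reads its connection settings from a config.ini file with an [example] section that provides uri and token (the section and key names come from the code above). A minimal sketch with placeholder values, which you would replace with your own Zilliz Cloud endpoint and API key (or a user:password pair):

[example]
uri = https://your-instance.api.your-region.zillizcloud.com
token = your_api_key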
Run output:
python hello_zilliz_vectordb.py
Connecting to DB: https://in03-ca69f49bb65709f.api.gcp-us-west1.zillizcloud.com
Inserted 3 records into collection.
Index created.
Match found: 450140263656791260 with distance: 19.557846069335938, sentence: 这是第二句。
Match found: 450140263656791261 with distance: 20.327802658081055, sentence: 这是第三句。
Match found: 450140263656791259 with distance: 20.40052032470703, sentence: 这是第一句。
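Because the embeddings are random, the distances and the ranking above are arbitrary and will change on every run. Also note that re-running the script inserts three more rows into the same collection; if you want each run to start from an empty collection, you can drop it first. A minimal sketch, assuming the connection has already been established as in the code above:

from pymilvus import utility

# Drop the demo collection if it already exists so each run starts fresh
if utility.has_collection("sentence_collection"):
    utility.drop_collection("sentence_collection")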
Notes:
- Vector conversion: the code above uses random vectors to stand in for sentence embeddings. In a real application you need an NLP model (for example a Chinese BERT) to convert the Chinese sentences into vectors; see the sketch after this list.
- Character encoding: make sure the correct character encoding (usually UTF-8) is used when reading and processing Chinese text.
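As a rough illustration of the vector-conversion point, the snippet below swaps the random vectors for real sentence embeddings from the sentence-transformers library. The model name is just one multilingual option, an assumption rather than something used in the original code, and its output dimension (384) must match the dim declared for the FLOAT_VECTOR field, so the schema would need to change from 128 accordingly.

# A minimal sketch: real sentence embeddings instead of np.random.rand.
# Assumption: the multilingual MiniLM model below; any Chinese-capable
# embedding model works as long as the collection's dim matches its output size.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")  # 384-dim output

def embed(sentences):
    # encode() returns a numpy array of shape (len(sentences), 384)
    return model.encode(sentences).tolist()

sentences = ["这是第一句。", "这是第二句。", "这是第三句。"]
embeddings = embed(sentences)                     # use in insert_data() instead of random vectors
query_embedding = embed(["这是一个查询句子。"])     # use in search_data() for the query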