ES使用C# NEST创建索引支持nGram+MatchPhrase查询
ES写法:
PUT index_test { "settings": { "index.max_ngram_diff":5, "analysis": { "analyzer": { "ngram_analyzer_short": { "filter": "lowercase", "tokenizer": "ngram_tokenizer_short" }, "ngram_analyzer_long": { "filter": "lowercase", "tokenizer": "ngram_tokenizer_long" } }, "tokenizer": { "ngram_tokenizer_short": { "type": "nGram", "min_gram": "1", "max_gram": "4" }, "ngram_tokenizer_long": { "type": "nGram", "min_gram": "5", "max_gram": "5" } } } }, "mappings": { "properties": { "vcContent": { "type": "keyword", "fields": { "long_text": { "type": "text", "analyzer": "ngram_analyzer_long" }, "short_text": { "type": "text", "analyzer": "ngram_analyzer_short" } } } } } }
C#写法:
1、创建索引
/// <summary>
/// Creates the index <paramref name="indexName"/> when it does not exist yet, configuring two
/// lower-casing nGram analyzers and mapping the <c>name</c> field as a keyword with
/// <c>long_text</c> and <c>short_text</c> text sub-fields that use those analyzers.
/// Does nothing when the index already exists.
/// </summary>
/// <typeparam name="T">Document type whose properties are auto-mapped into the index.</typeparam>
/// <param name="elasticClient">Client used to talk to the cluster.</param>
/// <param name="indexName">Name of the index to create; must not be null, empty or whitespace.</param>
/// <param name="numberOfShards">Value written to <c>index.number_of_shards</c>.</param>
/// <param name="numberOfReplicas">Value written to <c>index.number_of_replicas</c>.</param>
/// <param name="refreshInterval">Refresh interval in seconds, written to <c>index.refresh_interval</c>.</param>
/// <exception cref="ArgumentException"><paramref name="indexName"/> is null, empty or whitespace.</exception>
/// <exception cref="InvalidOperationException">The create-index request did not return a valid response.</exception>
public static async Task CreateIndexAsync<T>(this ElasticClient elasticClient, string indexName = "", int numberOfShards = 5, int numberOfReplicas = 1, int refreshInterval = 5) where T : class
{
    if (string.IsNullOrWhiteSpace(indexName))
    {
        throw new ArgumentException("索引名称不可为空", nameof(indexName));
    }

    var existsResponse = await elasticClient.Indices.ExistsAsync(indexName);
    if (existsResponse.Exists)
    {
        // Nothing to do: the index is already there.
        return;
    }

    var rawSettings = new Dictionary<string, object>
    {
        ["index.number_of_shards"] = numberOfShards,
        ["index.number_of_replicas"] = numberOfReplicas,
        ["index.refresh_interval"] = refreshInterval + "s",
        // NOTE(review): a very large result window allows very deep paging — confirm this is intended.
        ["index.max_result_window"] = 2000000000,
        // Must be at least (max_gram - min_gram) of every nGram tokenizer registered below.
        ["index.max_ngram_diff"] = 5,
    };

    var analyzers = new Analyzers();
    var tokenizers = new Tokenizers();

    // Short analyzer: grams of 1 to 4 characters, used for search input shorter than 5 characters.
    analyzers.Add("ngram_analyzer_short", new CustomAnalyzer
    {
        Tokenizer = "ngram_tokenizer_short",
        Filter = new List<string> { "lowercase" },
    });
    tokenizers.Add("ngram_tokenizer_short", new Nest.NGramTokenizer { MinGram = 1, MaxGram = 4 });

    // Long analyzer: fixed 5-character grams, used for search input of 5 or more characters.
    analyzers.Add("ngram_analyzer_long", new CustomAnalyzer
    {
        Tokenizer = "ngram_tokenizer_long",
        Filter = new List<string> { "lowercase" },
    });
    tokenizers.Add("ngram_tokenizer_long", new Nest.NGramTokenizer { MinGram = 5, MaxGram = 5 });

    var indexSettings = new IndexSettings(rawSettings)
    {
        Analysis = new Analysis
        {
            Analyzers = analyzers,
            Tokenizers = tokenizers,
        },
    };
    var indexState = new IndexState { Settings = indexSettings };

    var response = await elasticClient.Indices.CreateAsync(indexName, p => p
        .InitializeUsing(indexState)
        .Map<T>(x => x
            .AutoMap<T>()
            // Expose "name" as a keyword plus two nGram-analyzed text sub-fields.
            .Properties<T>(pp => pp
                .Keyword(k => k
                    .Name("name")
                    .Fields(f => f
                        .Text(longText => longText.Name("long_text").Analyzer("ngram_analyzer_long"))
                        .Text(shortText => shortText.Name("short_text").Analyzer("ngram_analyzer_short")))))));

    if (!response.IsValid)
    {
        // OriginalException may be absent, so prefer the server-side reason and fall back
        // null-safely instead of dereferencing it directly.
        var reason = response.ServerError?.Error?.Reason
                     ?? response.OriginalException?.Message
                     ?? response.DebugInformation;
        throw new InvalidOperationException($"创建索引失败:{reason}", response.OriginalException);
    }
}
2、搜索条件按字符长度指定搜索方式
// Build the "must" clauses of the bool query; the query type depends on the length of the input.
var mustFilters = new List<Func<QueryContainerDescriptor<TempList>, QueryContainer>>();
if (!string.IsNullOrEmpty(name))
{
    if (name.Length >= 5)
    {
        // Long input: phrase match against the sub-field analyzed with ngram_analyzer_long.
        mustFilters.Add(q => q.MatchPhrase(mp => mp.Field("name.long_text").Query(name)));
    }
    else
    {
        // Short input: exact lookup of a single gram in the short sub-field.
        // A term query is not analyzed, while the indexed grams went through the "lowercase"
        // filter, so the value has to be lower-cased here or mixed-case input never matches.
        var loweredName = name.ToLowerInvariant();
        mustFilters.Add(q => q.Term(tq => tq.Field("name.short_text").Value(loweredName)));
    }
}

// Run the search and return the matching documents.
var idxName = "index_test";
var result = await _esClientService.Client.SearchAsync<TempList>(s => s
    .Index(idxName)
    .Query(rq => rq.Bool(b => b.Must(mustFilters)))
    .From(0)
    .Size(10000));

if (!result.IsValid)
{
    // Surface failures instead of silently returning an empty list.
    var reason = result.ServerError?.Error?.Reason
                 ?? result.OriginalException?.Message
                 ?? result.DebugInformation;
    throw new InvalidOperationException($"Search on index '{idxName}' failed: {reason}", result.OriginalException);
}

// Copy into a new list rather than down-casting the read-only collection returned by the client.
return new List<TempList>(result.Documents);
3、实体映射
/// <summary>
/// Document type used with the search client in this sample. Property names are kept in lower
/// case exactly as declared; the <c>name</c> property corresponds to the <c>name</c> field that
/// the index mapping configures with nGram sub-fields.
/// </summary>
public class TempList
{
    /// <summary>Numeric identifier of the document.</summary>
    public int id { get; set; }

    /// <summary>Name of the document; the field the sample queries via its nGram sub-fields.</summary>
    public string name { get; set; }

    /// <summary>Free-form description of the document.</summary>
    public string description { get; set; }

    /// <summary>Numeric type code of the document.</summary>
    public int type { get; set; }
}
参考:https://www.elastic.co/guide/en/elasticsearch/client/net-api/7.x/multi-fields.html
NEST Nuget库版本:7.x.x