Elasticsearch拼音和ik分词器的结合应用

一、创建索引时,自定义拼音分词和ik分词

# Create the index with two custom analyzers. Both run the `my_pinyin` token
# filter followed by `word_delimiter`; they differ only in the ik tokenizer:
#   ik_pinyin_analyzer -> tokenizer ik_smart
#   pinyin_analyzer    -> tokenizer ik_max_word
# ("ik_pinyin_analyzer" / "pinyin_analyzer" are user-chosen analyzer names.)
#
# The analysis config is nested under "settings" so the body is accepted by the
# create-index API regardless of version (a bare top-level "index" key is not a
# recognised section of the create-index body).
#
# `my_pinyin` filter options (descriptions follow the pinyin plugin README):
#   keep_separate_first_letter (false) - when enabled, each first letter is kept
#       as its own term, e.g. 刘德华 -> l, d, h. Default: false. Queries may become
#       too fuzzy because such terms are very frequent.
#   keep_full_pinyin (true)            - emit the full pinyin of every character,
#       e.g. 刘德华 -> liu, de, hua. Default: true.
#   keep_original (true)               - also keep the original input as a term.
#       Default: false.
#   limit_first_letter_length (16)     - maximum length of the first-letter
#       result. Default: 16.
#   lowercase (true)                   - lowercase non-Chinese letters.
#       Default: true.
#   remove_duplicated_term (true)      - drop duplicated terms to save index
#       space, e.g. de的 -> de. Default: false. Position-dependent queries may be
#       affected.
PUT /my_index
{
    "settings": {
        "index": {
            "analysis": {
                "analyzer": {
                    "ik_pinyin_analyzer": {
                        "type": "custom",
                        "tokenizer": "ik_smart",
                        "filter": ["my_pinyin", "word_delimiter"]
                    },
                    "pinyin_analyzer": {
                        "type": "custom",
                        "tokenizer": "ik_max_word",
                        "filter": ["my_pinyin", "word_delimiter"]
                    }
                },
                "filter": {
                    "my_pinyin": {
                        "type": "pinyin",
                        "keep_separate_first_letter": false,
                        "keep_full_pinyin": true,
                        "keep_original": true,
                        "limit_first_letter_length": 16,
                        "lowercase": true,
                        "remove_duplicated_term": true
                    }
                }
            }
        }
    }
}

 

二、创建 mapping 时,为字段指定分词器(注:同一索引下建立不同的 type 时,同名字段的映射属性必须保持一致)

# Define the mapping for type `user` in `my_index`.
#
#   id        - integer.
#   userName  - text analyzed with the custom analyzer `ik_pinyin_analyzer`
#               (the name must match an analyzer defined in the index settings).
#               The `raw` sub-field is a keyword: its value is not analyzed, so
#               it can be used for exact matching and aggregations.
#   reason    - text analyzed with `ik_pinyin_analyzer`.
#
# store: when a field has store=true its value is stored separately, and a
# request for that stored field is served from the field's own storage instead
# of being loaded from _source. It is false here, so values come from _source.
# The value is written as the JSON boolean `false`; the legacy string "no" is
# not a valid boolean for versions that parse booleans strictly.
#
# NOTE(review): mapping-level `boost` is index-time boosting, which is
# deprecated in recent Elasticsearch versions - confirm it is still wanted.
# NOTE(review): `fielddata: true` on `userName` loads the analyzed terms on
# heap; the aggregation in this article uses `userName.raw`, so it may be
# unnecessary - confirm before removing.
POST /my_index/user/_mapping
{
    "user": {
        "properties": {
            "id": {
                "type": "integer"
            },
            "userName": {
                "type": "text",
                "store": false,
                "term_vector": "with_positions_offsets",
                "analyzer": "ik_pinyin_analyzer",
                "boost": 10,
                "fielddata": true,
                "fields": {
                    "raw": {
                        "type": "keyword"
                    }
                }
            },
            "reason": {
                "type": "text",
                "store": false,
                "term_vector": "with_positions_offsets",
                "analyzer": "ik_pinyin_analyzer",
                "boost": 10
            }
        }
    }
}

 

 

测试

# Index two sample documents of type `user` (ids 1 and 2). Both share the same
# userName and differ only in `reason`, so userName searches return both while
# searches on `reason` can tell them apart.
PUT /my_index/user/1
{
  "id":1,
  "userName":"刘德华",
  "reason":"大帅哥"
}

PUT /my_index/user/2
{
  "id":2,
  "userName":"刘德华",
  "reason":"中华人民"
}

不分词查询

# Query the `userName.raw` sub-field (mapped as keyword, i.e. not analyzed)
# with the complete value 刘德华.
GET /my_index/user/_search
{
  "query": {
    "match": {
      "userName.raw": "刘德华"
    }
  }
}


{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 0.2876821,
        "_source": {
          "id": 2,
          "userName": "刘德华",
          "reason": "中华人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 0.2876821,
        "_source": {
          "id": 1,
          "userName": "刘德华",
          "reason": "大帅哥"
        }
      }
    ]
  }
}

 

分词查询

# Match query on the analyzed `userName` field using only the single
# character 刘.
GET /my_index/user/_search
{
  "query": {
    "match": {
      "userName": "刘"
    }
  }
}

{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.31331712,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 0.31331712,
        "_source": {
          "id": 2,
          "userName": "刘德华",
          "reason": "中华人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 0.31331712,
        "_source": {
          "id": 1,
          "userName": "刘德华",
          "reason": "大帅哥"
        }
      }
    ]
  }
}

 

拼音分词

# Match query on the analyzed `reason` field using the pinyin syllable
# "shuai" instead of Chinese characters.
GET /my_index/user/_search
{
  "query": {
    "match": {
      "reason": "shuai"
    }
  }
}


{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 3.4884284,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 3.4884284,
        "_source": {
          "id": 1,
          "userName": "刘德华",
          "reason": "大帅哥"
        }
      }
    ]
  }
}

 

分组聚合

# Match `userName` against the pinyin "liu", return at most 2 hits, and bucket
# the matching documents with a terms aggregation on the `userName.raw`
# keyword sub-field.
# NOTE(review): the aggregation is named `group_by_meetingType` although it
# groups on userName.raw; the name is also the key under "aggregations" in the
# response, so rename both together if it is changed.
GET /my_index/user/_search
{ 
  "size":2,
  "query": {
    "match": {
      "userName": "liu"
    }
  },
  "aggs": {
    "group_by_meetingType": {
      "terms": {
        "field": "userName.raw"
      }
    }
  }
}

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 3.133171,
    "hits": [
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "2",
        "_score": 3.133171,
        "_source": {
          "id": 2,
          "userName": "刘德华",
          "reason": "中华人民"
        }
      },
      {
        "_index": "my_index",
        "_type": "user",
        "_id": "1",
        "_score": 3.133171,
        "_source": {
          "id": 1,
          "userName": "刘德华",
          "reason": "大帅哥"
        }
      }
    ]
  },
  "aggregations": {
    "group_by_meetingType": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "刘德华",
          "doc_count": 2
        }
      ]
    }
  }
}

 

 

各位大神,以上都是个人理解,如果有不一样的想法或建议,欢迎评论!

posted @ 2018-10-31 16:35  Be_Your_Sun  阅读(2996)  评论(1)  编辑  收藏  举报