Elasticsearch之SearchAPI

Search API

ES 的 Search API 分为两大类:第一类是 URI Search,用 HTTP GET 的方式在 URL 中使用查询参数以达到查询的目的;另一类为 Request Body Search,可以使用 ES 提供的基于 JSON 格式、更加完备的查询语言 Query DSL(Domain Specific Language)。

语法 范围
/_search 集群上所有的索引
/{索引名}/_search 指定索引名
/a,b/_search 索引a 和 b
/a*/_search 以 a 开头的索引

准备工作

新建索引

PUT student
{
  "mappings": {
    "properties": {
      // Student number
      "id": { "type": "long" },
      // Student name: a multi-field, indexed as text and, via the "raw" sub-field, as keyword
      "name": {
        "type": "text",
        "fields": {
          "raw": { "type": "keyword" }
        }
      },
      // Age
      "age": { "type": "integer" },
      // Sex, 0: male; 1: female
      "sex": { "type": "integer" },
      // Creation time, i.e. enrollment time.
      // Named "create_date" so the explicit date mapping applies to the
      // "create_date" field that the sample documents actually carry.
      "create_date": {
        "type": "date"
      },
      // Home address
      "address": {
        // object is the default type, so declaring it explicitly is optional
        "type": "object",
        "properties": {
          // Province
          "province": { "type": "keyword" },
          // City
          "city": { "type": "keyword" },
          // District
          "district": { "type": "keyword" },
          // Detailed address (spelling "detail_adress" kept: the sample documents use the same key)
          "detail_adress": { "type": "text" }
        }
      },
      // Transcript, stored as nested objects
      "transcript": {
        // nested keeps each subject/score pair together as its own hidden document
        "type": "nested",
        "properties": {
          // Subject
          "subject": { "type": "keyword" },
          // Score
          "score": { "type": "integer" }
        }
      }
    }
  }
}

添加数据

# Bulk-create the three sample student documents (IDs 1-3) in the student index
POST /student/_bulk
{"create":{"_id":1}}
{"id":100000001,"name":"张三","age":22,"sex":0,"create_date":"2012-09-15T00:00:00","address":{"province":"浙江","city":"杭州","district":"滨江","detail_adress":"滨康路106号" },"transcript":[{"subject":"语文","score":100},{"subject":"数学","score":59}]}
{"create":{"_id":2}}
{"id":100000002,"name":"李四","age":23,"sex":1,"create_date":"2012-09-15T00:00:00","address":{"province":"浙江","city":"杭州","district":"富阳","detail_adress":"银湖街道106号" },"transcript":[{"subject":"语文","score":60},{"subject":"数学","score":60}]}
{"create":{"_id":3}}
{"id":100000003,"name":"王五","age":21,"sex":1,"create_date":"2012-09-15T00:00:00","address":{"province":"河南","city":"郑州","district":"中原","detail_adress":"中原街道556号" },"transcript":[{"subject":"语文","score":70},{"subject":"数学","score":55}]}

URI Search

GET /student/_search?q=name:张三

URI Search 使用的是 GET 方式,其中 q 指定查询语句,语法为 Query String Syntax,是 KV 键值对的形式;上面的请求表示对 name 字段进行查询,查询包含 张三 的所有文档。

URI Search 有很多参数可以指定,除了 q 还有如下参数:

- df:默认字段,不指定时会对所有字段进行查询
- sort:根据字段名排序
- from:返回的索引匹配结果的开始值,默认为 0
- size:搜索结果返回的条数,默认为 10
- timeout:超时的时间设置
- _source:只返回索引中指定的列,多个列中间用逗号分开
- analyzer:当分析查询字符串的时候使用的分词器
- analyze_wildcard:通配符或者前缀查询是否被分析,默认为 false
- explain:在每个返回结果中,将包含评分机制的解释
- lenient:若设置为 true,字段类型转换失败的时候将被忽略,默认为 false
- default_operator:默认多个条件的关系,AND 或者 OR,默认为 OR
- search_type:搜索的类型,可以为 dfs_query_then_fetch 或 query_then_fetch,默认为 query_then_fetch

实例

只返回姓名、性别字段

GET /student/_search?_source=name,sex

返回结果:
{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "student",
        "_id": "1",
        "_score": 1,
        "_source": {
          "name": "张三",
          "sex": 0
        }
      },
      {
        "_index": "student",
        "_id": "2",
        "_score": 1,
        "_source": {
          "name": "李四",
          "sex": 1
        }
      },
      {
        "_index": "student",
        "_id": "3",
        "_score": 1,
        "_source": {
          "name": "王五",
          "sex": 1
        }
      }
    ]
  }
}

获取性别为女的全部记录

GET /student/_search?_source=name,sex&q=sex:1

返回结果:
{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "student",
        "_id": "2",
        "_score": 1,
        "_source": {
          "name": "李四",
          "sex": 1
        }
      },
      {
        "_index": "student",
        "_id": "3",
        "_score": 1,
        "_source": {
          "name": "王五",
          "sex": 1
        }
      }
    ]
  }
}

获取性别为女的一条记录

GET /student/_search?_source=name,sex&q=sex:1&size=1

返回结果:
{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "student",
        "_id": "2",
        "_score": 1,
        "_source": {
          "name": "李四",
          "sex": 1
        }
      }
    ]
  }
}

获取性别为女的第二条记录

GET /student/_search?_source=name,sex&q=sex:1&size=1&from=1

返回结果:
{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "student",
        "_id": "3",
        "_score": 1,
        "_source": {
          "name": "王五",
          "sex": 1
        }
      }
    ]
  }
}

URI Search 好处就是操作简单,只要写个 URI 就可以了,方便测试,但是 URI Search 只包含一部分查询语法,不能覆盖所有 ES 支持的查询语法

在 ES 中一些高阶用法只能在 Request Body 里做,所以我们尽量使用 Request Body Search,它支持 GET 和 POST 方式对索引进行查询,需要指定操作的索引名称,同样也要通过 _search 来标明这个请求为搜索请求,我们可以在请求体中使用 ES 提供的 DSL,下面这个例子就是简单的 Query DSL:

GET /student/_search
{
  // Simplest Query DSL request: match every document in the student index
  "query": { "match_all": {} }
}

实例

只返回姓名、性别字段

GET /student/_search
{
  // Return only the name and sex fields of each hit's _source
  "_source": {
    "includes": ["name", "sex"]
  }
}

获取性别为女的全部记录

GET /student/_search
{
  // Return only the name and sex fields of each hit's _source
  "_source": {
    "includes": ["name", "sex"]
  },
  "query": {
    // Exact-value match on the integer sex field (1 = female in this index)
    "term": {
      "sex": { "value": 1 }
    }
  }
}

获取性别为女的一条记录

GET /student/_search
{
  // Cap the result set at a single hit
  "size": 1,
  "_source": {
    "includes": ["name", "sex"]
  },
  "query": {
    // Exact-value match on the integer sex field (1 = female in this index)
    "term": {
      "sex": { "value": 1 }
    }
  }
}

获取性别为女的第二条记录

GET /student/_search
{
  // from + size together act as pagination, similar to SQL "LIMIT 1,1":
  // from is the zero-based offset of the first hit (default 0),
  // size is the number of hits to return (default 10).
  "from": 1,
  "size": 1,
  "_source": {
    "includes": ["name", "sex"]
  },
  "query": {
    // Exact-value match on the integer sex field (1 = female in this index)
    "term": {
      "sex": { "value": 1 }
    }
  }
}

按年龄倒序排序

GET /student/_search
{
  "_source": {
    "includes": ["name", "sex", "age"]
  },
  // Order hits by age, oldest first
  "sort": [
    {
      "age": { "order": "desc" }
    }
  ]
}

返回结果:

{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "student",
        "_id": "2",
        "_score": null,
        "_source": {
          "name": "李四",
          "age": 23,
          "sex": 1
        },
        "sort": [
          23
        ]
      },
      {
        "_index": "student",
        "_id": "1",
        "_score": null,
        "_source": {
          "name": "张三",
          "age": 22,
          "sex": 0
        },
        "sort": [
          22
        ]
      },
      {
        "_index": "student",
        "_id": "3",
        "_score": null,
        "_source": {
          "name": "王五",
          "age": 21,
          "sex": 1
        },
        "sort": [
          21
        ]
      }
    ]
  }
}

返回成绩总分

目前我们文档中是没有成绩总分这个字段,需要通过聚合计算的形式返回

GET /student/_search
{
  // size 0: return no hits, only the aggregation results
  "size": 0,
  "aggs": {
    // One bucket per distinct student name, using the keyword sub-field name.raw
    "by_name": {
      "terms": {
        "field": "name.raw",
        "size": 10
      },
      "aggs": {
        // Step into the nested transcript objects of the documents in each bucket
        "total_score": {
          "nested": {
            "path": "transcript"
          },
          "aggs": {
            // Add up the score of every transcript entry
            "sum_score": {
              "sum": {
                "field": "transcript.score"
              }
            }
          }
        }
      }
    }
  }
}

返回结果:

{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": null,
    "hits": []
  },
  "aggregations": {
    "by_name": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "张三",
          "doc_count": 1,
          "total_score": {
            "doc_count": 2,
            "sum_score": {
              "value": 159
            }
          }
        },
        {
          "key": "李四",
          "doc_count": 1,
          "total_score": {
            "doc_count": 2,
            "sum_score": {
              "value": 120
            }
          }
        },
        {
          "key": "王五",
          "doc_count": 1,
          "total_score": {
            "doc_count": 2,
            "sum_score": {
              "value": 125
            }
          }
        }
      ]
    }
  }
}

按年龄排序(脚本计算)

王五年龄登记错误,需要 +5,这个时候需要通过脚本排序(script sort)的方式,在排序时用脚本计算出修正后的年龄

GET /student/_search
{
  "_source": [
    "name",
    "sex",
    "age"
  ],
  "sort": [
    {
      "_script": {
        // Type of the value the script produces: number or string
        "type": "number",
        // Sort direction: desc or asc
        "order": "desc",
        // Script definition
        "script": {
          // Script language; painless is the default
          "lang": "painless",
          // Script body. Fields are read from doc values (doc[...]) instead of
          // params['_source']: doc is the document accessor that the Painless sort
          // context provides, and it avoids loading and parsing _source for every hit.
          // name.raw is the keyword sub-field, so the comparison is against the exact name.
          // Explicit return statements make the produced sort value unambiguous.
          "source": """
                      long age = doc['age'].value;
                      if (doc['name.raw'].value == '王五') {
                        return age + params.addValue;
                      }
                      return age;
                    """,
          // Script parameters: the correction added to 王五's age
          "params": {
            "addValue": 5
          }
        }
      }
    }
  ]
}

返回结果:

{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": "student",
        "_id": "3",
        "_score": null,
        "_source": {
          "name": "王五",
          "age": 21,
          "sex": 1
        },
        "sort": [
          26
        ]
      },
      {
        "_index": "student",
        "_id": "2",
        "_score": null,
        "_source": {
          "name": "李四",
          "age": 23,
          "sex": 1
        },
        "sort": [
          23
        ]
      },
      {
        "_index": "student",
        "_id": "1",
        "_score": null,
        "_source": {
          "name": "张三",
          "age": 22,
          "sex": 0
        },
        "sort": [
          22
        ]
      }
    ]
  }
}
posted @ 2023-09-07 14:32  emptyironbox  阅读(20)  评论(0编辑  收藏  举报