聊一聊Elasticsearch和MySQL的常用操作

前言

ElasticSearch,开源的搜索和数据分析引擎,也是名列前茅的 nosql。

很多时候会想拿它和关系型数据库做对比,严格上它们是没有太多可比性的。

不过把有的东西放在一起比较,会帮助我们快速去理解一些 ElasticSearch 的内容。

老黄这边抽空梳理了一些常用的 sql 语句 “对应” ElasticSearch 的操作,主要时针对 CURD 的。

示例用的是 mysql 和 ElasticSearch 7.12.0 。

表操作

为了简单起见,弄一个简单的订单表 order-2021 来演示。

删除表

drop table `order-2021`
DELETE http://localhost:9200/order-2021

创建表

create table `order-2021` ( 
`id` bigint(20) NOT NULL AUTO_INCREMENT, 
`order_id` varchar(32) NOT NULL, 
`cus_name` varchar(20) NOT NULL, 
`item_name` varchar(64) NOT NULL, 
`number` int NOT NULL, 
`create_time` bigint(20) NOT NULL, 
`update_time` bigint(20) NOT NULL,
PRIMARY KEY (`id`),
KEY `idx_order_id` (`order_id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci
PUT http://localhost:9200/order-2021
Content-Type: application/json

{
    "settings":{
        "number_of_shards": 1
    },
    "mappings":{
        "properties":{
            "order_id" :{
                "type":"keyword"
            },
            "cus_name" :{
                "type":"keyword"
            },
            "item_name" :{
                "type":"text",
                "fields": {
                    "keyword": { 
                        "type": "keyword",
                        "ignore_above": 256
                    }
                }
            },
            "number":{
                "type":"integer"
            },
            "create_time" :{
                "type":"long"
            },
            "update_time" :{
                "type":"long"
            }
        }
    }
}

NOTE:

  1. mysql 的索引需要一个一个的加,ES 这边不需要。
  2. mysql 有自增 Id, ES 这边也会有自动生成 Id 的操作,同样的,两边也可以自定义 Id。
  3. keyword 和 text 按需调整。

改表名

rename table `order-2021` to `order`;
POST http://localhost:9200/_aliases
Content-Type: application/json

{
  "actions" : [
    { "add" : { "index" : "order-2021", "alias" : "order" } }
  ]
}

NOTE: ES 的是起一个别名,然后两个名字都可以用。

插入数据

单条插入

insert into `order-2021` 
(order_id, cus_name, item_name, number, create_time, update_time) 
values 
('11', 'catcherwong', 'phone', 1, 1619877257000, 0)
POST http://localhost:9200/order-2021/_doc/
Content-Type: application/json

{ 
    "order_id" : "11", 
    "cus_name":"catcherwong", 
    "item_name":"phone",
    "number": 1,
    "create_time": 1619877257000, 
    "update_time": 0 
}

批量插入

insert into `order-2021` (order_id, cus_name, item_name, number, create_time, update_time) 
values 
('12', 'catcherwong', 'phone', 1, 1619877257000, 0),
('13', 'catcherwong', 'item-1', 2, 1619977257000, 0),
('14', 'catcherwong', 'item-2', 3, 1614877257000, 0);
POST http://localhost:9200/_bulk
Content-Type: application/x-ndjson

{ "index" : { "_index" : "order-2021" } }
{ "order_id" : "12", "cus_name":"catcherwong", "item_name":"phone", "create_time": 1619877257000, "update_time": 0 }
{ "index" : { "_index" : "order-2021" } }
{ "order_id" : "13", "cus_name":"catcherwong", "item_name":"item-1", "create_time": 1619977257000, "update_time": 0 }
{ "index" : { "_index" : "order-2021" } }
{ "order_id" : "14", "cus_name":"catcherwong", "item_name":"item-2", "create_time": 1614877257000, "update_time": 0 }

NOTE:

  1. ES 的批量操作需要注意,每一行最后要有换行符 \n 结尾
  2. Content-Type 指定为 application/x-ndjson

更新数据

根据ID更新

update `order-2021` 
set update_time = '1619877307000', cus_name = 'catcherwong-1' 
where id = '6wvox3kB4OeD0spWtstW'
POST http://localhost:9200/order-2021/_update/6wvox3kB4OeD0spWtstW
Content-Type: application/json

{
    "doc":{
        "update_time" : 1619877307000,
        "cus_name": "catcherwong-1"
    }    
}

根据查询条件更新

update `order-2021` 
set update_time = '1619877307000', cus_name = 'catcherwong-1' 
where order_id = '11'
POST http://localhost:9200/order-2021/_update_by_query
Content-Type: application/json

{
    "script":{
        "source":"ctx._source['cus_name']=params.cus_name;ctx._source['update_time']=params.update_time;",
        "params":{
            "cus_name":"catcherwong-1",
            "update_time": 1619877307000
        }
    },
    "query":{
        "term":{
            "order_id":"11"
        }
    }
}

NOTE: ES 的条件更新分为两部分,一个是 query ,也就是 where 部分, 一个是 script ,也就是 set 部分。

删除数据

根据Id删除

delete from `order-2021` 
where id = 'c8cb33kBoze4GtqD9rTs'
DELETE http://localhost:9200/order-2021/_doc/c8cb33kBoze4GtqD9rTs

根据查询条件删除

delete from `order-2021` 
where order_id = '11'
POST http://localhost:9200/order-2021/_delete_by_query
Content-Type: application/json

{
  "query": {
    "term": {
      "order_id": "11"
    }
  }
}

查询数据

查询全部

select * from `order-2021`
GET http://localhost:9200/order-2021/_search
Content-Type: application/json

{
  "query": {    
    "match_all": {}
  }
}

条件查询

select * from `order-2021` 
where cus_name in ("catcher-61333", "catcher-89631") 
and create_time >= 0 
and create_time <= 1622555657322
GET http://localhost:9200/order/_search
Content-Type: application/json

{
    "query":{
        "bool":{
            "filter":[
                { "terms":{ "cus_name" : [ "catcher-61333", "catcher-89631" ] }},
                { "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
            ]
        }        
    }
}

NOTE:

  1. ES 的条件查询是有非常多的, 这里只列举了部分。
  2. ES 的查询,在默认情况下会有打分的操作,是会损耗性能的,而常规的 sql 查询时不需要这些,所以用 bool + filter 来忽略。

查询指定字段

select cus_name, order_id 
from `order-2021` 
where cus_name in ("catcher-61333", "catcher-89631") 
and create_time >= 0 
and create_time <= 1622555657322
GET http://localhost:9200/order-2021/_search
Content-Type: application/json

{
    "_source":[ "cus_name", "order_id"],
    "query":{
        "bool":{
            "filter":[
                { "terms":{ "cus_name" :  [ "catcher-61333", "catcher-89631" ] }},
                { "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
            ]
        }        
    }
}

NOTE: 查询的时候,如果只要几个字段,那么可以通过 _source 来指定。

查询数量

select count(*) 
from `order-2021` 
where cus_name in ("catcher-61333", "catcher-89631") 
and create_time >= 0 
and create_time <= 1622555657322
GET http://localhost:9200/order-2021/_count
Content-Type: application/json

{
    "query":{
        "bool":{
            "filter":[
                { "terms":{ "cus_name" :  [ "catcher-61333", "catcher-89631" ] }},
                { "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
            ]
        }        
    }
}

NOTE: 根据指定条件获取数量,建议用 _count 来查询,_search 查询的结果里面的条数不一定准确。

浅分页

select cus_name, order_id 
from `order-2021` 
where cus_name in ("catcher-61333", "catcher-89631") 
and create_time >= 0 
and create_time <= 1622555657322
order by create_time desc 
limit 0,10
GET http://localhost:9200/order-2021/_search
Content-Type: application/json

{
    "_source":[ "cus_name", "order_id"],
    "query":{
        "bool":{
            "filter":[
                { "terms":{ "cus_name" :  [ "catcher-61333", "catcher-89631" ] }},
                { "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
            ]
        }        
    },
    "size":10,
    "from":0,
    "sort":[
        { "create_time":{ "order":"desc"} }
    ]
}

NOTE: 浅分页,页码不宜过深,适合滚动加载的场景,深度分页可以考虑 SearchAfter

Group By

select number, count(*) as number_count 
from `order-2021` 
where create_time >= 0 
and create_time <= 1622555657322
group by number 
order by number asc
GET http://localhost:9200/order-2021/_search
Content-Type: application/json

{
    "size":0,
    "aggs": {
        "number_count": {
            "terms": {
                "field": "number",
                "order" : { "_key" : "asc" }
            }
        }
    },
    "query":{
        "bool":{
            "filter":[
                { "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
            ]
        }        
    }
}

NOTE: group by 属于聚合操作的一种,要用 aggs ,聚合,不用原数据,所以 size 设为 0。

Avg/Min/Max/Sum

select avg(number) as number_avg, 
min(number) as number_min, 
max(number) as number_max, 
sum(number) as number_sum 
from order 
where create_time >= 0 
and create_time <= 1622555657322 
GET http://localhost:9200/order/_search
Content-Type: application/json

{
    "size":0,
    "query":{
        "bool":{
            "filter":[
                { "range":{ "create_time" : { "gte": 0, "lte": 1622555657322 } } }
            ]
        }        
    },
    "aggs": {
        "number_avg": {
            "avg": {
                "field": "number"
            }
        },
        "number_min": {
            "min": {
                "field": "number"
            }
        },
        "number_max": {
            "max": {
                "field": "number"
            }
        },
        "number_sum": {
            "sum": {
                "field": "number"
            }
        }
    }
}

写在最后

Elasticsearch 这一块常用的 CURD 操作,和常用的关系型数据库对照一遍,其实很容易上手。

要用到一些 Elasticsearch 特有的功能时,去查询官方文档也基本可以搞定。

posted @ 2021-06-09 21:01  Catcher8  阅读(563)  评论(0编辑  收藏  举报