ES使用总结 --ES实践速查手册
实际工作中使用ES有一段时间了,比起一直在理论上接触ES还是要好上一些的。今天就来总结一些实际工作中用到的一些ES功能吧。本文编排顺序,按使用的先后可能性排序编排。ES的功能很强大,但我们能用到的,也许并不会太多,所以本文可作为一个简单速查手册使用哟。
1. 查看集群状态
# 健康检查
GET _cluster/health?pretty
# 作用:可以帮助我们排查es集群是否有故障
# 结果样例如下:
{ "cluster_name" : "testcluster", "status" : "green", "timed_out" : false, "number_of_nodes" : 2, "number_of_data_nodes" : 2, "active_primary_shards" : 5, "active_shards" : 10, "relocating_shards" : 0, "initializing_shards" : 0, "unassigned_shards" : 0 }
2. 查看分片信息
GET _cat/shards?pretty
# 作用:可以帮助我们查看数据分片情况
# 结果样例如下:
test_maping_20200507 1 p STARTED 17 41.1kb 10.19.70.15 master-node-01
test_maping_20200507 1 r UNASSIGNED
c_maping_20200507 3 p STARTED 14 25.5kb 10.19.70.15 master-node-01
3. 查看节点信息
GET _nodes/stats
# 帮助我们排查es集群问题,知道节点分布情况
# 结果样例如下:
{ "_nodes": { "total": 1, "successful": 1, "failed": 0 }, "cluster_name": "n-cluster", "nodes": { "kSi1H-3pQm6nmkmIbVpywg": { "timestamp": 1607316420983, "name": "master-node-01", "transport_address": "10.19.70.15:9300", "host": "10.19.70.15", "ip": "10.19.70.15:9300", "roles": [ "master", "data", "ingest" ], "attributes": { "ml.enabled": "true" }, "indices": { "docs": { "count": 4286963, "deleted": 19416 }, "store": { "size_in_bytes": 2647850406, "throttle_time_in_millis": 0 }, "indexing": { "index_total": 50056272, "index_time_in_millis": 12151146, "index_current": 0, "index_failed": 0, "delete_total": 213120, "delete_time_in_millis": 11643, "delete_current": 0, "noop_update_total": 0, "is_throttled": false, "throttle_time_in_millis": 0 }, "get": { "total": 5608635, "time_in_millis": 239343, "exists_total": 4523772, "exists_time_in_millis": 211376, "missing_total": 1084863, "missing_time_in_millis": 27967, "current": 0 }, "search": { "open_contexts": 0, "query_total": 7644956, "query_time_in_millis": 1051829, "query_current": 0, "fetch_total": 4710420, "fetch_time_in_millis": 432477, "fetch_current": 0, "scroll_total": 23265, "scroll_time_in_millis": 14799501190, "scroll_current": 0, "suggest_total": 0, "suggest_time_in_millis": 0, "suggest_current": 0 }, "merges": { "current": 0, "current_docs": 0, "current_size_in_bytes": 0, "total": 49638, "total_time_in_millis": 32459437, "total_docs": 304866346, "total_size_in_bytes": 180897863590, "total_stopped_time_in_millis": 0, "total_throttled_time_in_millis": 7534, "total_auto_throttle_in_bytes": 3078827106 }, "refresh": { "total": 1635812, "total_time_in_millis": 18344665, "listeners": 0 }, "flush": { "total": 472, "total_time_in_millis": 44558 }, "warmer": { "current": 0, "total": 1338114, "total_time_in_millis": 396987 }, "query_cache": { "memory_size_in_bytes": 464936, "total_count": 365237, "hit_count": 304671, "miss_count": 60566, "cache_size": 72, "cache_count": 2098, "evictions": 2026 }, "fielddata": { 
"memory_size_in_bytes": 0, "evictions": 0 }, "completion": { "size_in_bytes": 0 }, "segments": { "count": 459, "memory_in_bytes": 12780066, "terms_memory_in_bytes": 6311772, "stored_fields_memory_in_bytes": 571552, "term_vectors_memory_in_bytes": 0, "norms_memory_in_bytes": 219328, "points_memory_in_bytes": 1008902, "doc_values_memory_in_bytes": 4668512, "index_writer_memory_in_bytes": 0, "version_map_memory_in_bytes": 0, "fixed_bit_set_memory_in_bytes": 41928, "max_unsafe_auto_id_timestamp": -1, "file_sizes": {} }, "translog": { "operations": 93313, "size_in_bytes": 287688877 }, "request_cache": { "memory_size_in_bytes": 187385, "evictions": 0, "hit_count": 87503, "miss_count": 1563 }, "recovery": { "current_as_source": 0, "current_as_target": 0, "throttle_time_in_millis": 0 } }, "os": { "timestamp": 1607316420999, "cpu": { "percent": 8, "load_average": { "1m": 0.34, "5m": 0.34, "15m": 0.39 } }, "mem": { "total_in_bytes": 33465651200, "free_in_bytes": 432812032, "used_in_bytes": 33032839168, "free_percent": 1, "used_percent": 99 }, "swap": { "total_in_bytes": 0, "free_in_bytes": 0, "used_in_bytes": 0 }, "cgroup": { "cpuacct": { "control_group": "/", "usage_nanos": 4704757205576508 }, "cpu": { "control_group": "/", "cfs_period_micros": 100000, "cfs_quota_micros": -1, "stat": { "number_of_elapsed_periods": 0, "number_of_times_throttled": 0, "time_throttled_nanos": 0 } } } }, "process": { "timestamp": 1607316420999, "open_file_descriptors": 577, "max_file_descriptors": 655360, "cpu": { "percent": 0, "total_in_millis": 225252750 }, "mem": { "total_virtual_in_bytes": 26505314304 } }, "jvm": { "timestamp": 1607316421001, "uptime_in_millis": 10785587445, "mem": { "heap_used_in_bytes": 1780673064, "heap_used_percent": 10, "heap_committed_in_bytes": 16750411776, "heap_max_in_bytes": 16750411776, "non_heap_used_in_bytes": 167806944, "non_heap_committed_in_bytes": 176631808, "pools": { "young": { "used_in_bytes": 648635928, "max_in_bytes": 3436052480, "peak_used_in_bytes": 
3436052480, "peak_max_in_bytes": 3436052480 }, "survivor": { "used_in_bytes": 44905528, "max_in_bytes": 429457408, "peak_used_in_bytes": 195628936, "peak_max_in_bytes": 429457408 }, "old": { "used_in_bytes": 1087131608, "max_in_bytes": 12884901888, "peak_used_in_bytes": 1087131608, "peak_max_in_bytes": 12884901888 } } }, "threads": { "count": 185, "peak_count": 190 }, "gc": { "collectors": { "young": { "collection_count": 12012, "collection_time_in_millis": 1407041 }, "old": { "collection_count": 1, "collection_time_in_millis": 386 } } }, "buffer_pools": { "direct": { "count": 188, "used_in_bytes": 271132937, "total_capacity_in_bytes": 271132936 }, "mapped": { "count": 876, "used_in_bytes": 2642222120, "total_capacity_in_bytes": 2642222120 } }, "classes": { "current_loaded_count": 15268, "total_loaded_count": 15268, "total_unloaded_count": 0 } }, "thread_pool": { "bulk": { "threads": 8, "queue": 0, "active": 0, "rejected": 0, "largest": 8, "completed": 3234430 }, "fetch_shard_started": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "fetch_shard_store": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "flush": { "threads": 1, "queue": 0, "active": 0, "rejected": 0, "largest": 4, "completed": 895 }, "force_merge": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "generic": { "threads": 8, "queue": 0, "active": 0, "rejected": 0, "largest": 8, "completed": 14018425 }, "get": { "threads": 8, "queue": 0, "active": 0, "rejected": 0, "largest": 8, "completed": 5381151 }, "index": { "threads": 8, "queue": 0, "active": 0, "rejected": 0, "largest": 8, "completed": 213116 }, "listener": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "management": { "threads": 5, "queue": 0, "active": 1, "rejected": 0, "largest": 5, "completed": 14542562 }, "ml_autodetect": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, 
"largest": 0, "completed": 0 }, "ml_datafeed": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "ml_utility": { "threads": 40, "queue": 0, "active": 0, "rejected": 0, "largest": 40, "completed": 125 }, "refresh": { "threads": 4, "queue": 0, "active": 0, "rejected": 0, "largest": 4, "completed": 569855652 }, "search": { "threads": 13, "queue": 0, "active": 0, "rejected": 0, "largest": 13, "completed": 12725792 }, "snapshot": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "vanguard.data": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "vanguard.planner": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "vanguard.task": { "threads": 0, "queue": 0, "active": 0, "rejected": 0, "largest": 0, "completed": 0 }, "warmer": { "threads": 4, "queue": 0, "active": 0, "rejected": 0, "largest": 4, "completed": 2034280 }, "watcher": { "threads": 40, "queue": 0, "active": 0, "rejected": 0, "largest": 40, "completed": 213116 } }, "fs": { "timestamp": 1607316421001, "total": { "total_in_bytes": 536608768000, "free_in_bytes": 533629661184, "available_in_bytes": 533629661184, "spins": "true" }, "data": [ { "path": "/srv/data01/nodes/0", "mount": "/srv/data01 (/dev/vdb)", "type": "xfs", "total_in_bytes": 536608768000, "free_in_bytes": 533629661184, "available_in_bytes": 533629661184, "spins": "true" } ], "io_stats": { "devices": [ { "device_name": "vdb", "operations": 13972095, "read_operations": 41, "write_operations": 13972054, "read_kilobytes": 75, "write_kilobytes": 284301092 } ], "total": { "operations": 13972095, "read_operations": 41, "write_operations": 13972054, "read_kilobytes": 75, "write_kilobytes": 284301092 } } }, "transport": { "server_open": 39, "rx_count": 16621987, "rx_size_in_bytes": 973956417, "tx_count": 16621987, "tx_size_in_bytes": 5155351992 }, "http": { "current_open": 4, "total_opened": 349641 }, 
"breakers": { "request": { "limit_size_in_bytes": 10050247065, "limit_size": "9.3gb", "estimated_size_in_bytes": 0, "estimated_size": "0b", "overhead": 1, "tripped": 0 }, "fielddata": { "limit_size_in_bytes": 10050247065, "limit_size": "9.3gb", "estimated_size_in_bytes": 0, "estimated_size": "0b", "overhead": 1.03, "tripped": 0 }, "in_flight_requests": { "limit_size_in_bytes": 16750411776, "limit_size": "15.6gb", "estimated_size_in_bytes": 0, "estimated_size": "0b", "overhead": 1, "tripped": 0 }, "parent": { "limit_size_in_bytes": 11725288243, "limit_size": "10.9gb", "estimated_size_in_bytes": 0, "estimated_size": "0b", "overhead": 1, "tripped": 0 } }, "script": { "compilations": 17, "cache_evictions": 0 }, "discovery": { "cluster_state_queue": { "total": 0, "pending": 0, "committed": 0 } }, "ingest": { "total": { "count": 0, "time_in_millis": 0, "current": 0, "failed": 0 }, "pipelines": { "xpack_monitoring_2": { "count": 0, "time_in_millis": 0, "current": 0, "failed": 0 } } } } } }
4. 查看所有索引状态及信息
GET _cat/indices?v&pretty
# 作用:可以让我们快速查看都有些什么索引及其数据量,让我们在不了解业务的情况下也能查询数据
# 类似于sql中的 show databases; show tables;
# 结果样例如下:
green open utest_20201201 xxavc 5 0 2 0 7.6kb 7.6kb
yellow open .watcher-history-3-2020.08.05 O8wwxxsaQ0TaomQ 1 1 7200 0 5mb 5mb
5. 查看所有别名情况
GET _cat/aliases
# 作用:可以帮助你快速知道目前实际使用的是哪些索引,因为外部用户可能只知道别名,但内部索引可能会很多
# 和 show databases; show tables; 语义相近
# 结果样例如下:
test test_20200330 - - -
c_maping c_20200507 - - -
user user_20200717 - - -
6. 查看创建或修改mapping
# 查看某索引mapping
GET /index_name/_mapping/type_name
# 创建或修改mapping
PUT /index_name
{ "mappings":{ "article":{ "properties":{ "content":{ "type": "text", "analyzer":"english" }, "post_date":{ "type":"date" }, "title":{ "type":"keyword" } } } } }
# 作用:类似sql中创建修改表结构,可以让你快速了解此索引都由些什么构成,并排查可能出现问题的原因,比如text类型字段默认是不能用于聚合、排序的,需要添加 fielddata:true 才可以。
# 结果样例如下:
{ "cus_20200110": { "mappings": { "job": { "properties": { "v1d": { "type": "text", "fields": { "keyword": { "type": "keyword", "ignore_above": 256 } } }, "amt": { "type": "long" } } } } } }
7. 创建索引
# 一般你可以在首次插入数据时创建索引,也可以先手动创建索引,创建mapping,这样控制更好
# 创建空索引
PUT index_name
# 创建mapping,如上
# 也可以带分片数设置创建索引
PUT index_name
{ "settings": { "number_of_shards": 3, "number_of_replicas": 2 } }
8. 插入数据更新数据
# 和添加数据时一样,不过需要指定id
# 更新分为个别字段更新和全记录更新两种
# 个别字段更新使用 _update,需要更新的字段放在 doc 中,如下
POST index_name/anytype/id1/_update
{ "doc": { "title": "test title 001", "createDate": "2018-01-12" } }
# 全记录更新(指定id重新写入,整条记录会被覆盖)
PUT index_name/anytype/id1
{ "title":"overwrite title full" }
9.常用查询
# 毫无疑问,es的作用就在于查询,快速及有效
# 查询场景有很多,我们按照sql的方式类比,简单分为普通查询,聚合查询,分组查询,子查询
# 1. 普通查询(可带分页),类比sql:select * from index_name where title="abc" order by id limit 0, 10 # es 查询如下 GET userindex/anytype/_search { "size": 10, "from": 0 , "query": { "term": { "title": "abc" } }, "sort": [ { "id": { "order": "asc" } } ] }
# 也可以使用如下match查询
{
"query":{
"match":{
"title":"java"
}
}
}
# 其他查询条件可以任意在term中添加
# 2. 聚合查询,类比sql:select max(id) from index_name where title="abc"
# es 查询
GET userindex/anytype/_search
{ "size": 10, "from": 0 , "query": { "term": { "title": "abc" } }, "aggs": { "max_id": { "max": { "field": "id" } } } }
# 可以自行组合其他聚合方式,一起得出聚合结果,如 max、min、avg、sum 等
10.主键查询
# get 查询单个doc
GET indexName/type/1
# mget多个查询
GET indexName/type/_mget
{ "docs":[ { "_id":"id1", "_source":["field1", "field2"] }, { "_id":"id2", "_source":["field1", "field2"] } ] }
# 类似sql中的select f1, f2 from tb where id in (xxx);
11.索引open/close
# 索引打开关闭,是为了一些特殊场景的考量,比如我们做了索引别名,那么老的索引可能没用了,但又不敢完全保证新索引正确,所以先将旧索引关闭然后观察一段时间无误后,再将其删除,一旦出现问题,则将旧索引open恢复启用
# 打开索引,使其可用
POST test/_open
# 关闭索引,减少资源消耗,关闭后索引不可进行搜索、写入操作
POST test/_close
12.删除索引
# 删除索引就是删除索引及其对应的所有资源,类似于sql中的 drop table 或者 drop database,所以需要小心操作,后果很严重
# 删除某个索引
DELETE indexName
# 删除所有索引:
DELETE * 或者 DELETE _all
13.索引merge
# 索引merge的目的在于将小segment合并为大文件,从而减少文件打开数量,这个动作一般是es自动完成的,但有时我们可能需要自己执行
// 强制merge POST indexname/_forcemerge?max_num_segments=1
14.es中几个搜索相关名词
# 如果仅仅是kv的精确匹配搜索,则只需用 k:v 就可以进行搜索,当然你得先将字段类型设置为keyword(不分词)才行;如果要对text字段做聚合或排序,则需要设置 fielddata=true
# match/match_all: 普通搜索过滤
# bool/filter/must: 搜索结果过滤
# term(s) : 字段包含某些词搜索
# aggs: 聚合结果
# _source: 设置字段信息
# sort: 排序结果
# exists: 空值判定查询
# prefix: 前缀匹配查询
# wildcard: 通配符查询,模糊匹配
# regexp: 正则查询
# score: 搜索评分
15.es中的几个核心问题
es为什么这么快?
system cache的重要性?
segment
translog
memorybuffer
flush
分片
集群通信raft
16.es总览架构图
架构图:
ES写数据流程图示例:
搜索流程图示例:
。。。