b0117 数据库elasticsearch 使用
安装
说明
hc2108 centos 7.9 ,下载 linux x86_64 elasticsearch-8.3.0
jdk1.8已经装好
es 比较耗费内存,准备4G以上内存机器
过程
参考下文
不一样的地方
- 没有单独创建 es 用户,直接使用已有的 hadoop 用户。注意:ES 不允许用 root 用户安装和启动
- 配置文件 指定了 数据目录,和日志目录, 这两个目录提前创建好
- 配置文件中把这个参数设置为 xpack.security.enabled: false,不然会遇到一个异常
注意:
jvm.options 中的 -Xms、-Xmx(注意大小写)可以设置 es 所占的堆内存,默认是4G。
机器内存小,可以调小这里,也可以 将 #bootstrap.memory_lock: true 改成false,启动不检测内存状况
后续:
将安全设置全部改成 false
遇到错误
1. elastic 安装报错:max file descriptors [4096] for elasticsearch process is too low, increase to at least [65535]
https://blog.csdn.net/shipfei_csdn/article/details/122807749
我这里是把上文配置中的用户名改成 hadoop;修改后需要退出并重新登录才能生效
2. 关于报错:[2]: max virtual memory areas vm.max_map_count [65530] is too low, increase to at least [262144]
https://blog.csdn.net/wsyzxss/article/details/121280361
3. 报错:flood stage disk watermark [95%] exceeded on [...], all indices on this node will be marked read-only
原因是磁盘空间不够,用下面的命令查看集群健康状态为 red。解决办法:给磁盘扩容,参考博文「vmware 硬盘扩容」
curl -XGET http://localhost:9200/_cluster/health?pretty
使用
# 查看集群状态 curl -XGET http://localhost:9200/_cluster/health?pretty curl -XGET 127.0.0.1:9200/_cat/nodes?v&pretty cd /opt/elasticsearch-8.3.0/bin # 启动 sh elasticsearch -d # 查看 curl localhost:9200 jps #停止 kill -9 5082 # 计算集群中文档的数量 curl -XGET 'http://localhost:9200/_count?pretty' -H 'Content-Type:application/json' -d ' { "query": { "match_all": {} } } ' 旧 索引 类型 文档 新 索引 文档 ----------------------------- index 索引 相当于表 ## 索引 增删改查 # 创建index curl -X PUT localhost:9200/alibaba # 查看index curl -X GET localhost:9200/_cat/indices?v curl -X GET "localhost:9200/alibaba?pretty=true" # 删除index curl -X DELETE localhost:9200/alibaba ## Type # 列出Index 包含的 Type curl "localhost:9200/_mapping?pretty=true" # 查询type的结构 curl -X GET "localhost:9200/alibaba/_mapping?pretty=true" # 添加字段到type curl -X PUT "localhost:9200/alibaba/_mapping" -H "Content-Type: application/json" -d {\"properties\":{\"age\":{\"type\":\"integer\"}}} ## 文档 增删改查 # 新增 POST /索引库名/_doc/文档id curl -X POST localhost:9200/alibaba/_doc/1 -H "Content-Type: application/json" -d {\"name\":\"rose\"} # 报错 curl -X POST localhost:9200/alibaba/_doc/2 -H "Content-Type: application/json" -d {\"name\":\"fff\",\"age\":3} # 成功 curl -X POST localhost:9200/alibaba/_doc/2 -H "Content-Type: application/json" -d {\"name\":\"fff\"} curl -X POST localhost:9200/alibaba/_doc/3 -H "Content-Type: application/json" -d {\"name\":\"kkk\",\"age\":3} curl -X POST localhost:9200/alibaba/_doc/4 -H "Content-Type: application/json" -d {\"name\":\"dddd\"} curl -X POST localhost:9200/alibaba/_doc/5 -H "Content-Type: application/json" -d {\"age\":5} curl -X POST localhost:9200/alibaba/_doc/6 -H "Content-Type: application/json" -d {\"name\":\"ssss\",\"age\":88} curl -X POST localhost:9200/alibaba/_doc -H "Content-Type: application/json" -d {\"age\":9} # 查询 指定id curl -X GET localhost:9200/alibaba/_doc/1 # 查询所有 curl -X GET "localhost:9200/alibaba/_search?pretty" # 改 curl -X POST localhost:9200/alibaba/_doc/1 -H "Content-Type: application/json" -d {\"name\":\"rowse\"} 
全量修改:PUT /{索引库名}/_doc/文档id { json文档 } 增量修改:POST /{索引库名}/_update/文档id { "doc": {字段}} # 删除 curl -X DELETE localhost:9200/alibaba/_doc/1 curl -X PUT localhost:9200/megacorp # 如果索引不存在会自动创建 curl -X PUT "localhost:9200/megacorp/_doc/1?pretty" -H 'Content-Type: application/json' -d' { "first_name" : "John", "last_name" : "Smith", "age" : 25, "about" : "I love to go rock climbing", "interests": [ "sports", "music1" ] } ' curl -X PUT "localhost:9200/megacorp/_doc/2?pretty" -H 'Content-Type: application/json' -d' { "first_name" : "Jane", "last_name" : "Smith", "age" : 32, "about" : "I like to collect rock albums", "interests": [ "music" ] } ' curl -X PUT "localhost:9200/megacorp/_doc/3?pretty" -H 'Content-Type: application/json' -d' { "first_name" : "Douglas", "last_name" : "Fir", "age" : 35, "about": "I like to build cabinets", "interests": [ "forestry" ] } ' curl -X GET localhost:9200/megacorp/_doc/1 curl -X GET "localhost:9200/megacorp/_mapping?pretty=true" curl -X GET "localhost:9200/megacorp/_search?pretty" # 按字段查询 curl -X GET "localhost:9200/megacorp/_search?q=last_name:Smith&pretty" curl -X GET "localhost:9200/megacorp/_search?pretty" -H 'Content-Type: application/json' -d' { "query" : { "match" : { "about" : "cabinets" } } } ' 返回last_name为smith,age大于30的数据 curl -X GET "localhost:9200/megacorp/_search?pretty" -H 'Content-Type: application/json' -d' { "query" : { "bool": { "must": { "match" : { "last_name" : "smith" } }, "filter": { "range" : { "age" : { "gt" : 30 } } } } } } ' # 搜索下所有喜欢攀岩(rock climbing)的员工: curl -X GET "localhost:9200/megacorp/_search?pretty" -H 'Content-Type: application/json' -d' { "query" : { "match" : { "about" : "rock climbing" } } } ' # 短语查询 match_phrase curl -X GET "localhost:9200/megacorp/_search?pretty" -H 'Content-Type: application/json' -d' { "query" : { "match_phrase" : { "about" : "rock climbing" } } } ' # 高亮搜索 curl -X GET "localhost:9200/megacorp/_search?pretty" -H 'Content-Type: application/json' -d' { "query" : { "match_phrase" : { 
"about" : "rock climbing" } }, "highlight": { "fields" : { "about" : {} } } } ' curl -X PUT localhost:9200/library # 批量插入数据 curl -X PUT "localhost:9200/library/_bulk?refresh&pretty" -H 'Content-Type: application/json' -d' {"index":{"_id": "Leviathan Wakes"}} {"name": "Leviathan Wakes", "author": "James S.A. Corey", "release_date": "2011-06-02", "page_count": 561} {"index":{"_id": "Hyperion"}} {"name": "Hyperion", "author": "Dan Simmons", "release_date": "1989-05-26", "page_count": 482} {"index":{"_id": "Dune"}} {"name": "Dune", "author": "Frank Herbert", "release_date": "1965-06-01", "page_count": 604} ' curl -X GET "localhost:9200/library/_search?pretty" # 执行sql查询 curl -X POST "localhost:9200/_sql?format=txt&pretty" -H 'Content-Type: application/json' -d' { "query": "SELECT * FROM library WHERE release_date < 2000-01-01" } ' curl -X POST "localhost:9200/_sql?format=txt&pretty" -H 'Content-Type: application/json' -d' { "query": "SELECT * FROM library ORDER BY page_count DESC LIMIT 5" } ' # 进入sql命令行 elasticsearch-sql-cli # 执行sql查询 sql> SELECT * FROM library WHERE release_date < '2000-01-01';
Python
insert
#!/usr/bin/env python # -*- coding: utf-8 -*- from elasticsearch import Elasticsearch # 失败 # 实例化一个ip为localhost,端口为9200,允许超时一小时的es对象 # es = Elasticsearch(hosts="192.168.1.10",port=9200,timeout=3600) # 失败 ,适合 7.x 版本 # host = '192.168.1.10' # post = 8888 # es = Elasticsearch([{'host': host, 'port': post}]) # 失败 elastic_transport.TlsError: TLS error caused by: TlsError(TLS error caused by: SSLError([SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1131))) # es = Elasticsearch("https://192.168.1.10:9200") # 成功 es = Elasticsearch("http://192.168.1.10:9200") # es.indices.create(index="student3") # # es.indices.delete(index="student2") data = { "name": "赵敏", "age": "16", "sex": "f", "address": "大都", "sect": "朝廷", "skill": "无", "power": "40", "create_time": "2022-4-18 14:34:47", "modify_time": "2022-4-18 14:34:52" } response = es.index(index="student", body=data) data = { "name": "灭绝师太", "age": "49", "sex": "f", "address": "峨眉山", "sect": "峨眉派", "skill": "倚天剑", "power": "70", "create_time": "2022-05-08 23:16:53", "modify_time": "2022-05-08 23:16:59" } resp = es.index(index="student", document=data) print(resp) es.close()
query
#!/usr/bin/env python # -*- coding: utf-8 -*- from elasticsearch import Elasticsearch es = Elasticsearch("http://192.168.1.10:9200") result = es.search(index='student') # index:选择数据库 print(type(result)) # out <class 'elastic_transport.ObjectApiResponse'> print(result) cc = result["hits"]["hits"] print(type(cc)) # <class 'list'> print(cc) # ---------------------------- 定义过滤字段,最终只显示此此段信息 # filter_path=['hits.hits._source'] # 字段1 # result = es.search(index='student', filter_path=filter_path) # index:选择数据库 # print(result) # ---------------------------- match,模糊查询 body = { 'query': { # 查询命令 'match': { # 查询方法:模糊查询(会被分词)。比如此代码,会查到只包含:“我爱你”, “中国”的内容 'name': '灭绝' } } } result = es.search(index='student', body=body, size=200) # 指定size,默认是10 print(result) # ---------------------------- 按id查询 # result = es.get(index='student', id = 'uoRU2YEB73XPc-hQ-X2c') # 报错 # result = es.get(index='student', _id = 'uoRU2YEB73XPc-hQ-X2c') # print(result)
delete
#!/usr/bin/env python # -*- coding: utf-8 -*- from elasticsearch import Elasticsearch es = Elasticsearch("http://192.168.1.10:9200") # result = es.delete(index="student", id='uoRU2YEB73XPc-hQ-X2c') # print(result) body = { 'query': { # 查询命令 'match': { # 查询方法:模糊查询(会被分词)。比如此代码,会查到只包含:“我爱你”, “中国”的内容 'name': '赵敏' } } } result = es.delete_by_query(index="student",body=body) print(result)
20220702_大数据_elasticsearch学习.txt
python连接Elasticsearch8.x vincent_duan 2022-05-09
python操作ElasticSearch 超能小墨 2021-02-04
资料
ElasticSearch最全详细使用全套教程(入门到实战)2022最新版!, B站,视频 ,-- 偏重 如何使用查询,没有讲 架构,分片、副本等分布式技术
问题
1. 创建索引后集群状态为yellow
原因是单节点(单机)部署,而索引的副本数为 1,副本分片没有其他节点可以分配,所以无法复制。将索引的副本数改为 0 即可