b0117 数据库elasticsearch 使用

安装

说明

hc2108 centos 7.9 ,下载  linux x86_64 elasticsearch-8.3.0

jdk1.8已经装好(注:ES 8.x 安装包自带 JDK,默认使用自带的 JDK,并不依赖系统里的 jdk1.8)

es 比较耗费内存,准备4G以上内存机器

过程

参考下文

Elasticsearch详细安装教程(Linux)

不一样的地方

  • 没有创建 es 用户,直接用已有的 hadoop 用户。注意:ES 不允许以 root 用户启动
  • 配置文件 指定了 数据目录,和日志目录, 这两个目录提前创建好
  • 这个参数这样设置:xpack.security.enabled: false。否则 8.x 默认开启安全功能(认证和 TLS),直接用 http 访问会遇到异常

注意:

jvm.options 中的 -Xms、-Xmx(注意 X 是大写)可以设置 es 所占的堆内存,默认是4G。

机器内存小,可以调小这里,也可以 将 #bootstrap.memory_lock: true  改成false,启动不检测内存状况

 

后续:


将安全设置全部改成false

 

遇到错误

1 elastic安装报错:max file descriptors [4096] for elasticsearch process is too low, increase to at least

https://blog.csdn.net/shipfei_csdn/article/details/122807749

我的做法:把 /etc/security/limits.conf 里配置的用户名改成 hadoop(例如 hadoop soft nofile 65536、hadoop hard nofile 65536),然后退出并重新登录使其生效

2. 关于报错:[2]: max virtual memory areas vm.max_map_count [65530] is too low, increase to at least [262144]

https://blog.csdn.net/wsyzxss/article/details/121280361

3 flood stage disk watermark [95%] exceeded on all indices on this node will marked read-only

磁盘空间不够, 查看服务器状态为red。  扩容,参考博文 vmware 硬盘扩容

 curl -XGET http://localhost:9200/_cluster/health?pretty

使用

# Check cluster health. URLs are quoted so the shell never treats "?" as a glob
# character or "&" as the background operator.
curl -XGET 'http://localhost:9200/_cluster/health?pretty'

# List the nodes of the cluster (v = print column headers).
curl -XGET '127.0.0.1:9200/_cat/nodes?v&pretty'

cd /opt/elasticsearch-8.3.0/bin

# Start as a daemon and record the PID in a file so the node can be stopped
# cleanly later. An absolute path is used so the file location does not depend
# on the working directory.
./elasticsearch -d -p /opt/elasticsearch-8.3.0/es.pid

# Verify that the node answers.
curl localhost:9200

# List the running JVM processes; the Elasticsearch process shows up here too.
jps

# Stop: send SIGTERM to the recorded PID so Elasticsearch can shut down in an
# orderly way (kill -9 on a hard-coded PID skips that and targets whatever
# process happens to own the number).
kill "$(cat /opt/elasticsearch-8.3.0/es.pid)"

# Count the documents in the cluster.
curl -XGET 'http://localhost:9200/_count?pretty' -H 'Content-Type:application/json' -d '
{
    "query": {
        "match_all": {}
    }
}
'


旧版本的层级:索引(index)→ 类型(type)→ 文档(document)
新版本的层级(8.x 已移除类型):索引(index)→ 文档(document)
 
 -----------------------------
 index   索引 相当于表
 
 
 ## Index CRUD

 # Create an index.
 curl -X PUT localhost:9200/alibaba

 # List all indices (URL quoted so the shell does not glob the "?").
 curl -X GET 'localhost:9200/_cat/indices?v'

 # Show one index.
 curl -X GET "localhost:9200/alibaba?pretty=true"

 # Delete an index.
 curl -X DELETE localhost:9200/alibaba

 ## Mappings (what older versions called "types")
 # Show the mappings of every index.
 curl "localhost:9200/_mapping?pretty=true"

 # Show the mapping of one index.
 curl -X GET "localhost:9200/alibaba/_mapping?pretty=true"

 # Add a field to the mapping. The index must exist, so re-create it first if
 # the DELETE above was run. The JSON body is single-quoted so bash passes it
 # to curl verbatim instead of brace-expanding it.
 curl -X PUT "localhost:9200/alibaba/_mapping" -H "Content-Type: application/json" -d '{"properties":{"age":{"type":"integer"}}}'
 
 
  
  
## Document CRUD

# Create: POST /<index>/_doc/<id>
# Every JSON body is single-quoted. Left unquoted, bash brace-expands any body
# that contains a comma ({a,b} -> a b), so curl ends up sending broken JSON.
curl -X POST localhost:9200/alibaba/_doc/1 -H "Content-Type: application/json" -d '{"name":"rose"}'

# This two-field body is the request that failed while it was unquoted
# (the comma triggered brace expansion). Quoted, it is indexed normally.
curl -X POST localhost:9200/alibaba/_doc/2 -H "Content-Type: application/json" -d '{"name":"fff","age":3}'

# A single-field body has no comma, which is why it worked even unquoted.
# Re-using id 2 replaces the document written by the previous command.
curl -X POST localhost:9200/alibaba/_doc/2 -H "Content-Type: application/json" -d '{"name":"fff"}'

curl -X POST localhost:9200/alibaba/_doc/3 -H "Content-Type: application/json" -d '{"name":"kkk","age":3}'

curl -X POST localhost:9200/alibaba/_doc/4 -H "Content-Type: application/json" -d '{"name":"dddd"}'
curl -X POST localhost:9200/alibaba/_doc/5 -H "Content-Type: application/json" -d '{"age":5}'

curl -X POST localhost:9200/alibaba/_doc/6 -H "Content-Type: application/json" -d '{"name":"ssss","age":88}'

# No id in the URL: Elasticsearch generates one.
curl -X POST localhost:9200/alibaba/_doc -H "Content-Type: application/json" -d '{"age":9}'

# Read one document by id.
curl -X GET localhost:9200/alibaba/_doc/1

# Read all documents.
curl -X GET "localhost:9200/alibaba/_search?pretty"

# Update: indexing to an existing id replaces the whole document.
curl -X POST localhost:9200/alibaba/_doc/1 -H "Content-Type: application/json" -d '{"name":"rowse"}'

# Full replace:   PUT  /<index>/_doc/<id>     { json document }
# Partial update: POST /<index>/_update/<id>  { "doc": { fields } }

# Delete one document by id.
curl -X DELETE localhost:9200/alibaba/_doc/1




# Create the index explicitly.
curl -X PUT localhost:9200/megacorp

# Index three employee documents. Indexing a document also creates the index
# automatically when it does not exist yet.
curl -X PUT "localhost:9200/megacorp/_doc/1?pretty" -H 'Content-Type: application/json' -d'
{
    "first_name" : "John",
    "last_name" :  "Smith",
    "age" :        25,
    "about" :      "I love to go rock climbing",
    "interests": [ "sports", "music1" ]
}
'

curl -X PUT "localhost:9200/megacorp/_doc/2?pretty" -H 'Content-Type: application/json' -d'
{
    "first_name" :  "Jane",
    "last_name" :   "Smith",
    "age" :         32,
    "about" :       "I like to collect rock albums",
    "interests":  [ "music" ]
}
'

curl -X PUT "localhost:9200/megacorp/_doc/3?pretty" -H 'Content-Type: application/json' -d'
{
    "first_name" :  "Douglas",
    "last_name" :   "Fir",
    "age" :         35,
    "about":        "I like to build cabinets",
    "interests":  [ "forestry" ]
}
'

# Fetch one document by id.
curl -X GET localhost:9200/megacorp/_doc/1

# Show the mapping of the index.
curl -X GET "localhost:9200/megacorp/_mapping?pretty=true"

# Return all documents.
curl -X GET "localhost:9200/megacorp/_search?pretty"

# Search by field with the URI query-string syntax.
curl -X GET "localhost:9200/megacorp/_search?q=last_name:Smith&pretty"

# The same kind of search expressed as a request-body match query.
curl -X GET "localhost:9200/megacorp/_search?pretty" -H 'Content-Type: application/json' -d'
{
    "query" : {
        "match" : {
            "about" : "cabinets"
        }
    }
}
'

# Return the documents whose last_name is smith and whose age is greater than 30.
curl -X GET "localhost:9200/megacorp/_search?pretty" -H 'Content-Type: application/json' -d'
{
    "query" : {
        "bool": {
            "must": {
                "match" : {
                    "last_name" : "smith"
                }
            },
            "filter": {
                "range" : {
                    "age" : { "gt" : 30 }
                }
            }
        }
    }
}
'

# Search for all employees who like rock climbing (full-text match on "about").
curl -X GET "localhost:9200/megacorp/_search?pretty" -H 'Content-Type: application/json' -d'
{
    "query" : {
        "match" : {
            "about" : "rock climbing"
        }
    }
}
'

# Phrase search with match_phrase.
curl -X GET "localhost:9200/megacorp/_search?pretty" -H 'Content-Type: application/json' -d'
{
    "query" : {
        "match_phrase" : {
            "about" : "rock climbing"
        }
    }
}
'

# Phrase search with the matching fragments of "about" highlighted.
curl -X GET "localhost:9200/megacorp/_search?pretty" -H 'Content-Type: application/json' -d'
{
    "query" : {
        "match_phrase" : {
            "about" : "rock climbing"
        }
    },
    "highlight": {
        "fields" : {
            "about" : {}
        }
    }
}
'


# Create the index used by the SQL examples.
curl -X PUT localhost:9200/library

# Bulk-insert three books. Each document line is preceded by its action line,
# and the body must end with a newline.
curl -X PUT "localhost:9200/library/_bulk?refresh&pretty" -H 'Content-Type: application/json' -d'
{"index":{"_id": "Leviathan Wakes"}}
{"name": "Leviathan Wakes", "author": "James S.A. Corey", "release_date": "2011-06-02", "page_count": 561}
{"index":{"_id": "Hyperion"}}
{"name": "Hyperion", "author": "Dan Simmons", "release_date": "1989-05-26", "page_count": 482}
{"index":{"_id": "Dune"}}
{"name": "Dune", "author": "Frank Herbert", "release_date": "1965-06-01", "page_count": 604}
'

curl -X GET "localhost:9200/library/_search?pretty"

# Run a SQL query over REST. The date must be a quoted SQL literal: unquoted,
# 2000-01-01 is evaluated as integer arithmetic. The quotes are written as the
# JSON escape \u0027 because the whole body already sits inside shell single
# quotes.
curl -X POST "localhost:9200/_sql?format=txt&pretty" -H 'Content-Type: application/json' -d'
{
  "query": "SELECT * FROM library WHERE release_date < \u00272000-01-01\u0027"
}
'

curl -X POST "localhost:9200/_sql?format=txt&pretty" -H 'Content-Type: application/json' -d'
{
  "query": "SELECT * FROM library ORDER BY page_count DESC LIMIT 5"
}
'

# Open the interactive SQL shell (the script lives in the Elasticsearch bin
# directory).
elasticsearch-sql-cli

# Then type the query at the "sql>" prompt, for example:
#   sql> SELECT * FROM library WHERE release_date < '2000-01-01';
View Code

 

Python

insert 

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from elasticsearch import Elasticsearch

# Connection attempts that did NOT work, kept as a record:
#
# 1. Keyword arguments for host, port and a one-hour timeout:
#    es = Elasticsearch(hosts="192.168.1.10", port=9200, timeout=3600)
#
# 2. A list of host/port dicts -- only suitable for the 7.x client:
#    host = '192.168.1.10'
#    post = 8888
#    es = Elasticsearch([{'host': host, 'port': post}])
#
# 3. An https URL:
#    es = Elasticsearch("https://192.168.1.10:9200")
#    -> elastic_transport.TlsError: TLS error caused by: TlsError(TLS error caused by: SSLError([SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1131)))
#    NOTE(review): presumably the node serves plain HTTP because security is
#    disabled -- confirm against elasticsearch.yml.

# Works: a plain-http URL that includes scheme, host and port.
es = Elasticsearch("http://192.168.1.10:9200")

try:
    # Optional index management:
    # es.indices.create(index="student3")
    # es.indices.delete(index="student2")

    data = {
        "name": "赵敏",
        "age": "16",
        "sex": "f",
        "address": "大都",
        "sect": "朝廷",
        "skill": "",
        "power": "40",
        "create_time": "2022-4-18 14:34:47",
        "modify_time": "2022-4-18 14:34:52"
    }
    # `document=` is the 8.x keyword for the document source; the older `body=`
    # spelling is deprecated, and the second call below already uses `document=`.
    # No id is passed, so Elasticsearch generates one.
    response = es.index(index="student", document=data)

    data = {
        "name": "灭绝师太",
        "age": "49",
        "sex": "f",
        "address": "峨眉山",
        "sect": "峨眉派",
        "skill": "倚天剑",
        "power": "70",
        "create_time": "2022-05-08 23:16:53",
        "modify_time": "2022-05-08 23:16:59"
    }
    resp = es.index(index="student", document=data)
    print(resp)
finally:
    # Release the client's connections even when a request raises.
    es.close()
View Code

 

query

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from elasticsearch import Elasticsearch

es = Elasticsearch("http://192.168.1.10:9200")

try:
    # No query given: search the whole `student` index.
    result = es.search(index='student')

    print(type(result))  # <class 'elastic_transport.ObjectApiResponse'>
    print(result)

    # The matching documents are the list under hits.hits.
    cc = result["hits"]["hits"]

    print(type(cc))  # <class 'list'>
    print(cc)

    # ---------------------------- filter_path: return only the listed parts of the response

    # filter_path = ['hits.hits._source']
    # result = es.search(index='student', filter_path=filter_path)
    # print(result)

    # ---------------------------- match: full-text query

    # `match` analyzes (tokenizes) the search text before matching it against
    # the `name` field.
    query = {
        'match': {
            'name': '灭绝'
        }
    }

    # The query is passed through the 8.x `query=` keyword instead of wrapping
    # it in the deprecated `body=`. `size` raises the page size from the
    # default of 10.
    result = es.search(index='student', query=query, size=200)
    print(result)

    # ---------------------------- fetch a document by id
    # result = es.get(index='student', id='uoRU2YEB73XPc-hQ-X2c')
    # The following spelling raises an error; the keyword is `id`, not `_id`:
    # result = es.get(index='student', _id='uoRU2YEB73XPc-hQ-X2c')
    # print(result)
finally:
    # Release the client's connections even when a request raises.
    es.close()
View Code

 

delete

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from elasticsearch import Elasticsearch

es = Elasticsearch("http://192.168.1.10:9200")

try:
    # Delete a single document by id:
    # result = es.delete(index="student", id='uoRU2YEB73XPc-hQ-X2c')
    # print(result)

    # `match` analyzes (tokenizes) the search text before matching it against
    # the `name` field; every document that matches is deleted.
    query = {
        'match': {
            'name': '赵敏'
        }
    }

    # The query is passed through the 8.x `query=` keyword instead of wrapping
    # it in the deprecated `body=`.
    result = es.delete_by_query(index="student", query=query)

    print(result)
finally:
    # Release the client's connections even when the request raises.
    es.close()
View Code

 

20220702_大数据_elasticsearch学习.txt

Elasticsearch常用命令

python对接elasticsearch的基本操作

 python连接Elasticsearch8.x  2022-05-09

python操作ElasticSearch   2021-02-04

 

 

资料

ElasticSearch最全详细使用全套教程(入门到实战)2022最新版!, B站,视频 ,-- 偏重 如何使用查询,没有讲 架构,分片、副本等分布式技术 

 

问题

1. 创建索引后集群状态为yellow

原因是单机状态,副本数为1,副本分片没有其他节点可以分配。  将索引副本数改为0即可(PUT /索引名/_settings,请求体为 {"index": {"number_of_replicas": 0}})

posted @ 2022-06-30 23:09  sunzebo  阅读(74)  评论(0)  编辑  收藏  举报