15 ES

ES (Elasticsearch, part of the Elastic Stack, a.k.a. ELK)

 

Related links:

Bilibili: https://www.bilibili.com/video/BV1iJ411c7Az?p=65

Gitee docs: https://gitee.com/moxi159753/LearningNotes/tree/master/ElasticStack

Baidu Netdisk: https://pan.baidu.com/s/1qlzvjjiJ_yNBbVVFodP6fA

 

Installation:

Official docs: https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html

Docker installation

Official docs: https://www.elastic.co/guide/en/elasticsearch/reference/7.16/docker.html
Reference: https://juejin.cn/post/6844904117580595214

Commands:
1 Pull the image
docker pull docker.elastic.co/elasticsearch/elasticsearch:7.16.1

2 Start the container (the image tag must match the one pulled above)
docker run -p 9200:9200 -p 9300:9300 --name elasticsearch1 \
-e "discovery.type=single-node" \
-e "cluster.name=elasticsearch" \
-e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \
-v es_plugins:/usr/share/elasticsearch/plugins \
-v es_data:/usr/share/elasticsearch/data \
-d docker.elastic.co/elasticsearch/elasticsearch:7.16.1

3 Install the IK Analyzer Chinese tokenizer, then restart (the plugin version must match the ES version exactly)
docker exec -it elasticsearch1 /bin/bash
# this command must be run inside the container
elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.16.1/elasticsearch-analysis-ik-7.16.1.zip
docker restart elasticsearch1

Notes: data and plugins are persisted in named volumes under /var/lib/docker/volumes/ on the host; the JVM heap is tuned via ES_JAVA_OPTS; the container runs in the background (-d).
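
To confirm the IK plugin loaded after the restart, here is a minimal sketch against the _analyze API (the host 127.0.0.1 and the ik_max_word analyzer name are assumptions based on a default IK install; adjust as needed):

# Sketch: verify the IK analyzer is installed (host is an assumption)
from elasticsearch import Elasticsearch

es = Elasticsearch([{'host': '127.0.0.1', 'port': 9200}])
# ik_max_word is the fine-grained analyzer registered by the IK plugin
res = es.indices.analyze(body={"analyzer": "ik_max_word", "text": "中华人民共和国"})
print([t["token"] for t in res["tokens"]])  # expect multi-character word tokens, not single characters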

 

 

1 Verify the login

Account: admin  Password: Admin@123
curl 127.0.0.1:9200 --user admin:Admin@123
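
The same check from Python (a sketch assuming the credentials above; http_auth is the elasticsearch-py 7.x basic-auth parameter):

from elasticsearch import Elasticsearch

# http_auth mirrors the --user flag of the curl command above
es = Elasticsearch([{'host': '127.0.0.1', 'port': 9200}], http_auth=('admin', 'Admin@123'))
print(es.info())  # same cluster/version info the curl call returns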

2 The elasticsearch-head plugin

 http://121.37.212.1%%:9200/?auth_user=admin&auth_password=Admin@123

 

3 Using ES from Python

# Using elasticsearch in Python
# Official docs: https://elasticsearch-py.readthedocs.io/en/v7.13.2/

from datetime import datetime
from elasticsearch import Elasticsearch
es = Elasticsearch([{'host': '8.134.210.4', 'port': 9200}])

# doc = {
#     'author': 'wanghua23',
#     'text': 'Elasticsearch: very cool3!',
#     'timestamp': datetime.now(),
# }
#
# # 1 Create a document
# res = es.index(index="test-index", id=3, document=doc)
# print(res['result'])

# 2 Get a document by id
# res = es.get(index="test-index", id=1)
# print(res['_source'])
#

# 3 Search with a query
es.indices.refresh(index="test-index")
res = es.search(index="test-index", query={"match_all": {}})
print(res)
print("Got %d Hits:" % res['hits']['total']['value'])
# for hit in res['hits']['hits']:
#     print("%(timestamp)s %(author)s: %(text)s" % hit["_source"])

# 4 Return only selected fields via filter_path
# res = es.search(index='test-index', filter_path=['hits.hits._id', 'hits.hits._type'])
# es.search(index='test-index', filter_path=['hits.hits._*'])
print(res)
1 Simple python-es usage
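
Building on the snippet above, a hedged sketch of a full-text match query (the field name 'text' comes from the commented sample doc; adjust to your own mapping):

from elasticsearch import Elasticsearch

es = Elasticsearch([{'host': '8.134.210.4', 'port': 9200}])
# match analyzes the query string and scores hits, unlike match_all which returns everything
res = es.search(index="test-index", query={"match": {"text": "cool"}})
for hit in res['hits']['hits']:
    print(hit['_score'], hit['_source'])
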
#!/usr/bin/env python
# Licensed to Elasticsearch B.V under one or more agreements.
# Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
# See the LICENSE file in the project root for more information

"""Script that downloads a public dataset and streams it to an Elasticsearch cluster"""

import csv
from os.path import abspath, join, dirname, exists
import tqdm
import urllib3
from elasticsearch import Elasticsearch
from elasticsearch.helpers import streaming_bulk


NYC_RESTAURANTS = (
    "https://data.cityofnewyork.us/api/views/43nn-pn8j/rows.csv?accessType=DOWNLOAD"
)
DATASET_PATH = join(dirname(abspath(__file__)), "nyc-restaurants.csv")
CHUNK_SIZE = 16384


def download_dataset():
    """Downloads the public dataset if not locally downlaoded
    and returns the number of rows are in the .csv file.
    """
    if not exists(DATASET_PATH):
        http = urllib3.PoolManager()
        resp = http.request("GET", NYC_RESTAURANTS, preload_content=False)

        if resp.status != 200:
            raise RuntimeError("Could not download dataset")

        with open(DATASET_PATH, mode="wb",encoding="utf-8") as f:
            chunk = resp.read(CHUNK_SIZE)
            while chunk:
                f.write(chunk)
                chunk = resp.read(CHUNK_SIZE)

    with open(DATASET_PATH, encoding="utf-8") as f:
        return sum(1 for _ in f) - 1  # minus 1 for the header row


def create_index(client):
    """Creates an index in Elasticsearch if one isn't already there."""
    client.indices.create(
        index="nyc-restaurants",
        body={
            "settings": {"number_of_shards": 1},
            "mappings": {
                "properties": {
                    "name": {"type": "text"},
                    "borough": {"type": "keyword"},
                    "cuisine": {"type": "keyword"},
                    "grade": {"type": "keyword"},
                    "location": {"type": "geo_point"},
                }
            },
        },
        ignore=400,
    )


def generate_actions():
    """Reads the file through csv.DictReader() and for each row
    yields a single document. This function is passed into the bulk()
    helper to create many documents in sequence.
    """
    with open(DATASET_PATH, mode="r",encoding="utf-8") as f:
        reader = csv.DictReader(f)

        for row in reader:
            doc = {
                "_id": row["CAMIS"],
                "name": row["DBA"],
                "borough": row["BORO"],
                "cuisine": row["CUISINE DESCRIPTION"],
                "grade": row["GRADE"] or None,
            }

            lat = row["Latitude"]
            lon = row["Longitude"]
            if lat not in ("", "0") and lon not in ("", "0"):
                doc["location"] = {"lat": float(lat), "lon": float(lon)}
            yield doc


def main():
    print("Loading dataset...")
    number_of_docs = download_dataset()

    # client = Elasticsearch(
    #     # Add your cluster configuration here!
    # )
    client = Elasticsearch([{'host': '8.134.210.4', 'port': 9200}])
    print("Creating an index...")
    create_index(client)

    print("Indexing documents...")
    progress = tqdm.tqdm(unit="docs", total=number_of_docs)
    successes = 0
    for ok, action in streaming_bulk(
        client=client, index="nyc-restaurants", actions=generate_actions(),
    ):
        progress.update(1)
        successes += ok
    print("Indexed %d/%d documents" % (successes, number_of_docs))


if __name__ == "__main__":
    main()
2 Bulk inserting data

Techniques: csv, file and path handling, progress bar, index creation, bulk insertion
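
Once the documents are indexed, a sketch of reading them back with a terms aggregation (the index name nyc-restaurants and the keyword field borough come from the mapping above; the host is an assumption):

from elasticsearch import Elasticsearch

es = Elasticsearch([{'host': '8.134.210.4', 'port': 9200}])
res = es.search(
    index="nyc-restaurants",
    size=0,  # only aggregation buckets are needed, no hits
    aggs={"by_borough": {"terms": {"field": "borough"}}},
)
# count restaurants per borough
for bucket in res["aggregations"]["by_borough"]["buckets"]:
    print(bucket["key"], bucket["doc_count"])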

 

 

 

To be added...

posted @ 2021-12-06 10:48  风hua