15 ES
ES (Elastic Stack(ELK))
相关链接:
B站:https://www.bilibili.com/video/BV1iJ411c7Az?p=65
gitee文档:https://gitee.com/moxi159753/LearningNotes/tree/master/ElasticStack
百度网盘:https://pan.baidu.com/s/1qlzvjjiJ_yNBbVVFodP6fA
安装:
官网链接:https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html
docker安装
官网链接:https://www.elastic.co/guide/en/elasticsearch/reference/7.16/docker.html
参考链接:https://juejin.cn/post/6844904117580595214
命令:
1 拉取镜像
docker pull docker.elastic.co/elasticsearch/elasticsearch:7.16.1
2 启动容器(镜像版本需与上一步拉取的版本保持一致)
docker run -p 9200:9200 -p 9300:9300 --name elasticsearch1 \
  -e "discovery.type=single-node" \
  -e "cluster.name=elasticsearch" \
  -e "ES_JAVA_OPTS=-Xms512m -Xmx512m" \
  -v es_plugins:/usr/share/elasticsearch/plugins \
  -v es_data:/usr/share/elasticsearch/data \
  -d docker.elastic.co/elasticsearch/elasticsearch:7.16.1
3 安装中文分词器IKAnalyzer,并重新启动(IK 插件版本必须与 ES 版本完全一致,否则安装会失败)
docker exec -it elasticsearch1 /bin/bash
# 下面这条命令需要在容器中运行
elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v7.16.1/elasticsearch-analysis-ik-7.16.1.zip
# 退出容器后,在宿主机上重启容器
docker restart elasticsearch1
说明:数据和插件通过数据卷挂载到宿主机 /var/lib/docker/volumes/ 目录下;通过 ES_JAVA_OPTS 调整内存参数;使用 -d 后台启动。
1 验证登录
账户:admin 密码:Admin@123
验证命令:curl 127.0.0.1:9200 --user admin:Admin@123
2 插件elasticsearch-head
http://121.37.212.1%%:9200/?auth_user=admin&auth_password=Admin@123
3 python 使用es
# Using Elasticsearch from Python.
# Official docs: https://elasticsearch-py.readthedocs.io/en/v7.13.2/
from datetime import datetime

from elasticsearch import Elasticsearch

# Index used by every example below.
INDEX_NAME = "test-index"

es = Elasticsearch([{'host': '8.134.210.4', 'port': 9200}])

# Example 1: create a document (disabled; uncomment to run).
# doc = {
#     'author': 'wanghua23',
#     'text': 'Elasticsearch: very cool3!',
#     'timestamp': datetime.now(),
# }
# res = es.index(index="test-index", id=3, document=doc)
# print(res['result'])

# Example 2: fetch a single document by id (disabled; uncomment to run).
# res = es.get(index="test-index", id=1)
# print(res['_source'])

# Example 3: search with a query.
# Refresh first so that recently indexed documents are visible to the search.
es.indices.refresh(index=INDEX_NAME)
match_all_query = {"match_all": {}}
res = es.search(index=INDEX_NAME, query=match_all_query)
print(res)
total_hits = res['hits']['total']['value']
print("Got %d Hits:" % total_hits)
# for hit in res['hits']['hits']:
#     print("%(timestamp)s %(author)s: %(text)s" % hit["_source"])

# Example 4: return only selected fields of the response (disabled; uncomment to run).
# res = es.search(index='test-index', filter_path=['hits.hits._id', 'hits.hits._type'])
# es.search(index='test-index', filter_path=['hits.hits._*'])
print(res)
#!/usr/bin/env python
# Licensed to Elasticsearch B.V under one or more agreements.
# Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
# See the LICENSE file in the project root for more information

"""Script that downloads a public dataset and streams it to an Elasticsearch cluster"""

import csv
import os
from os.path import abspath, join, dirname, exists

import tqdm
import urllib3
from elasticsearch import Elasticsearch
from elasticsearch.helpers import streaming_bulk

NYC_RESTAURANTS = (
    "https://data.cityofnewyork.us/api/views/43nn-pn8j/rows.csv?accessType=DOWNLOAD"
)
DATASET_PATH = join(dirname(abspath(__file__)), "nyc-restaurants.csv")
CHUNK_SIZE = 16384
INDEX_NAME = "nyc-restaurants"
# Cluster configuration; adjust to point at your own Elasticsearch node.
ES_HOSTS = [{'host': '8.134.210.4', 'port': 9200}]


def download_dataset():
    """Download the public dataset if it is not already present locally.

    Returns:
        The number of lines in the .csv file minus the header line (never
        negative). It is used as the progress-bar total.

    Raises:
        RuntimeError: if the server does not answer with HTTP 200.
    """
    if not exists(DATASET_PATH):
        http = urllib3.PoolManager()
        resp = http.request("GET", NYC_RESTAURANTS, preload_content=False)
        try:
            if resp.status != 200:
                raise RuntimeError("Could not download dataset")

            # Write to a temporary name first so that an interrupted download
            # never leaves a truncated file that a later run would treat as
            # the complete dataset.
            partial_path = DATASET_PATH + ".part"
            # Binary mode must not be given an encoding (open() raises
            # ValueError otherwise); the raw response bytes are stored as-is.
            with open(partial_path, mode="wb") as f:
                chunk = resp.read(CHUNK_SIZE)
                while chunk:
                    f.write(chunk)
                    chunk = resp.read(CHUNK_SIZE)
            os.replace(partial_path, DATASET_PATH)
        finally:
            # Streaming responses must be released back to the pool explicitly.
            resp.release_conn()

    with open(DATASET_PATH, encoding="utf-8") as f:
        line_count = sum(1 for _ in f)
    # Subtract the header line; clamp so an empty file yields 0, not -1.
    return max(line_count - 1, 0)


def create_index(client):
    """Create the ``nyc-restaurants`` index in Elasticsearch.

    The request is sent with ``ignore=400`` so that an HTTP 400 response
    (returned, for example, when the index already exists) does not raise.

    Args:
        client: the Elasticsearch client used to issue the request.
    """
    client.indices.create(
        index=INDEX_NAME,
        body={
            "settings": {"number_of_shards": 1},
            "mappings": {
                "properties": {
                    "name": {"type": "text"},
                    "borough": {"type": "keyword"},
                    "cuisine": {"type": "keyword"},
                    "grade": {"type": "keyword"},
                    "location": {"type": "geo_point"},
                }
            },
        },
        ignore=400,
    )


def generate_actions():
    """Read the dataset through csv.DictReader() and yield one document per row.

    This generator is passed to the streaming_bulk() helper to create many
    documents in sequence.

    Yields:
        A dict with ``_id``, ``name``, ``borough``, ``cuisine`` and ``grade``
        keys, plus a ``location`` key when the row carries usable coordinates.
    """
    # newline="" lets the csv module handle line endings itself, which keeps
    # quoted fields containing newlines intact.
    with open(DATASET_PATH, mode="r", encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f)

        for row in reader:
            doc = {
                "_id": row["CAMIS"],
                "name": row["DBA"],
                "borough": row["BORO"],
                "cuisine": row["CUISINE DESCRIPTION"],
                # An empty grade is stored as None rather than "".
                "grade": row["GRADE"] or None,
            }

            lat = row["Latitude"]
            lon = row["Longitude"]
            # Rows with an empty or "0" coordinate get no location field.
            if lat not in ("", "0") and lon not in ("", "0"):
                doc["location"] = {"lat": float(lat), "lon": float(lon)}
            yield doc


def main():
    """Download the dataset, create the index and bulk-index every row."""
    print("Loading dataset...")
    number_of_docs = download_dataset()

    client = Elasticsearch(ES_HOSTS)
    print("Creating an index...")
    create_index(client)

    print("Indexing documents...")
    successes = 0
    # The context manager closes the progress bar even if indexing fails.
    with tqdm.tqdm(unit="docs", total=number_of_docs) as progress:
        for ok, action in streaming_bulk(
            client=client,
            index=INDEX_NAME,
            actions=generate_actions(),
        ):
            progress.update(1)
            successes += ok
    print("Indexed %d/%d documents" % (successes, number_of_docs))


if __name__ == "__main__":
    main()
涉及技术:csv 解析(csv.DictReader)、文件路径处理(os.path)、进度条(tqdm)、索引创建、批量插入(streaming_bulk)
待补充..
作者:华王
博客:https://www.cnblogs.com/huahuawang/