hive入ES5.6.8
2018-06-15 16:09 LI桥IL 阅读(546) 评论(0) 编辑 收藏 举报
1、建立索引
# Create the ES index "zhuanlidata9".
#   number_of_shards      : number of primary shards
#   number_of_replicas    : replica count (0 = none, speeds up the bulk load)
#   index.refresh_interval: refresh policy; -1 disables periodic refresh so
#                           segments are not rebuilt during the bulk load
#                           (re-enable / _refresh manually after loading)
curl -XPUT 'http://192.168.10.69:9200/zhuanlidata9' -d '{
  "settings": {
    "number_of_shards": 64,
    "number_of_replicas": 0,
    "index.refresh_interval": -1
  }
}'
2、--创建mapping
# Create the mapping for type "zhuanliquanwen" (patent full text).
#   "keyword"                 : exact-match, not analyzed (ids, codes, dates-as-strings)
#   "text" + ik_max_word      : full-text fields analyzed with the IK Chinese analyzer
#   plain "text"              : full-text fields using the default analyzer
# NOTE(review): date fields are mapped as keyword strings, not "date" — confirm
# that range queries on them are not needed, otherwise use a date type.
curl -X PUT '192.168.10.69:9200/zhuanlidata9/_mapping/zhuanliquanwen' -d '{
  "properties": {
    "uuid":                   {"type": "keyword"},
    "filename":               {"type": "keyword"},
    "lang":                   {"type": "keyword"},
    "country":                {"type": "keyword"},
    "doc_number":             {"type": "keyword"},
    "kind":                   {"type": "keyword"},
    "date":                   {"type": "keyword"},
    "gazette_num":            {"type": "keyword"},
    "gazette_date":           {"type": "keyword"},
    "appl_type":              {"type": "keyword"},
    "appl_country":           {"type": "keyword"},
    "appl_doc_number":        {"type": "keyword"},
    "appl_date":              {"type": "keyword"},
    "text":                   {"type": "keyword"},
    "invention_title":        {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word"},
    "assignees":              {"type": "text"},
    "assignees_address":      {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word"},
    "abstracts":              {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word"},
    "applicants":             {"type": "text"},
    "applicants_address":     {"type": "text"},
    "inventors":              {"type": "text"},
    "agents":                 {"type": "text"},
    "agency":                 {"type": "text"},
    "descriptions":           {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word"},
    "claims":                 {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word"},
    "cn_related_publication": {"type": "text"},
    "cn_publication_referen": {"type": "text"},
    "cn_related_document":    {"type": "text"},
    "priority_claims":        {"type": "text"},
    "reference":              {"type": "text"},
    "searcher":               {"type": "text"}
  }
}'
3、--创建hive映射ES表
-- Create the Hive external table mapped onto the ES index.
-- Run inside the hive CLI on host 11.31 (type "hive" to enter it).

-- Register the ES-Hadoop connector jar for this session.
add jar /data/2/zly/elasticsearch-hadoop-5.6.8/dist/elasticsearch-hadoop-5.6.8.jar;

-- External table backed by ES index "zhuanlidata9", type "zhuanliquanwen".
-- `date` and `text` are Hive reserved words, hence the backquotes.
CREATE EXTERNAL TABLE test.zhuanlidata9 (
    uuid                   string,
    filename               string,
    lang                   string,
    country                string,
    doc_number             string,
    kind                   string,
    `date`                 string,
    gazette_num            string,
    gazette_date           string,
    appl_type              string,
    appl_country           string,
    appl_doc_number        string,
    appl_date              string,
    `text`                 string,
    invention_title        string,
    assignees              string,
    assignees_address      string,
    abstracts              string,
    applicants             string,
    applicants_address     string,
    inventors              string,
    agents                 string,
    agency                 string,
    descriptions           string,
    claims                 string,
    cn_related_publication string,
    cn_publication_referen string,
    cn_related_document    string,
    priority_claims        string,
    reference              string,
    searcher               string
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES (
    'es.resource'        = 'zhuanlidata9/zhuanliquanwen',
    'es.nodes'           = '192.168.10.69,192.168.10.70,192.168.10.71',
    'es.port'            = '9200',
    'es.mapping.id'      = 'uuid',    -- uuid column becomes the ES document _id
    'es.write.operation' = 'upsert'   -- update-or-insert keyed on es.mapping.id
);

-- Leave the hive CLI.
exit;
4、将数据 load 进 hive 映射 es 表 /* 在11.31上修改 /data/2/zly/test_hive_es.sh 的循环次数以及表名 */
#!/bin/bash
# Load data from the source Hive table into the ES-backed table.
#   {1..18}                          : number of load passes — adjust per table size
#   mapreduce.job.running.map.limit  : caps concurrently running mappers (throttles ES write load)
for i in {1..18}
do
  hive -e "
    add jar /data/2/zly/elasticsearch-hadoop-5.6.8/dist/elasticsearch-hadoop-5.6.8.jar;
    set mapreduce.job.running.map.limit=50;
    insert into test.zhuanlidata9
    select
      -- random UUID (dashes stripped) as the ES document _id
      regexp_replace(reflect(\"java.util.UUID\", \"randomUUID\"), \"-\", \"\") uuid,
      filename,
      lang,
      country,
      doc_number,
      kind,
      -- NOTE(review): the 'date' target column is populated from appl_date below,
      -- same as the appl_date column itself — looks like a copy-paste of the
      -- appl_date expression; confirm it should not read the source date column.
      -- The like '2%' guard blanks values that do not start with '2' (non-date junk).
      case when appl_date like '2%' then appl_date else '' end date,
      gazette_num,
      gazette_date,
      appl_type,
      appl_country,
      appl_doc_number,
      case when appl_date like '2%' then appl_date else '' end appl_date,
      text,
      invention_title,
      assignees,
      assignees_address,
      abstracts,
      applicants,
      applicants_address,
      inventors,
      agents,
      agency,
      descriptions,
      claims,
      cn_related_publication,
      cn_publication_referen,
      cn_related_document,
      priority_claims,
      reference,
      searcher
    from report_statistics.zhuanli_zlqw;
  "
done
声明:本博客仅用于个人记录,请勿进行任何形式转载