代码改变世界

hive入ES5.6.8

2018-06-15 16:09  LI桥IL  阅读(546)  评论(0)  编辑  收藏  举报

1、--建立索引

number_of_shards：主分片数；number_of_replicas：副本数；index.refresh_interval：索引刷新间隔（-1 表示关闭自动刷新，利于批量导入，导入完成后应恢复）
# Create the index on ES 5.6: 64 primary shards, no replicas;
# refresh_interval "-1" disables automatic refresh to speed up the bulk load
# (re-enable it after loading). The explicit Content-Type header is required
# from ES 6.x onward and is harmless on 5.x.
curl -XPUT 'http://192.168.10.69:9200/zhuanlidata9' \
  -H 'Content-Type: application/json' \
  -d '{"settings":{"number_of_shards":64,"number_of_replicas":0,"index.refresh_interval":"-1"}}'

2、--创建mapping

# Create the mapping for type "zhuanliquanwen" (ES 5.x still uses per-type mappings).
# keyword  = exact-match, not analyzed (ids, codes, dates stored as strings).
# text + ik_max_word = Chinese full-text fields analyzed with the IK plugin
#   (the ik analysis plugin must be installed on every ES node).
# NOTE(review): the field named "text" is mapped as keyword, not text — confirm
# that exact-match is intended for it.
curl -XPUT '192.168.10.69:9200/zhuanlidata9/_mapping/zhuanliquanwen' \
  -H 'Content-Type: application/json' \
  -d '
{
"properties":{
"uuid":{"type":"keyword"},
"filename":{"type":"keyword"},
"lang":{"type":"keyword"},
"country":{"type":"keyword"},
"doc_number":{"type":"keyword"},
"kind":{"type":"keyword"},
"date":{"type":"keyword"},
"gazette_num":{"type":"keyword"},
"gazette_date":{"type":"keyword"},
"appl_type":{"type":"keyword"},
"appl_country":{"type":"keyword"},
"appl_doc_number":{"type":"keyword"},
"appl_date":{"type":"keyword"},
"text":{"type":"keyword"},
"invention_title":{"type":"text","analyzer":"ik_max_word","search_analyzer":"ik_max_word"},
"assignees":{"type":"text"},
"assignees_address":{"type":"text","analyzer":"ik_max_word","search_analyzer":"ik_max_word"},
"abstracts":{"type":"text","analyzer":"ik_max_word","search_analyzer":"ik_max_word"},
"applicants":{"type":"text"},
"applicants_address":{"type":"text"},
"inventors":{"type":"text"},
"agents":{"type":"text"},
"agency":{"type":"text"},
"descriptions":{"type":"text","analyzer":"ik_max_word","search_analyzer":"ik_max_word"},
"claims":{"type":"text","analyzer":"ik_max_word","search_analyzer":"ik_max_word"},
"cn_related_publication":{"type":"text"},
"cn_publication_referen":{"type":"text"},
"cn_related_document":{"type":"text"},
"priority_claims":{"type":"text"},
"reference":{"type":"text"},
"searcher":{"type":"text"}
}
}'

3、--创建hive映射ES表

-- On host 11.31, start the Hive CLI and run the commands below.
hive
-- Register the ES-Hadoop connector; its version must match the ES cluster (5.6.8).
add jar /data/2/zly/elasticsearch-hadoop-5.6.8/dist/elasticsearch-hadoop-5.6.8.jar;
-- External table backed by the ES index: rows inserted here are written
-- straight to ES; dropping the table does not delete the ES data.
-- `date` is a reserved word under Hive's SQL-2011 mode, so it is back-quoted
-- (`text` is back-quoted as well, defensively).
CREATE EXTERNAL TABLE test.zhuanlidata9 (
  uuid string,
  filename string,
  lang string,
  country string,
  doc_number string,
  kind string,
  `date` string,
  gazette_num string,
  gazette_date string,
  appl_type string,
  appl_country string,
  appl_doc_number string,
  appl_date string,
  `text` string,
  invention_title string,
  assignees string,
  assignees_address string,
  abstracts string,
  applicants string,
  applicants_address string,
  inventors string,
  agents string,
  agency string,
  descriptions string,
  claims string,
  cn_related_publication string,
  cn_publication_referen string,
  cn_related_document string,
  priority_claims string,
  reference string,
  searcher string
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES(
  'es.resource' = 'zhuanlidata9/zhuanliquanwen',           -- index/type created above
  'es.nodes' = '192.168.10.69,192.168.10.70,192.168.10.71',
  'es.port' = '9200',
  'es.mapping.id' = 'uuid',           -- the uuid column becomes the ES document _id
  'es.write.operation' = 'upsert'     -- insert-or-update keyed on es.mapping.id
);
-- Leave the Hive CLI.
exit;

 

4、--将数据load进hive映射es表 /* 在11.31上 修改 /data/2/zly/test_hive_es.sh 的循环次数以及表名 */

--{1..18}循环次数  mapreduce.job.running.map.limit 线程数
#!/bin/bash
# Load report_statistics.zhuanli_zlqw into the ES-backed Hive table
# test.zhuanlidata9 (adjust the loop count and table names as needed).
#
# NOTE(review): every iteration re-inserts the FULL source table, and uuid is
# freshly random on each run, so 18 iterations would write 18 duplicate copies
# to ES despite the upsert setting. Confirm whether the loop is meant to split
# the load (e.g. by partition/bucket) before running as-is.
for i in {1..18}
do
hive -e "
add jar /data/2/zly/elasticsearch-hadoop-5.6.8/dist/elasticsearch-hadoop-5.6.8.jar;
-- Cap the number of concurrently running map tasks for this job.
set mapreduce.job.running.map.limit=50;
insert into test.zhuanlidata9
select
  -- Random 32-char hex id (UUID with dashes stripped); used as the ES _id.
  regexp_replace(reflect(\"java.util.UUID\", \"randomUUID\"), \"-\", \"\") uuid,
  filename,
  lang,
  country,
  doc_number,
  kind,
  -- NOTE(review): \`date\` is derived from appl_date, kept only when it starts
  -- with '2' (i.e. looks like a 2xxx year) -- confirm this is intended.
  case when appl_date like '2%' then appl_date else '' end \`date\`,
  gazette_num,
  gazette_date,
  appl_type,
  appl_country,
  appl_doc_number,
  case when appl_date like '2%' then appl_date else '' end appl_date,
  text,
  invention_title,
  assignees,
  assignees_address,
  abstracts,
  applicants,
  applicants_address,
  inventors,
  agents,
  agency,
  descriptions,
  claims,
  cn_related_publication,
  cn_publication_referen,
  cn_related_document,
  priority_claims,
  reference,
  searcher
from report_statistics.zhuanli_zlqw;
"
done