通过shell脚本批处理es数据
#!/bin/bash
# ---------------------------------------------------------------------------
# [Script 1] Iterate over a fixed set of `website` values, collect the ids of
# the matching documents from Elasticsearch, and delete them with a single
# _bulk request.
#
# NOTE: two independent scripts are pasted one after the other here. Each one
# ends with `exit 0`, so save them as separate files to run the second one.
#
# Needs bash (arrays, [[ ]], BASH_SOURCE), hence no /bin/sh shebang.
# ---------------------------------------------------------------------------
set -u

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Set of `website` values whose documents are to be deleted.
arr=(6.0)

cur=$(date +%Y%m%d%H%M%S)
script_path=${BASH_SOURCE[0]}
# Only the file name is appended to the output directory below; the full
# script path would yield a non-existent nested path when the script is
# invoked as ./x.sh or /abs/x.sh.
script_name=${script_path##*/}
res_file="${script_path}.${cur}.json.txt"
log_file="${script_path}.${cur}.log"

# The double slash is kept exactly as in the original endpoints.
search_url='testIP:9200/my_index/my_doc//_search?pretty=true'
bulk_url='testIP:9200/my_index/my_doc//_bulk'

for v in "${arr[@]}"; do
  # `from` is a zero-based offset: the original value 1 skipped the first hit.
  printf -v query \
    '{"_source": false,"query": {"match": {"website": "%s"}},"from": 0,"size": 9999}' \
    "$v"
  printf '%s\n' "curl ${search_url} -d '${query}'"
  # Pass the JSON body as one quoted argument. Building a command string and
  # eval-ing it let the shell strip the JSON quotes and split the body.
  curl "$search_url" -d "$query" >> "$res_file" \
    || die "search request failed for website=${v}"
done

str_head='{"delete":{"_id":"'
str_foot='"}}'
split_file_dir='/data/xiaole_chk_url/domain_iask/'
bulk_file="${split_file_dir}${script_name}.${cur}.json"

# Create the bulk file truly empty (`echo '' >` would write a blank first
# line into the bulk payload).
mkdir -p -- "$split_file_dir" || die "cannot create ${split_file_dir}"
: > "$bulk_file" || die "cannot create ${bulk_file}"

# Notes on shell quoting:
#   Single quotes: every character is literal, variables are not expanded,
#   and a single quote cannot appear inside (not even escaped).
#   Double quotes: variables and escape sequences are allowed, e.g.
#     your_name='qinjx'
#     str="Hello, I know your are \"$your_name\"! \n"
#
# Notes on parameter expansion:
#   ${string#substring}               strip shortest match from the start
#   ${string//substring/replacement}  replace every match
# Further reading on string comparison and `if` tests in shell:
#   https://blog.csdn.net/mr_leehy/article/details/76383091
#   http://www.cnblogs.com/276815076/archive/2011/10/30/2229286.html

# Matches a pretty-printed hit line such as:   "_id" : "some/id.html",
# Group 1 is the raw (still JSON-escaped) id.
id_re='^[[:space:]]*"_id" : "(.*)",?[[:space:]]*$'

# Read the search output and generate the bulk file.
# `IFS= read -r` keeps backslash escapes inside ids intact.
while IFS= read -r line || [[ -n $line ]]; do
  printf '%s\n' "$line"
  if [[ $line =~ $id_re ]]; then
    doc_id=${BASH_REMATCH[1]}
    printf '%s\n' "$doc_id"
    printf '%s\n' "${str_head}${doc_id}${str_foot}" >> "$bulk_file"
  else
    echo bbb
  fi
done < "$res_file"

# Example of the generated bulk file:
#{"delete":{"_id":"website.com.cn/b/tpoNpaBlFx.html"}}
#{"delete":{"_id":"website.com.cn/b/4W0xcTKZib.html"}}
#{"delete":{"_id":"website.com.cn/b/5dptLwDEaD.html"}}
#{"delete":{"_id":"website.com.cn/b/4OdzPUwb6X.html"}}
#{"delete":{"_id":"website.com.cn/b/2baCMVRsAH.html"}}
#{"delete":{"_id":"website.com.cn/b/2Nb6PnEt0T.html"}}
#{"delete":{"_id":"website.com.cn/b/3GbeNhQvyP.html"}}
#{"delete":{"_id":"website.com.cn/b/3z2wWJWhIf.html"}}
#{"delete":{"_id":"website.com.cn/b/1id9c9K1MT.html"}}
#{"delete":{"_id":"website.com.cn/b/2UYjsh1fcf.html"}}
#{"delete":{"_id":"website.com.cn/b/66PtNs1vbt.html"}}

# Run the bulk delete against Elasticsearch, but only if ids were collected.
if [[ -s $bulk_file ]]; then
  curl -XPOST "$bulk_url" --data-binary "@${bulk_file}" >> "$log_file" \
    || die "bulk delete request failed, see ${log_file}"
else
  printf '%s\n' "no matching ids found; nothing to delete" >&2
fi

exit 0

# Checking the result [query that returns the documents with the given ids]
#curl 'testIP:9200/my_index/my_doc//_search?pretty=true' -d '
#{
#"query" : {
#"bool" : {
#"should" : [
#{ "match" : { "_id": "website.com.cn/b/KV4Lw3dAw1.html" } },
#{ "match" : { "_id": "website.com.cn/b/KI9t2kvSlT.html" } },
#{ "match" : { "_id": "website.com.cn/b/4Hdkz68Vox.html" } },
#{ "match" : { "_id": "bbs.py168.com/xinxi/25975882.html" } }
# ]
# }
#}
#}'
#
# Checking the log [sample bulk response, truncated]
#{"took":1980,"errors":false,"items":[{"delete":{"_index":"my_index","_type":"my_doc","_id":"website.com.cn/b/KzVLkQWh9b.html","_version":2,"_shards":{"total":2,"successful":2,"failed":0},"status":200,"found":true}},{"delete":{"_index":"my_index","_type":"my_doc","_id":"website.com.cn/b/Lc2SQpxEPP.html","_version":2,"_shards":{"total":2,"successful":2,"failed":0},"status":200,"found":true}},{"delete":{"_index":"my_index","_type":"my_doc","_id"

#!/bin/bash
# ---------------------------------------------------------------------------
# [Script 2] Enumerate `website` values and collect the URLs (document ids)
# that satisfy a condition, here: ids containing "/m/".
# A further enumeration level can be nested inside the loop to obtain the
# full set of URLs that satisfy a condition.
# ---------------------------------------------------------------------------
set -u

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

loop_step=1
loop_stop=5000
loop_period_start=0
loop_period_end=0

cur=$(date +%Y%m%d%H%M%S)
script_path=${BASH_SOURCE[0]}
script_name=${script_path##*/}
res_file="${script_path}.${cur}.json.txt"
# Only referenced by the commented-out bulk request at the bottom.
log_file="${script_path}.${cur}.log"

search_url='testIP:9200/my_index/my_doc//_search?pretty=true'

for ((i = 0; i < loop_stop; i++)); do
  loop_period_start=$((i * loop_step))
  loop_period_end=$((loop_period_start + loop_step))
  echo "$loop_period_start"
  echo "$loop_period_end"
  echo "$i"
  # To merely look up a domain, size 1 is enough; to look for a sub-pattern
  # of a domain (e.g. whether "/m/" exists, which suggests a mobile site),
  # use the maximum 9999.
  # `from` is a zero-based offset: the original value 1 skipped the first hit.
  printf -v query \
    '{"query": {"match": {"website": "%s"}},"_source":true,"from":0,"size":9999}' \
    "$loop_period_start"
  printf '%s\n' "curl ${search_url} -d '${query}'"
  # Append the search output to the result file. A failed request is
  # reported and skipped, because nothing destructive runs in this script.
  curl "$search_url" -d "$query" >> "$res_file" \
    || printf '%s\n' "search request failed for website=${loop_period_start}" >&2
done

str_head='{"delete":{"_id":"'
str_foot='"}}'
split_file_dir='/data/xiaole_chk_url/url_mobile/'
bulk_file="${split_file_dir}${script_name}.${cur}.json"
str_tag_mobile='/m/'

mkdir -p -- "$split_file_dir" || die "cannot create ${split_file_dir}"
: > "$bulk_file" || die "cannot create ${bulk_file}"

# Matches a pretty-printed hit line such as:   "_id" : "some/id.html",
id_re='^[[:space:]]*"_id" : "(.*)",?[[:space:]]*$'

# Read the search output and generate the bulk file.
while IFS= read -r line || [[ -n $line ]]; do
  if [[ $line =~ $id_re ]]; then
    doc_id=${BASH_REMATCH[1]}
    if [[ $doc_id == *"$str_tag_mobile"* ]]; then
      printf '%s\n' "$line"
      printf '%s\n' "${str_head}${doc_id}${str_foot}" >> "$bulk_file"
    else
      echo 'filter_1'
    fi
  else
    echo 'filter_o'
  fi
done < "$res_file"

# The bulk delete is intentionally left disabled in this script.
#curl -XPOST testIP:9200/my_index/my_doc//_bulk --data-binary "@${bulk_file}" >> "$log_file"

exit 0
shell 文件读取 if else 分支 字符串查找 模糊匹配 字符串截取
es 批处理 批删除