elasticsearch-script-painless
为了举例说明Painless是如何工作的,让我们加载一些曲棍球统计数据到Elasticsearch的索引中。
PUT hockey/_bulk?refresh {"index":{"_id":1}} {"first":"johnny","last":"gaudreau","goals":[9,27,1],"assists":[17,46,0],"gp":[26,82,1],"born":"1993/08/13"} {"index":{"_id":2}} {"first":"sean","last":"monohan","goals":[7,54,26],"assists":[11,26,13],"gp":[26,82,82],"born":"1994/10/12"} {"index":{"_id":3}} {"first":"jiri","last":"hudler","goals":[5,34,36],"assists":[11,62,42],"gp":[24,80,79],"born":"1984/01/04"} {"index":{"_id":4}} {"first":"micheal","last":"frolik","goals":[4,6,15],"assists":[8,23,15],"gp":[26,82,82],"born":"1988/02/17"} {"index":{"_id":5}} {"first":"sam","last":"bennett","goals":[5,0,0],"assists":[8,1,0],"gp":[26,1,0],"born":"1996/06/20"} {"index":{"_id":6}} {"first":"dennis","last":"wideman","goals":[0,26,15],"assists":[11,30,24],"gp":[26,81,82],"born":"1983/03/20"} {"index":{"_id":7}} {"first":"david","last":"jones","goals":[7,19,5],"assists":[3,17,4],"gp":[26,45,34],"born":"1984/08/10"} {"index":{"_id":8}} {"first":"tj","last":"brodie","goals":[2,14,7],"assists":[8,42,30],"gp":[26,82,82],"born":"1990/06/07"} {"index":{"_id":39}} {"first":"mark","last":"giordano","goals":[6,30,15],"assists":[3,30,24],"gp":[26,60,63],"born":"1983/10/03"} {"index":{"_id":10}} {"first":"mikael","last":"backlund","goals":[3,15,13],"assists":[6,24,18],"gp":[26,82,82],"born":"1989/03/17"} {"index":{"_id":11}} {"first":"joe","last":"colborne","goals":[3,18,13],"assists":[6,20,24],"gp":[26,67,82],"born":"1990/01/30"}
文档值可以从一个名称为doc的Map中获取
举例说明,下面的脚本计算一个球员的总进球数,这个例子使用了int强类型和for循环。
## 自定义评分计算方式,数组求和,值返回到_score上 GET hockey/_search { "query": { "function_score": { "script_score": { "script": { "lang": "painless", "source": """ int total = 0; for(int i = 0; i < doc['goals'].length; ++i){ total += doc['goals'][i]; } return total; """ } } } } , "size": 2 }
结果
{ "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 12, "relation" : "eq" }, "max_score" : 87.0, "hits" : [ { "_index" : "hockey", "_type" : "_doc", "_id" : "2", "_score" : 87.0, "_source" : { "first" : "sean", "last" : "monohan", "goals" : [ 7, 54, 26 ], "assists" : [ 11, 26, 13 ], "gp" : [ 26, 82, 82 ], "born" : "1994/10/12" } }, { "_index" : "hockey", "_type" : "_doc", "_id" : "3", "_score" : 75.0, "_source" : { "first" : "jiri", "last" : "hudler", "goals" : [ 5, 34, 36 ], "assists" : [ 11, 62, 42 ], "gp" : [ 24, 80, 79 ], "born" : "1984/01/04" } } ] } }
或者,你可以使用脚本字段(script_fields)而不是function_score查询来完成同样的事情:
## 返回添加一个字段total_goals,数组求和,返回数组 GET hockey/_search { "_source": true, "script_fields": { "total_goals": { "script": { "lang": "painless", "source": """ int total = 0; for(int i = 0; i < doc['goals'].length; ++i){ total += doc['goals'][i]; } return total; """ } } } , "size": 2 }
{ "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 12, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "hockey", "_type" : "_doc", "_id" : "1", "_score" : 1.0, "_source" : { "first" : "johnny", "last" : "gaudreau", "goals" : [ 9, 27, 1 ], "assists" : [ 17, 46, 0 ], "gp" : [ 26, 82, 1 ], "born" : "1993/08/13" }, "fields" : { "total_goals" : [ 37 ] } }, { "_index" : "hockey", "_type" : "_doc", "_id" : "2", "_score" : 1.0, "_source" : { "first" : "sean", "last" : "monohan", "goals" : [ 7, 54, 26 ], "assists" : [ 11, 26, 13 ], "gp" : [ 26, 82, 82 ], "born" : "1994/10/12" }, "fields" : { "total_goals" : [ 87 ] } } ] } }
如果要拼接的字段不存在,将抛出异常:A document doesn't have a value for a field! Use doc[<field>].size()==0 to check if a document is missing a field!
如果对text类型的字段直接使用doc['last'].value,将抛出异常:Fielddata is disabled on text fields by default. Set fielddata=true on [last] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead. 因此下面的示例改用last.keyword和first.keyword子字段。
GET hockey/_search { "size": 2, "_source": true, "script_fields": { "fullname": { "script": { "lang": "painless", "source": """ if(doc['last.keyword'].size()==0 || doc['first.keyword'].size()==0){ return ""; } else { return doc['first.keyword'].value +' '+ doc['last.keyword'].value; } """ } } } }
{ "took" : 1, "timed_out" : false, "_shards" : { "total" : 1, "successful" : 1, "skipped" : 0, "failed" : 0 }, "hits" : { "total" : { "value" : 12, "relation" : "eq" }, "max_score" : 1.0, "hits" : [ { "_index" : "hockey", "_type" : "_doc", "_id" : "1", "_score" : 1.0, "_source" : { "first" : "johnny", "last" : "gaudreau", "goals" : [ 9, 27, 1 ], "assists" : [ 17, 46, 0 ], "gp" : [ 26, 82, 1 ], "born" : "1993/08/13" }, "fields" : { "fullname" : [ "johnny gaudreau" ] } }, { "_index" : "hockey", "_type" : "_doc", "_id" : "2", "_score" : 1.0, "_source" : { "first" : "sean", "last" : "monohan", "goals" : [ 7, 54, 26 ], "assists" : [ 11, 26, 13 ], "gp" : [ 26, 82, 82 ], "born" : "1994/10/12" }, "fields" : { "fullname" : [ "sean monohan" ] } } ] } }
你可以非常容易地更新字段值,通过ctx._source.<field-name>即可访问文档的原始字段。
把球员_id=1的last name改为hockey
POST hockey/_update/1 { "script": { "lang": "painless", "source": "ctx._source.last = params.last", "params": { "last": "hockey" } } }
结果
{ "_index" : "hockey", "_type" : "_doc", "_id" : "1", "_version" : 2, "result" : "updated", "_shards" : { "total" : 2, "successful" : 2, "failed" : 0 }, "_seq_no" : 14, "_primary_term" : 1 }
新增球员昵称字段nick
POST hockey/_update/1 { "script": { "lang": "painless", "source": """ ctx._source.last = params.last; ctx._source.nick = params.nick """, "params": { "last": "gaudreau", "nick": "hockey" } } }
结果
{ "_index" : "hockey", "_type" : "_doc", "_id" : "1", "_version" : 3, "result" : "updated", "_shards" : { "total" : 2, "successful" : 2, "failed" : 0 }, "_seq_no" : 15, "_primary_term" : 1 }
date字段被解释为ZonedDateTime类,所以它支持像getYear,getDayOfWeek或者getMillis这样的方法。为了在脚本中使用它们,去掉get前缀并把剩余方法名的首字母小写(例如getYear写作year,getDayOfMonth写作dayOfMonth),比如下面的例子:
## 从出生日期中获取年份 GET hockey/_search { "script_fields": { "birth_year": { "script": { "source": "doc.born.value.year" } } } } ## 从出生日期中获取月份 GET hockey/_search { "_source": ["born"], "script_fields": { "birth_year": { "script": { "source": "doc.born.value.monthValue" } } } } ## 从出生日期中获取日 GET hockey/_search { "_source": ["born"], "script_fields": { "birth_day": { "script": { "source": "doc.born.value.dayOfMonth" } } } }
Painless原生支持正则表达式,并提供了以下语法结构:
- /pattern/ : 模式字面量,用于创建一个正则表达式模式(例如下面示例中的 /b/)
- =~ : 这个查找操作返回的是boolean值,如果文本的一个子序列匹配上了就返回true,否则返回false
- ==~ : 这个匹配操作返回的是boolean值,如果整个文本匹配上返回true,否则返回false
示例1:
POST hockey/_update_by_query { "script": { "lang": "painless", "source": """ if (ctx._source.last =~ /b/) { ctx._source.last += "matched"; } else { ctx.op = "noop"; } """ } }
示例2:
POST hockey/_update_by_query { "script": { "lang": "painless", "source": """ if (ctx._source.last ==~ /[^aeiou].*[aeiou]/) { ctx._source.last += "matched"; } else { ctx.op = "noop"; } """ } }