Logstash 解析Json字符串,删除json嵌套字段
一、场景:此文以一个简单的json字符串文件为例,描述如何用logstash解析嵌套的json,并删除其中的某些字段
我们在linux中test.json的内容如下:
{"timestamp":"2018-08-02T14:42:50.084467+0800","flow_id":364959073190719,"in_iface":"eth1","event_type":"alert","src_ip":"10.0.0.4","src_port":80,"dest_ip":"10.0.0.5","dest_port":16781,"proto":"TCP","tx_id":0,"alert":{"action":"allowed","gid":1,"signature_id":2101201,"rev":10,"signature":"GPL WEB_SERVER 403 Forbidden","category":"Attempted Information Leak","severity":2},"http":{"hostname":"bapi.yahoo.com","url":"\/v1tns\/searchorderlist?_time=1533192163978","http_user_agent":"Mozilla\/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/67.0.3396.99 Safari\/537.36","xff":"39.106.108.38","http_content_type":"text\/html","http_method":"POST","protocol":"HTTP\/1.0","status":403,"length":568},"app_proto":"http","flow":{"pkts_toserver":5,"pkts_toclient":5,"bytes_toserver":1547,"bytes_toclient":1076,"start":"2018-08-02T14:42:50.082751+0800"}}
为了方便查看,将其格式化(format)后如下:
{
  "timestamp": "2018-08-02T14:42:50.084467+0800",
  "flow_id": 364959073190719,
  "in_iface": "eth1",
  "event_type": "alert",
  "src_ip": "10.0.0.4",
  "src_port": 80,
  "dest_ip": "10.0.0.5",
  "dest_port": 16781,
  "proto": "TCP",
  "tx_id": 0,
  "alert": {
    "action": "allowed",
    "gid": 1,
    "signature_id": 2101201,
    "rev": 10,
    "signature": "GPL WEB_SERVER 403 Forbidden",
    "category": "Attempted Information Leak",
    "severity": 2
  },
  "http": {
    "hostname": "bapi.yahoo.com",
    "url": "\/v1tns\/searchorderlist?_time=1533192163978",
    "http_user_agent": "Mozilla\/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit\/537.36 (KHTML, like Gecko) Chrome\/67.0.3396.99 Safari\/537.36",
    "xff": "39.106.108.38",
    "http_content_type": "text\/html",
    "http_method": "POST",
    "protocol": "HTTP\/1.0",
    "status": 403,
    "length": 568
  },
  "app_proto": "http",
  "flow": {
    "pkts_toserver": 5,
    "pkts_toclient": 5,
    "bytes_toserver": 1547,
    "bytes_toclient": 1076,
    "start": "2018-08-02T14:42:50.082751+0800"
  }
}
二、目的: 我们需要解析这个json,并且删除其中的顶层字段"src_ip"以及"http"下的嵌套字段"hostname"
我的配置文件如下:
input {
  file {
    path => "/usr/share/logstash/private.cond/test.json"  # the sample file described in this article (one JSON event per line)
    codec => "json"                # decode each line of the file as a JSON document
    start_position => "beginning"  # read the file from its first line instead of tailing it
    sincedb_path => "/dev/null"    # do not persist the read offset, so every run re-reads the file
  }
}
filter {
  json {
    source => "message"  # NOTE(review): with codec => "json" above the event presumably has no "message" field, so this is likely a no-op - confirm before removing
  }
  mutate {
    remove_field => ["src_ip","[http][hostname]"]  # top-level field by name; nested field as [parent][child]
  }
}
output {
  stdout {
    codec => rubydebug  # pretty-print each event to the console
  }
}
注意配置文件第14行(remove_field)中删除字段和嵌套字段的写法:顶层字段直接写字段名(如"src_ip"),嵌套字段写成"[父字段][子字段]"的形式(如"[http][hostname]")
运行logstash我们得到如下输出:
{
"alert" => {
"gid" => 1,
"rev" => 10,
"severity" => 2,
"signature" => "GPL WEB_SERVER 403 Forbidden",
"action" => "allowed",
"signature_id" => 2101201,
"category" => "Attempted Information Leak"
},
"http" => {
"protocol" => "HTTP/1.0",
"http_content_type" => "text/html",
"http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
"http_method" => "POST",
"length" => 568,
"url" => "/v1tns/searchorderlist?_time=1533192163978",
"xff" => "39.106.108.38",
"status" => 403
},
"path" => "/usr/share/logstash/private.cond/test.json",
"event_type" => "alert",
"src_port" => 80,
"dest_port" => 16781,
"dest_ip" => "10.0.0.5",
"proto" => "TCP",
"flow_id" => 364959073190719,
"tx_id" => 0,
"@version" => "1",
"in_iface" => "eth1",
"timestamp" => "2018-08-02T14:42:50.084467+0800",
"flow" => {
"pkts_toserver" => 5,
"pkts_toclient" => 5,
"bytes_toserver" => 1547,
"start" => "2018-08-02T14:42:50.082751+0800",
"bytes_toclient" => 1076
},
"host" => "elk",
"app_proto" => "http",
"@timestamp" => 2018-08-02T10:14:14.372Z
}
我们可以看到src_ip和http下的hostname已经被成功删除
欢迎转载,注明出处。有任何问题和建议,欢迎留言讨论,也可以发我邮箱wenbya@outlook.com