logstash7.13之grok
nginx匹配示例
nginx日志格式 '$remote_user [$time_local] $http_x_Forwarded_for $remote_addr $request $status $upstream_status' '$http_x_forwarded_for' '$upstream_addr ' 'ups_resp_time: $upstream_response_time ' 'request_time: $request_time';
nginx日志示例 - [09/May/2023:15:01:31 +0800] 11.20.1.30 38.34.246.127 GET / HTTP/1.1 200 -11.20.1.30- ups_resp_time: - request_time: 0.000
grok匹配 filter { grok { match => { "message" => "%{DATA:remote_user} \[%{HTTPDATE:log_times}\]\s+(?:%{IPV4:http_x_Forwarded_for}|-)\s+(?:%{IPV4:remote_addr}|-)\s+%{WORD:request_method}\s+%{URIPATH:uri}(?:%{URIPARAM:params}|) HTTP/%{NUMBER:http_version} %{NUMBER:response_code} (?:%{NUMBER:upstream_status}|-)(?:%{IPV4:http_x_forwarded_for}|-)(?:%{HOSTPORT:upstream_addr}|-) ups_resp_time: (?:%{NUMBER:ups_resp_time}|-) request_time: (?:%{NUMBER:request_time}|-)" } } }
# 以 (?:%{URI:referrer}|-) 为例:这种写法表示字段不为空时按 %{URI} 匹配并把内容保存到 referrer,字段为空(nginx 记录为 "-")时则匹配字面量 "-"。经测试,如果直接写成 %{URI:referrer},当 referer 为空时会匹配失败,事件会被打上 _grokparsefailure 标签,因此对可能为空的字段需要注意使用这种正则写法。
匹配后数据 { "http_x_Forwarded_for" => "11.20.1.30", "host" => "elk3", "message" => "- [09/May/2023:15:01:31 +0800] 11.20.1.30 38.34.246.127 GET / HTTP/1.1 200 -11.20.1.30- ups_resp_time: - request_time: 0.000", "request_method" => "GET", "upstream_status" => "-", "ups_resp_time" => "-", "request_time" => "0.000", "remote_user" => "-", "log_times" => "09/May/2023:15:01:31 +0800", "upstream_addr" => "-", "@version" => "1", "@timestamp" => 2023-05-09T08:12:35.912Z, "http_version" => "1.1", "remote_addr" => "38.34.246.127", "http_x_forwarded_for" => "11.20.1.30", "uri" => "/", "response_code" => "200" }
grok使用格式
%{SYNTAX:SEMANTIC}
%{预定义好的表达式的名字:自定义命名}
内置正则
1 USERNAME [a-zA-Z0-9._-]+ 2 USER %{USERNAME} 3 EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+ 4 EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME} 5 INT (?:[+-]?(?:[0-9]+)) 6 BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+))) 7 NUMBER (?:%{BASE10NUM}) 8 BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+)) 9 BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b 10 11 POSINT \b(?:[1-9][0-9]*)\b 12 NONNEGINT \b(?:[0-9]+)\b 13 WORD \b\w+\b 14 NOTSPACE \S+ 15 SPACE \s* 16 DATA .*? 17 GREEDYDATA .* 18 QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)) 19 UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12} 20 # URN, allowing use of RFC 2141 section 2.3 reserved characters 21 URN urn:[0-9A-Za-z][0-9A-Za-z-]{0,31}:(?:%[0-9a-fA-F]{2}|[0-9A-Za-z()+,.:=@;$_!*'/?#-])+ 22 23 # Networking 24 MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC}) 25 CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}) 26 WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}) 27 COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2}) 28 IPV6 
((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)? 29 IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9]) 30 IP (?:%{IPV6}|%{IPV4}) 31 HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b) 32 IPORHOST (?:%{IP}|%{HOSTNAME}) 33 HOSTPORT %{IPORHOST}:%{POSINT} 34 35 # paths 36 PATH (?:%{UNIXPATH}|%{WINPATH}) 37 UNIXPATH (/([\w_%!$@:.,+~-]+|\\.)*)+ 38 TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+)) 39 WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+ 40 URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+ 41 URIHOST %{IPORHOST}(?::%{POSINT:port})? 42 # uripath comes loosely from RFC1738, but mostly from what Firefox 43 # doesn't turn into %XX 44 URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%&_\-]*)+ 45 #URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)? 
46 URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]* 47 URIPATHPARAM %{URIPATH}(?:%{URIPARAM})? 48 URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})? 49 50 # Months: January, Feb, 3, 03, 12, December 51 MONTH \b(?:[Jj]an(?:uary|uar)?|[Ff]eb(?:ruary|ruar)?|[Mm](?:a|ä)?r(?:ch|z)?|[Aa]pr(?:il)?|[Mm]a(?:y|i)?|[Jj]un(?:e|i)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo](?:c|k)?t(?:ober)?|[Nn]ov(?:ember)?|[Dd]e(?:c|z)(?:ember)?)\b 52 MONTHNUM (?:0?[1-9]|1[0-2]) 53 MONTHNUM2 (?:0[1-9]|1[0-2]) 54 MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]) 55 56 # Days: Monday, Tue, Thu, etc... 57 DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?) 58 59 # Years? 60 YEAR (?>\d\d){1,2} 61 HOUR (?:2[0123]|[01]?[0-9]) 62 MINUTE (?:[0-5][0-9]) 63 # '60' is a leap second in most time standards and thus is valid. 64 SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?) 65 TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9]) 66 # datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it) 67 DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR} 68 DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR} 69 ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE})) 70 ISO8601_SECOND (?:%{SECOND}|60) 71 TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}? 72 DATE %{DATE_US}|%{DATE_EU} 73 DATESTAMP %{DATE}[- ]%{TIME} 74 TZ (?:[APMCE][SD]T|UTC) 75 DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ} 76 DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE} 77 DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR} 78 DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND} 79 80 # Syslog Dates: Month Day HH:MM:SS 81 SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME} 82 PROG [\x21-\x5a\x5c\x5e-\x7e]+ 83 SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])? 
84 SYSLOGHOST %{IPORHOST} 85 SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}> 86 HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT} 87 88 # Shortcuts 89 QS %{QUOTEDSTRING} 90 91 # Log formats 92 SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}: 93 94 # Log Levels 95 LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)