logstash7.13之grok

nginx匹配示例

nginx日志格式
'$remote_user [$time_local]  $http_x_Forwarded_for $remote_addr  $request $status $upstream_status'
                       '$http_x_forwarded_for'
                       '$upstream_addr '
                       'ups_resp_time: $upstream_response_time '
                       'request_time: $request_time';
nginx日志示例
- [09/May/2023:15:01:31 +0800]  11.20.1.30 38.34.246.127  GET / HTTP/1.1 200 -11.20.1.30- ups_resp_time: - request_time: 0.000
grok匹配
# Parse one nginx access-log line, read from the "message" field, into named fields.
#
# Fields are captured in the order they appear in the pattern below:
#   remote_user, log_times (inside [ ]), http_x_Forwarded_for, remote_addr,
#   request_method, uri, params (optional query string), http_version,
#   response_code, upstream_status, http_x_forwarded_for, upstream_addr,
#   ups_resp_time, request_time.
#
# Notes on the pattern:
#   * Every (?:%{PATTERN:field}|-) group accepts either a real value or a literal "-".
#   * \s+ is used where the log line may contain more than one space between values.
#   * There is deliberately no separator between the upstream_status group and the
#     http_x_forwarded_for group; the pattern expects the two values to be adjacent.
#   * http_x_Forwarded_for and http_x_forwarded_for differ only in letter case and
#     are captured as two separate fields.
filter {
   grok {
       match => {
         "message" => "%{DATA:remote_user} \[%{HTTPDATE:log_times}\]\s+(?:%{IPV4:http_x_Forwarded_for}|-)\s+(?:%{IPV4:remote_addr}|-)\s+%{WORD:request_method}\s+%{URIPATH:uri}(?:%{URIPARAM:params}|) HTTP/%{NUMBER:http_version} %{NUMBER:response_code} (?:%{NUMBER:upstream_status}|-)(?:%{IPV4:http_x_forwarded_for}|-) (?:%{HOSTPORT:upstream_addr}|-) ups_resp_time: (?:%{NUMBER:ups_resp_time}|-) request_time: (?:%{NUMBER:request_time}|-)"
       } 
   }
}
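# 说明:形如 (?:%{URI:referrer}|-) 的写法表示——字段有值时按 %{URI} 匹配并保存到 referrer;字段为空(nginx 日志中记录为 "-")时,则直接匹配字面量 "-"。经测试,如果直接写成 %{URI:referrer},当 referer 为空时会匹配失败,事件会被打上 _grokparsefailure 标签,因此凡是可能为空的字段,都需要按这种方式编写正则表达式(上面配置中的 upstream_status、upstream_addr 等字段同理)。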
匹配后数据
{
    "http_x_Forwarded_for" => "11.20.1.30",
                    "host" => "elk3",
                 "message" => "- [09/May/2023:15:01:31 +0800]  11.20.1.30 38.34.246.127  GET / HTTP/1.1 200 -11.20.1.30- ups_resp_time: - request_time: 0.000",
          "request_method" => "GET",
         "upstream_status" => "-",
           "ups_resp_time" => "-",
            "request_time" => "0.000",
             "remote_user" => "-",
               "log_times" => "09/May/2023:15:01:31 +0800",
           "upstream_addr" => "-",
                "@version" => "1",
              "@timestamp" => 2023-05-09T08:12:35.912Z,
            "http_version" => "1.1",
             "remote_addr" => "38.34.246.127",
    "http_x_forwarded_for" => "11.20.1.30",
                     "uri" => "/",
           "response_code" => "200"
}

 

grok使用格式

%{SYNTAX:SEMANTIC}
%{预定义好的表达式的名字:自定义命名}

内置正则

 1 USERNAME [a-zA-Z0-9._-]+
 2 USER %{USERNAME}
 3 EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+
 4 EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME}
 5 INT (?:[+-]?(?:[0-9]+))
 6 BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
 7 NUMBER (?:%{BASE10NUM})
 8 BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
 9 BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
10 
11 POSINT \b(?:[1-9][0-9]*)\b
12 NONNEGINT \b(?:[0-9]+)\b
13 WORD \b\w+\b
14 NOTSPACE \S+
15 SPACE \s*
16 DATA .*?
17 GREEDYDATA .*
18 QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))
19 UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
20 # URN, allowing use of RFC 2141 section 2.3 reserved characters
21 URN urn:[0-9A-Za-z][0-9A-Za-z-]{0,31}:(?:%[0-9a-fA-F]{2}|[0-9A-Za-z()+,.:=@;$_!*'/?#-])+
22 
23 # Networking
24 MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
25 CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
26 WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
27 COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
28 IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?
29 IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9])
30 IP (?:%{IPV6}|%{IPV4})
31 HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
32 IPORHOST (?:%{IP}|%{HOSTNAME})
33 HOSTPORT %{IPORHOST}:%{POSINT}
34 
35 # paths
36 PATH (?:%{UNIXPATH}|%{WINPATH})
37 UNIXPATH (/([\w_%!$@:.,+~-]+|\\.)*)+
38 TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+))
39 WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+
40 URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+
41 URIHOST %{IPORHOST}(?::%{POSINT:port})?
42 # uripath comes loosely from RFC1738, but mostly from what Firefox
43 # doesn't turn into %XX
44 URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%&_\-]*)+
45 #URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
46 URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
47 URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
48 URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
49 
50 # Months: January, Feb, 3, 03, 12, December
51 MONTH \b(?:[Jj]an(?:uary|uar)?|[Ff]eb(?:ruary|ruar)?|[Mm](?:a|ä)?r(?:ch|z)?|[Aa]pr(?:il)?|[Mm]a(?:y|i)?|[Jj]un(?:e|i)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo](?:c|k)?t(?:ober)?|[Nn]ov(?:ember)?|[Dd]e(?:c|z)(?:ember)?)\b
52 MONTHNUM (?:0?[1-9]|1[0-2])
53 MONTHNUM2 (?:0[1-9]|1[0-2])
54 MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
55 
56 # Days: Monday, Tue, Thu, etc...
57 DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
58 
59 # Years?
60 YEAR (?>\d\d){1,2}
61 HOUR (?:2[0123]|[01]?[0-9])
62 MINUTE (?:[0-5][0-9])
63 # '60' is a leap second in most time standards and thus is valid.
64 SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
65 TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
66 # datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
67 DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
68 DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
69 ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
70 ISO8601_SECOND (?:%{SECOND}|60)
71 TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
72 DATE %{DATE_US}|%{DATE_EU}
73 DATESTAMP %{DATE}[- ]%{TIME}
74 TZ (?:[APMCE][SD]T|UTC)
75 DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
76 DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
77 DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
78 DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}
79 
80 # Syslog Dates: Month Day HH:MM:SS
81 SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
82 PROG [\x21-\x5a\x5c\x5e-\x7e]+
83 SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
84 SYSLOGHOST %{IPORHOST}
85 SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
86 HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
87 
88 # Shortcuts
89 QS %{QUOTEDSTRING}
90 
91 # Log formats
92 SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
93 
94 # Log Levels
95 LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)

 

posted @ 2023-05-09 17:49  yxy_linux  阅读(61)  评论(0)  编辑  收藏  举报