(一)简介:

      丰富的过滤器插件的存在是 logstash 威力如此强大的重要因素。名为过滤器,其实提供的不单单是过滤的功能,它们扩展了进入过滤器的原始数据,进行复杂的逻辑处理,甚至可以无中生有地添加新的 logstash 事件到后续的流程中去!

     Grok 是 Logstash 最重要的插件。你可以在 grok 里预定义好命名正则表达式,在稍后(grok参数或者其他正则表达式里)引用它。

    大多数Linux使用人员都有过用正则表达式来查询机器中相关文件或文件里内容的经历,在Grok里,我们也是使用正则表达式来识别日志里的相关数据块。

有两种方式来使用正则表达式:

  1. 直接写正则来匹配

  2. 用Grok表达式映射正则来匹配

重要提示:Grok表达式很像C语言里的宏定义


(二)grok语法

grok表达式的打印赋值格式的完整语法是下面这样的:

1
%{PATTERN_NAME:capture_name:data_type}

小贴士:data_type 目前只支持两个值:int 和 float

在线grok正则调试的地址:http://grokdebug.herokuapp.com/ 
Logstash基础正则地址:https://github.com/elastic/logstash/blob/v1.4.2/patterns/grok-patterns 


也可以在你的安装路径下查找grok-patterns内置的正则表达式:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
[root@localhost patterns] # cat /usr/local/logstash/vendor/bundle/jruby/1.9/gems/logstash-patterns-core-2.0.2/patterns/grok-patterns 
 
USERNAME [a-zA-Z0-9._-]+
USER %{USERNAME}
EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+
EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME}
HTTPDUSER %{EMAILADDRESS}|%{USER}
INT (?:[+-]?(?:[0-9]+))
BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
NUMBER (?:%{BASE10NUM})
BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
POSINT \b(?:[1-9][0-9]*)\b
NONNEGINT \b(?:[0-9]+)\b
WORD \b\w+\b
NOTSPACE \S+
SPACE \s*
DATA .*?
GREEDYDATA .*
QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))
UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
# Networking
MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?
IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9])
IP (?:%{IPV6}|%{IPV4})
HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
IPORHOST (?:%{IP}|%{HOSTNAME})
HOSTPORT %{IPORHOST}:%{POSINT}
# paths
PATH (?:%{UNIXPATH}|%{WINPATH})
UNIXPATH (/([\w_%!$@:.,~-]+|\\.)*)+
TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+))
WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+
URIPROTO [A-Za-z]+(\+[A-Za-z+]+)?
URIHOST %{IPORHOST}(?::%{POSINT:port})?
# uripath comes loosely from RFC1738, but mostly from what Firefox
# doesn't turn into %XX
URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+
#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
# Months: January, Feb, 3, 03, 12, December
MONTH \b(?:Jan(?:uary|uar)?|Feb(?:ruary|ruar)?|M(?:a|)?r(?:ch|z)?|Apr(?:il)?|Ma(?:y|i)?|Jun(?:e|i)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|O(?:c|k)?t(?:ober)?|Nov(?:ember)?|De(?:c|z)(?:ember)?)\b
MONTHNUM (?:0?[1-9]|1[0-2])
MONTHNUM2 (?:0[1-9]|1[0-2])
MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
# Days: Monday, Tue, Thu, etc...
DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
# Years?
YEAR (?>\d\d){1,2}
HOUR (?:2[0123]|[01]?[0-9])
MINUTE (?:[0-5][0-9])
# '60' is a leap second in most time standards and thus is valid.
SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)
TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
ISO8601_SECOND (?:%{SECOND}|60)
TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
DATE %{DATE_US}|%{DATE_EU}
DATESTAMP %{DATE}[- ]%{TIME}
TZ (?:[PMCE][SD]T|UTC)
DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE}
DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND}
HTTPDERROR_DATE %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}
# Syslog Dates: Month Day HH:MM:SS
SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
PROG [\x21-\x5a\x5c\x5e-\x7e]+
SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
SYSLOGHOST %{IPORHOST}
SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}>
HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}
# Shortcuts
QS %{QUOTEDSTRING}
# Log formats
SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
COMMONAPACHELOG %{IPORHOST:clientip} %{HTTPDUSER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-)
COMBINEDAPACHELOG %{COMMONAPACHELOG} %{QS:referrer} %{QS:agent}
HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg}
HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel}\] \[pid %{POSINT:pid}:tid %{NUMBER:tid}\]( \(%{POSINT:proxy_errorcode}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message}
HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG}
# Log Levels
LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)
[root@localhost patterns] #

(三)常用的表达式说明:


 (1),USERNAME或USER。用户名或用户,由数字、大小写字母、下划线及特殊字符(._-)组成的字符串

1
2
USERNAME [a-zA-Z0-9._-]+
USER %{USERNAME}

第一行,用普通的正则表达式来定义一个 grok 表达式;第二行,通过打印赋值格式,用前面定义好的 grok 表达式来定义另一个 grok 表达式。

1
eg:123,Alice,liqb 等等。

 (2),EMAILLOCALPART。电子邮件用户名部分,首位由大小写字母组成,其他部分是由大小写字母、数字及特殊字符(_.+-=:)组成的字符串(备注:注意,国内的QQ纯数字邮箱账号是无法匹配的,需要修改正则)。

1
EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+
1
eg: lqb,Grace_li,abc-wang

(3),EMAILADDRESS。电子邮件地址:定义了电子邮件用户名:%{EMAILLOCALPART},定义了主机名:%{HOSTNAME}

1
EMAILADDRESS  %{EMAILLOCALPART}@%{HOSTNAME}
1
eg:alice@yahoo.cn,alice@126.com,abc-123@qq.com

 (4),HTTPDUSER。定义了apache服务器的用户,可以是EMAILADDRESS 或 USER

1
HTTPDUSER %{EMAILADDRESS}|%{USER}


 (5) , INT :整数,包括0和正负整数

1
INT (?:[+-]?(?:[0-9]+))
1
eg:0 -123 123 2345

 (6),BASE10NUM 或NUMBER :十进制数字,包括整数和小数。

1
2
BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
eg:11   33 3.14

 (7),BASE16NUM:十六进制数字,整数

1
2
BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
eg:0x0045fa2d、-0x3F8709

(8),BASE16FLOAT:十六进制数字,整数和小数

1
BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b

(9),WORD:字符串,包括数字和大小写字母。

1
2
WORD \b\w+\b
eg:String .  34128   Ilove YOU

\b:匹配一个单词边界,也就是指单词和空格间的位置。例如, 'er\b' 可以匹配"never" 中的 'er',但不能匹配 "verb" 中的 'er'。

\w:匹配包括下划线的任何单词字符。等价于'[A-Za-z0-9_]'。

(10),NOTSPACE:不带任何空格的字符串

1
NOTSPACE \S+

\S:匹配任何非空白字符。等价于 [^ \f\n\r\t\v]。

(11),SPACE,空格字符串

1
SPACE \s*

\s:匹配任何空白字符,包括空格、制表符、换页符等等。等价于 [ \f\n\r\t\v]。

(12),QUOTEDSTRING 或QS:带引号的字符串。

1
2
QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``))
eg: "this is an apple"  "hello world"

(13),UUID:标准的uuid

1
2
UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}
eg:b808962d-8f1a-4347-8389-5516d10e875a

(14),MAC:MAC 地址,可以是思科设备里的mac地址,也可以是windows里的mac地址

1
2
MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
eg:00:15:5D:6E:28:13

(15),IP :IP地址,可以是IPv4或IPv6地址

1
2
3
IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?
IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9])
IP (?:%{IPV6}|%{IPV4})

(16),HOSTNAME:主机名称

1
HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)

(17)IPORHOST:ip或者主机名称

1
IPORHOST (?:%{IP}|%{HOSTNAME})

(18),HOSTPORT :主机名(IP)+端口

1
2
HOSTPORT %{IPORHOST}:%{POSINT}
eg:192.168.180.21:3306

(19),PATH:UNIX系统或windows系统里的路径模式

1
2
PATH (?:%{UNIXPATH}|%{WINPATH})
eg:  /usr/local/tomcat/bin/startup.sh     D:\upload_crm\startup.bat

(20),URIPROTO:URL协议

1
2
URIPROTO [A-Za-z]+(\+[A-Za-z+]+)?
eg:http   ftp   tcp  udp

(21),URIHOST:URL主机

1
2
URIHOST %{IPORHOST}(?::%{POSINT:port})?
eg:www.baidu.com  121.242.156.210:24444

(22),URIPATH:URL路径

1
2
URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+
eg:http://udn.yyuap.com/doc/logstash-best-practice-cn/input/stdin.html 中的 /doc/logstash-best-practice-cn/input/stdin.html 部分

(23),URIPARAM :URL里的GET参数

1
2
URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]*
eg:?a=1&b=2&c=3

(24)URIPATHPARAM:URL路径+GET参数

1
URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?

(25)URI:完整的URL路径

URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?


日期表达式:

(26)MONTH:月份名称

1
2
MONTH \b(?:Jan(?:uary|uar)?|Feb(?:ruary|ruar)?|M(?:a|)?r(?:ch|z)?|Apr(?:il)?|Ma(?:y|i)?|Jun(?:e|i)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|O(?:c|k)?t(?:ober)?|Nov(?:ember)?|De(?:c|z)(?:ember)?)\b
eg:Jan January  Nov November(注意:该正则区分大小写,jan、january 无法匹配)

(27),MONTHNUM:月份数字

1
2
MONTHNUM (?:0?[1-9]|1[0-2])
eg:03 3 12

(28),MONTHDAY:日期数字

1
2
MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])
eg: 03 9  31

(29),DAY :星期几名称

1
2
DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)
eg:Mon   Monday  Tue  Tuesday(注意:该正则区分大小写,MON、MONDAY 无法匹配)

(30),YEAR:年份数字

1
2
YEAR (?>\d\d){1,2}
eg  2012 2017

(31),HOUR:小时数字

1
HOUR (?:2[0123]|[01]?[0-9])

(32),MINUTE :分钟数字

1
MINUTE (?:[0-5][0-9])

(33),SECOND:秒数字

1
SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?)

(34),TIME:时间

1
2
TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
eg 01:00:45

(35),DATE_US:美国日期格式

1
2
DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
eg:10/12/1986

(36),DATE_EU:欧洲日期格式

1
2
DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR}
eg:15-10-1986

(37),ISO8601_TIMEZONE:ISO8601时区格式

1
2
ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
eg:+15:19   -15:19

(38),TIMESTAMP_ISO8601:ISO8601时间戳格式

1
TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?


(39),DATE :日期,美国日期或者欧洲日期

1
DATE %{DATE_US}|%{DATE_EU}

(40)DATESTAMP完整日期+时间

1
DATESTAMP %{DATE}[- ]%{TIME}

(41),http默认日期格式

1
HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT}

(42),LOGLEVEL :日志等级

1
LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?)


(四).grok正则捕获

1
2
3
4
5
6
7
8
(?#...) 否 注释,抛弃
(?:...) 是 只分组,不捕获的圆括弧
命名分组格式为(?<grp name>)    
命名分组的匹配结果保存在变量 %+ 中,取命名分组的值用 $+{grp name}。    
数字 [0-9]  \d  
\d+  
空白  [\t\n\r\f] \s  
词  [a-zA-Z_0-9] \w
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
[root@localhost  test ] # vim  grok.conf     
input {stdin {}}
  filter {
   grok {
    match =>{
    "message"  => "\s+(?<request_time>\d+(?:\.\d+)?)\s+"
       }
   }
}
output {
         stdout {
                         codec => rubydebug
                 }
}
[root@localhost logstash] # /usr/local/logstash/bin/logstash -f test/grok.conf 
Settings: Default pipeline workers: 1
Logstash startup completed
  begin 123.45678 end
{
          "message"  =>  " begin 123.45678 end" ,
         "@version"  =>  "1" ,
       "@timestamp"  =>  "2017-05-26T02:30:22.884Z" ,
             "host"  =>  "localhost.localdomain" ,
     "request_time"  =>  "123.45678"
}

(2)使用grok正则语法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
[root@localhost  test ] # vim grok_match.conf 
input {stdin {}}
  filter {
   grok {
    match =>{
    "message"  => "\s+(?<request_time>\d+(?:\.\d+)?)\s+"
       }
   }
}
filter {
     grok {
         match => {
             "message"  =>  "%{WORD} %{NUMBER:request_time:float} %{WORD}"
         }
     }
}
output {
         stdout {
                         codec => rubydebug
                 }
}
[root@localhost logstash] # /usr/local/logstash/bin/logstash -f test/grok_match.conf
begin 123.4321 end
{
          "message"  =>  "begin 123.4321 end" ,
         "@version"  =>  "1" ,
       "@timestamp"  =>  "2017-05-26T02:41:26.719Z" ,
             "host"  =>  "localhost.localdomain" ,
     "request_time"  => [
         [0]  "123.4321" ,
         [1] 123.4321
     ]
}
begin 2231 lqb
{
          "message"  =>  "begin 2231 lqb" ,
         "@version"  =>  "1" ,
       "@timestamp"  =>  "2017-05-26T02:39:33.826Z" ,
             "host"  =>  "localhost.localdomain" ,
     "request_time"  => [
         [0]  "2231" ,
         [1] 2231.0
     ]
}