Skip to content

Commit be63136

Browse files
committed
Issue 217, 220 Support printf grammars in the syslog decoder
1 parent 9923014 commit be63136

File tree

2 files changed

+165
-25
lines changed

2 files changed

+165
-25
lines changed

syslog/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
44

55
cmake_minimum_required(VERSION 3.0)
6-
project(syslog VERSION 1.0.8 LANGUAGES C)
6+
project(syslog VERSION 1.0.9 LANGUAGES C)
77
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Syslog parsers and collectors")
88
set(CPACK_DEBIAN_PACKAGE_DEPENDS "${PACKAGE_PREFIX}-lpeg (>= 1.0.5), ${PACKAGE_PREFIX}-socket (>= 3.0)")
99
string(REGEX REPLACE "[()]" "" CPACK_RPM_PACKAGE_REQUIRES ${CPACK_DEBIAN_PACKAGE_DEPENDS})

syslog/io_modules/decoders/syslog.lua

Lines changed: 164 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,54 @@ decoders_syslog = {
1111
-- template (string) - The 'template' configuration string from rsyslog.conf
1212
-- see http://rsyslog-5-8-6-doc.neocities.org/rsyslog_conf_templates.html
1313
-- Default:
14-
-- template = "<%PRI%>%TIMESTAMP% %HOSTNAME% %syslogtag:1:32%%msg:::sp-if-no-1st-sp%%msg%" -- RSYSLOG_TraditionalForwardFormat
14+
-- template = "<%PRI%>%TIMESTAMP% %HOSTNAME% %syslogtag:1:32%%msg:::sp-if-no-1st-sp%%msg%", -- RSYSLOG_TraditionalForwardFormat
1515
16-
-- sub_decoders = {
17-
-- _programname_ (string) - Decoder module name or grammar module name
18-
-- kernel = "lpeg.linux.kernel", -- exports an lpeg grammar named 'syslog_grammar'
19-
-- nginx = "decoders.nginx.access", -- decoder module name
20-
-- }
16+
printf_messages = {
17+
-- array (string and/or array) the order specified here is the load and evaluation order.
18+
-- string: name of a module containing a `printf_messages` array to import
19+
-- array: creates an on-the-fly grammar from a printf format specification.
20+
-- see: https://mozilla-services.github.io/lua_sandbox_extensions/lpeg/modules/lpeg/printf.html
2121
22-
-- When using sub decoders this stores the original log line in the message payload.
23-
-- payload_keep = false, -- default
22+
{"%s:%lu: invalid line", "path", "linenum"},
23+
"lpeg.openssh_portable", -- must export a `printf_messages` array
24+
},
25+
26+
sub_decoders = {
27+
-- _programname_ (string/array)
28+
-- string: decoder or grammar module name
29+
-- array: (string and/or array) list of specific messages to parse
30+
-- string: Sample message used to locate the correct grammar
31+
-- If no grammar matches the sample message then an error is thrown
32+
-- and another grammar or module must be added to the printf_messages
33+
-- configuration. If multiple grammars match the message, the first
34+
-- grammar with the most specific match is selected.
35+
-- Note: the special tokens `<<DROP>>` and `<<FAIL>>` are reserved for
36+
-- the last entry in the array to handle the no match case; <<DROP>>
37+
-- silently discards the message and <<FAIL>> reports an error. If
38+
-- neither is specified the default no match behavior is to inject the
39+
-- original message produced by the syslog decoder.
40+
-- array:
41+
-- column 1: (string/array)
42+
-- string: Sample message (see above)
43+
-- array: printf.build_grammar format specification
44+
-- column 2: (table/nil)
45+
-- Transformation table with Heka message field name keys and a
46+
-- value of the fully qualified transformation function name. The
47+
-- function returns no values but can error; it receives two
48+
-- arguments: the Heka message table and the field name to act on.
49+
-- The function can modify the message in any way.
50+
51+
nginx = "decoders.nginx.access", -- decoder module name
52+
kernel = "lpeg.linux.kernel", -- grammar module name, must export an lpeg grammar named 'syslog_grammar'
53+
sshd = {
54+
-- openssh_portable auth message, imported in printf_messages
55+
{"Accepted publickey for foobar from 10.11.12.13 port 4242 ssh2", {remote_addr = "geoip.heka.add_geoip"}},
56+
},
57+
foo = {
58+
"/tmp/input.tsv:23: invalid line", -- custom log defined in printf_messages
59+
{{"Status: %s", "status"}, nil}, -- inline printf spec, no transformation
60+
},
61+
},
2462
}
2563
```
2664
@@ -47,36 +85,134 @@ Decode and inject the resulting message
4785

4886
local module_name = ...
4987
local module_cfg = require "string".gsub(module_name, "%.", "_")
88+
local string = string
5089
local syslog = require "lpeg.syslog"
90+
local printf = require "lpeg.printf"
5191

5292
local cfg = read_config(module_cfg) or {}
5393
assert(type(cfg) == "table", module_cfg .. " must be a table")
5494
local template = cfg.template or "<%PRI%>%TIMESTAMP% %HOSTNAME% %syslogtag:1:32%%msg:::sp-if-no-1st-sp%%msg%"
5595
local grammar = syslog.build_rsyslog_grammar(template)
96+
5697
local sub_decoders = {}
98+
local grammars = nil
99+
if cfg.printf_messages then
100+
grammars = printf.load_messages(cfg.printf_messages)
101+
end
57102

58-
for k,v in pairs(cfg.sub_decoders or {}) do
59-
if type(v) == "string" then
60-
if v:match("^decoders%.") then
61-
local decode = require(v).decode
62-
assert(type(decode) == "function", "sub_decoders, no decode function defined: " .. k)
63-
sub_decoders[k] = decode
64-
else
65-
local grammar = require(v).syslog_grammar
66-
assert(type(grammar) == "userdata", "sub_decoders, no grammar defined: " .. k)
67-
sub_decoders[k] = function(data, dh) -- dh will contain the original parsed syslog message
68-
local fields = grammar:match(data)
69-
if not fields then return "parse failed" end
103+
if not (cfg.payload_keep ~= nil and type(cfg.payload_keep) == "boolean") then
104+
cfg.payload_keep = true
105+
end
106+
107+
108+
local function grammar_decode_fn(g)
109+
return function(data, dh) -- dh will contain the original parsed syslog message
110+
local fields = g:match(data)
111+
if not fields then return "parse failed" end
112+
for k,v in pairs(fields) do
113+
dh.Fields[k] = v
114+
end
115+
inject_message(dh)
116+
end
117+
end
118+
119+
120+
local FAIL_TOKEN = "<<FAIL>>"
121+
local DROP_TOKEN = "<<DROP>>"
122+
local function grammar_pick_fn(sd, nomatch_action)
123+
return function(data, dh) -- dh will contain the original parsed syslog message
124+
local fields
125+
for _,cpg in ipairs(sd) do -- individually check each grammar
126+
fields = cpg[1]:match(data)
127+
if fields then
70128
for k,v in pairs(fields) do
71129
dh.Fields[k] = v
72130
end
73-
inject_message(dh)
131+
if cpg[2] then -- apply user defined transformation functions
132+
for k,f in pairs(cpg[2]) do
133+
f(dh, k)
134+
end
135+
end
136+
break
137+
end
138+
end
139+
if not fields and nomatch_action then
140+
if nomatch_action == DROP_TOKEN then
141+
return
142+
elseif nomatch_action == FAIL_TOKEN then
143+
return "parse failed"
144+
end
145+
end
146+
inject_message(dh)
147+
end
148+
end
149+
150+
151+
for sdk,sd in pairs(cfg.sub_decoders or {}) do
152+
local sdt = type(sd)
153+
if sdt == "string" then
154+
if sd:match("^decoders%.") then
155+
local decode = require(sd).decode
156+
if type(decode) ~= "function" then
157+
-- abort loading: a decoder module without a decode function is a configuration error
error(string.format("sub_decoders, no decode function defined: %s", sdk))
158+
end
159+
sub_decoders[sdk] = decode
160+
else
161+
local g = require(sd).syslog_grammar
162+
if type(g) ~= "userdata" then
163+
-- abort loading: a grammar module must export an lpeg 'syslog_grammar' userdata
error(string.format("sub_decoders, no grammar defined: %s", sdk))
164+
end
165+
sub_decoders[sdk] = grammar_decode_fn(g)
166+
end
167+
elseif sdt == "table" then -- cherry pick printf grammars
168+
local nomatch_action
169+
for i,cpg in ipairs(sd) do
170+
if type(cpg) ~= "table" then
171+
cpg = {cpg}
172+
sd[i] = cpg
173+
end
174+
175+
local g
176+
local typ = type(cpg[1])
177+
if typ == "string" then
178+
if (cpg[1] == DROP_TOKEN or cpg[1] == FAIL_TOKEN) and sd[i + 1] == nil then
179+
nomatch_action = cpg[1]
180+
sd[i] = nil
181+
break
182+
end
183+
g = printf.match_sample(grammars, cpg[1])
184+
if not g then
185+
error(string.format("No grammar found for: %s", cpg[1]))
186+
end
187+
elseif typ == "table" then
188+
g = printf.build_grammar(cpg[1])
189+
else
190+
error(string.format("sub_decoder: %s invalid entry: %d", sdk, i))
191+
end
192+
cpg[1] = g
193+
194+
if cpg[2] then
195+
for k,v in pairs(cpg[2]) do
196+
local fn
197+
local mname, fname = string.match(v, "(.-)%.([^.]+)$")
198+
if mname then
199+
fn = require(mname)[fname]
200+
else
201+
-- no module prefix in the name: resolve the function name (v) in the global table
fn = _G[v]
202+
end
203+
if type(fn) ~= "function" then
204+
error(string.format("Invalid transformation function %s=%s", k, v))
205+
end
206+
cpg[2][k] = fn
207+
end
74208
end
75209
end
210+
sub_decoders[sdk] = grammar_pick_fn(sd, nomatch_action)
76211
else
77-
error("sub_decoder, invalid type: " .. k)
212+
-- sdk is the loop key naming the offending sub decoder entry
error(string.format("subdecoder: %s invalid type: %s", sdk, sdt))
78213
end
79214
end
215+
grammars = nil -- free the unused grammars
80216

81217
local pairs = pairs
82218
local type = type
@@ -90,7 +226,7 @@ local msg = {}
90226

91227
function decode(data, dh)
92228
local fields = grammar:match(data)
93-
if not fields then return "parse failed" end
229+
if not fields then return module_name .. " parse failed" end
94230
local programname = ""
95231

96232
if fields.pri then
@@ -150,7 +286,11 @@ function decode(data, dh)
150286
if df then
151287
local payload = msg.Payload
152288
if not cfg.payload_keep then msg.Payload = nil end
153-
return df(payload, msg)
289+
local err = df(payload, msg)
290+
if err then
291+
err = string.format("%s.%s %s", module_name, programname, err)
292+
end
293+
return err
154294
end
155295
inject_message(msg)
156296
end

0 commit comments

Comments
 (0)