Skip to content

Commit 9b44085

Browse files
author
Mike Trinkala
authored
Merge pull request #227 from ameihm0912/hh_cms
heavy hitters implementation with CMS
2 parents 46aeafc + 79ce978 commit 9b44085

File tree

7 files changed

+310
-0
lines changed

7 files changed

+310
-0
lines changed

moz_security/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
cmake_minimum_required(VERSION 3.0)
66
project(moz-security VERSION 0.0.6 LANGUAGES C)
77
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Mozilla Infrastructure Security Analysis")
8+
set(CPACK_DEBIAN_PACKAGE_DEPENDS "${PACKAGE_PREFIX}-streaming-algorithms (>= 0.0.2)")
9+
string(REGEX REPLACE "[()]" "" CPACK_RPM_PACKAGE_REQUIRES ${CPACK_DEBIAN_PACKAGE_DEPENDS})
810
include(sandbox_module)
911

1012
add_test(NAME ${MODULE_NAME}_hindsight
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
-- This Source Code Form is subject to the terms of the Mozilla Public
2+
-- License, v. 2.0. If a copy of the MPL was not distributed with this
3+
-- file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
5+
--[[
6+
# Mozilla Security Heavy Hitters, CMS + moving average
7+
8+
For events matching the message_matcher, this analysis plugin attempts to calculate typical
9+
request rates while identifying and flagging anomalous request patterns.
10+
11+
Request counts within a given window are stored in a Count-Min sketch data structure; based on
12+
a sample encountered during the window this data structure is used to consult request rates for
13+
given identifiers. Where an identifier exceeds a threshold, it is added to an analysis list for
14+
submission by the plugin.
15+
16+
## Sample Configuration
17+
```lua
18+
filename = "moz_security_hh_cms.lua"
19+
message_matcher = "Logger == 'input.nginx'"
20+
ticker_interval = 60
21+
preserve_data = false -- This module cannot keep state at this time
22+
23+
id_field = "Fields[remote_addr]" -- field to use as the identifier
24+
-- id_field_capture = ",? *([^,]+)$" -- optional, e.g. extract the last entry in a comma delimited list
25+
26+
sample_min_id = 5000 -- Minimum distinct identifiers before a sample will be calculated
27+
sample_max_id = 10000 -- Maximum identifiers to use for sample calculation within a window
28+
sample_min_ev = 50000 -- Minimum number of events sampler must consume before calculation
29+
sample_window = 60 -- Sample window size
30+
sample_ticks = 2500 -- Recalculate sample every sample_ticks events
31+
threshold_cap = 10 -- Threshold will be calculated average + (calculated average * cap)
32+
-- cms_epsilon = 1 / 10000 -- optional CMS value for epsilon
33+
-- cms_delta = 0.0001 -- optional CMS value for delta
34+
```
35+
--]]
36+
37+
require "string"
38+
require "table"
39+
40+
local ostime = require "os".time
41+
42+
-- Fetches a mandatory setting from the plugin configuration.
-- Raises "<name> must be configured" when read_config yields nil/false; the
-- error is reported at the caller's line (level 2) rather than inside this helper.
local function required_config(name)
    local value = read_config(name)
    if not value then
        error(name .. " must be configured", 2)
    end
    return value
end

-- Mandatory sampling/threshold settings (see the sample configuration above).
local sample_min_id = required_config("sample_min_id")
local sample_max_id = required_config("sample_max_id")
local sample_min_ev = required_config("sample_min_ev")
local sample_window = required_config("sample_window")
local sample_ticks  = required_config("sample_ticks")
local threshold_cap = required_config("threshold_cap")
local id_field      = required_config("id_field")

-- Optional settings; the CMS parameters fall back to built-in defaults.
local id_fieldc   = read_config("id_field_capture")
local cms_epsilon = read_config("cms_epsilon") or 1 / 10000
local cms_delta   = read_config("cms_delta") or 0.0001

-- Count-Min sketch used to track per-identifier request counts.
local cms = require "streaming_algorithms.cm_sketch".new(cms_epsilon, cms_delta)
54+
55+
local alist = {}
56+
57+
-- Empties the analysis list by replacing its backing table (self.l) with a
-- fresh one.
function alist:reset() self.l = {} end
60+
61+
-- Records (or overwrites) the value `c` for identifier `i` in the analysis list.
-- Raises an error when either argument is nil/false.
function alist:add(i, c)
    if i and c then
        self.l[i] = c
        return
    end
    error("analysis list received nil argument")
end
67+
68+
-- Drops every entry whose stored value is strictly below `t`.
-- Clearing existing keys while traversing with pairs() is permitted in Lua.
function alist:flush(t)
    local entries = self.l
    for id, count in pairs(entries) do
        if count < t then
            entries[id] = nil
        end
    end
end
73+
74+
local sampler = {}
75+
76+
-- Returns the sampler to its initial state: empty sample set, zeroed counters
-- and threshold, and a window start time of "now".
function sampler:reset()
    self.s, self.n = {}, 0              -- sampled identifiers and their count
    self.evcount, self.validtick = 0, 0 -- events seen / events since last calc
    self.threshold = 0                  -- 0 means "no threshold calculated yet"
    self.start_time = ostime()
end
84+
85+
-- Recalculates the heavy-hitter threshold from the current sample.
--
-- Does nothing until the sample holds at least sample_min_id identifiers, at
-- least sample_min_ev events have been consumed, and at least sample_ticks
-- events have arrived since the previous calculation. When it runs, the
-- threshold becomes average + (average * threshold_cap), where average is the
-- mean CMS point query over the sampled identifiers, and the analysis list is
-- then pruned against the new threshold.
function sampler:calc()
    if self.n < sample_min_id or self.evcount < sample_min_ev then
        return
    end
    if self.validtick < sample_ticks then
        return
    end
    self.validtick = 0

    local total, cnt = 0, 0
    for id in pairs(self.s) do
        total = total + cms:point_query(id)
        cnt = cnt + 1
    end
    -- An empty sample would give 0/0 (NaN) as the threshold; keep the
    -- previous threshold instead of publishing a meaningless value.
    if cnt == 0 then
        return
    end

    local average = total / cnt
    self.threshold = average + (average * threshold_cap)
    -- Remove any elements in the analysis list that no longer conform
    -- to the set threshold
    alist:flush(self.threshold)
end
105+
106+
-- Accounts for one event carrying identifier `x`.
--
-- If the current sample window has expired, the sampler, the analysis list
-- and the CMS are all cleared first. The event counters are always advanced;
-- `x` joins the sample only while the sample holds fewer than sample_max_id
-- identifiers and `x` is not already part of it.
function sampler:add(x)
    local now = ostime()
    if now > self.start_time + sample_window then
        self:reset()
        alist:reset()
        cms:clear()
    end

    self.evcount = self.evcount + 1
    self.validtick = self.validtick + 1

    if self.n < sample_max_id and not self.s[x] then
        self.s[x] = 1
        self.n = self.n + 1
    end
end
122+
123+
-- Put both the analysis list and the sampler into their initial state at load time.
alist:reset()
sampler:reset()
125+
126+
-- Handles one matched message.
--
-- Reads the identifier from id_field (optionally narrowing it with the
-- id_field_capture pattern), feeds it to the sampler, updates the CMS and,
-- once a threshold exists, records identifiers whose estimated count exceeds it.
--
-- Returns -1, "no id_field" when the identifier field is missing; 0 otherwise
-- (including when the capture pattern rejects the value).
function process_message()
    local id = read_message(id_field)
    if not id then
        return -1, "no id_field"
    end

    if id_fieldc then
        id = string.match(id, id_fieldc)
        -- no error as the capture may intentionally reject entries
        if not id then
            return 0
        end
    end

    sampler:add(id)
    sampler:calc()

    local estimate = cms:update(id)
    local threshold = sampler.threshold
    if threshold ~= 0 and estimate > threshold then
        alist:add(id, estimate)
    end
    return 0
end
142+
143+
-- Emits the current sampler statistics followed by every entry of the
-- analysis list as a tab separated payload named "statistics".
-- The `ns` argument is not used.
function timer_event(ns)
    -- For now, just generate a tsv here but this could be modified to submit violations
    -- with a configured confidence to Tigerblood
    local header = {
        { "sampler_threshold", sampler.threshold },
        { "sampler_size",      sampler.n },
        { "sampler_evcount",   sampler.evcount },
    }
    for _, row in ipairs(header) do
        add_to_payload(row[1], "\t", row[2], "\n")
    end

    for id, count in pairs(alist.l) do
        add_to_payload(id, "\t", count, "\n")
    end
    inject_payload("tsv", "statistics")
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
filename = "moz_security_hh_cms.lua"
2+
message_matcher = "Logger == 'input.hh_cms'"
3+
ticker_interval = 0
4+
5+
id_field = "Fields[id]"
6+
7+
sample_min_id = 100
8+
sample_max_id = 10000
9+
sample_min_ev = 0
10+
sample_window = 60
11+
sample_ticks = 1000
12+
threshold_cap = 2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
filename = "generate_hh_cms.lua"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
-- This Source Code Form is subject to the terms of the Mozilla Public
2+
-- License, v. 2.0. If a copy of the MPL was not distributed with this
3+
-- file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
5+
--[[
6+
# Generates test data for moz_security_hh_cms
7+
--]]
8+
9+
require "string"
10+
11+
local msg = {
12+
Timestamp = 0,
13+
Logger = "input.hh_cms",
14+
Fields = {
15+
id = "",
16+
}
17+
}
18+
19+
local testtable = {}
20+
21+
-- Registers (or re-registers) test identifier `k`: `n` messages are to be sent
-- for it in total and none have been sent yet.
function add_tt_entry(k, n)
    testtable[k] = { max = n, cur = 0 }
end
26+
27+
-- Generates the test traffic for moz_security_hh_cms.
--
-- Populates the test table with groups of identifiers (address prefix, number
-- of hosts, messages per host) and then injects messages round-robin: each
-- pass over the table sends one message for every identifier that has not yet
-- reached its quota, until no identifier has anything left to send.
--
-- Always returns 0.
function process_message()
    -- { prefix, number of hosts, messages per host }
    -- The order of this list determines the insertion order into the test
    -- table and must not change: the expected verification output depends on
    -- the resulting message order.
    local groups = {
        { "10.0.0.",    200, 80 },
        { "10.0.1.",    250, 150 },
        { "10.0.2.",    250, 80 },
        { "10.0.3.",    250, 80 },
        { "192.168.1.", 10,  1000 },
        { "192.168.0.", 20,  1500 },
        { "10.0.4.",    254, 80 },
        { "10.0.5.",    254, 60 },
        { "10.0.6.",    254, 95 },
    }

    for _, group in ipairs(groups) do
        -- local: the prefix must not leak into the global environment
        local p, hosts, rate = group[1], group[2], group[3]
        for i = 1, hosts do
            add_tt_entry(string.format("%s%s", p, i), rate)
        end
    end

    repeat
        local sent = false
        for k, v in pairs(testtable) do
            if v.cur < v.max then
                msg.Fields.id = k
                inject_message(msg)
                v.cur = v.cur + 1
                sent = true
            end
        end
    until not sent

    return 0
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
filename = "hh_cms_verification.lua"
2+
message_matcher = "Logger == 'analysis.hh_cms'"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
-- This Source Code Form is subject to the terms of the Mozilla Public
2+
-- License, v. 2.0. If a copy of the MPL was not distributed with this
3+
-- file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
5+
--[[
6+
# Validates the moz_security_hh_cms output
7+
--]]
8+
9+
require "string"
10+
11+
local result =[[sampler_threshold 332.37485649
12+
sampler_size 1742
13+
sampler_evcount 193190
14+
192.168.0.18 1500
15+
192.168.0.5 1500
16+
192.168.1.10 1000
17+
192.168.0.2 1500
18+
192.168.0.6 1500
19+
192.168.1.2 1000
20+
192.168.0.20 1500
21+
192.168.0.11 1500
22+
192.168.0.9 1500
23+
192.168.0.10 1500
24+
192.168.1.6 1000
25+
192.168.1.5 1000
26+
192.168.0.4 1500
27+
192.168.1.8 1000
28+
192.168.1.7 1000
29+
192.168.0.17 1500
30+
192.168.1.4 1000
31+
192.168.1.9 1000
32+
192.168.1.3 1000
33+
192.168.0.14 1500
34+
192.168.0.13 1500
35+
192.168.0.7 1500
36+
192.168.0.8 1500
37+
192.168.0.15 1500
38+
192.168.1.1 1000
39+
192.168.0.3 1500
40+
192.168.0.16 1500
41+
192.168.0.1 1500
42+
192.168.0.19 1500
43+
192.168.0.12 1500
44+
]]
45+
46+
47+
local cnt = 0
48+
-- Compares the received payload against the expected `result` text and marks
-- the test as having run. A mismatch raises an error whose message is the
-- received payload. Returns 0 on a match.
function process_message()
    local received = read_message("Payload")
    assert(received == result, received)
    cnt = 1
    return 0
end
54+
55+
56+
-- Fails unless the payload check ran, i.e. unless `cnt` is 1.
function timer_event()
    local summary = string.format("%d out of 1 tests ran", cnt)
    assert(cnt == 1, summary)
end

0 commit comments

Comments
 (0)