Skip to content

Commit 81dc58f

Browse files
author
Mike Trinkala
authored
Merge pull request #191 from mozilla-services/1423309
Bugzilla 1423309 - Add the submission and user agent fields
2 parents 8f8cce8 + b7c8167 commit 81dc58f

File tree

7 files changed

+40
-12
lines changed

7 files changed

+40
-12
lines changed

moz_ingest/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
44

55
cmake_minimum_required(VERSION 3.0)
6-
project(moz-ingest VERSION 0.0.2 LANGUAGES C)
6+
project(moz-ingest VERSION 0.0.3 LANGUAGES C)
77
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Mozilla Nginx Ingestion Data Processing")
88
set(CPACK_DEBIAN_PACKAGE_DEPENDS "${PACKAGE_PREFIX}-rjson (>= 1.1.1), ${PACKAGE_PREFIX}-lpeg (>= 1.0.0), ${PACKAGE_PREFIX}-lfs (>= 1.6.4), ${PACKAGE_PREFIX}-zlib (>= 0.3.1)")
99
string(REGEX REPLACE "[()]" "" CPACK_RPM_PACKAGE_REQUIRES ${CPACK_DEBIAN_PACKAGE_DEPENDS})

moz_ingest/io_modules/decoders/moz_ingest/json.lua

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
-- file, You can obtain one at http://mozilla.org/MPL/2.0/.
44

55
--[[
6-
# Mozilla Telemetry Decoder Module
6+
# Mozilla Telemetry JSON Decoder Module
77
88
## Decoder Configuration Table
99
```lua
@@ -17,6 +17,16 @@ decoders_moz_ingest_json = {
1717
1818
-- array of namespace directories to ignore
1919
-- namespace_ignore = {"heka", "metadata", "pioneer-study", "telemetry"},
20+
21+
-- Transform the User-Agent header into the user_agent_browser, user_agent_version, and user_agent_os fields.
22+
-- user_agent_transform = false, -- default
23+
24+
-- Always preserve the original User-Agent header alongside the transformed fields when the transform is enabled.
25+
-- user_agent_keep = false, -- default
26+
27+
-- Preserve the User-Agent header only when the transform is enabled and fails to identify the browser (user_agent_keep, when set, preserves it regardless).
28+
-- user_agent_conditional = false, -- default
29+
2030
}
2131
```
2232
@@ -52,6 +62,7 @@ local module_cfg = string.gsub(module_name, "%.", "_")
5262
local rjson = require "rjson"
5363
local miu = require "moz_ingest.util"
5464
local lfs = require "lfs"
65+
local clf = require "lpeg.common_log_format"
5566

5667
local read_config = read_config
5768
local assert = assert
@@ -87,8 +98,8 @@ end
8798
local namespaces = load_namespaces(cfg.namespace_path)
8899

89100

90-
local submissionField = {value = nil, representation = "json"}
91101
local doc = rjson.parse("{}") -- reuse this object to avoid creating a lot of GC
102+
local submissionField = {value = doc, representation = "json"}
92103
local function process_json(hsr, msg)
93104
local ok, err = pcall(doc.parse_message, doc, hsr, "Fields[content]", nil, nil, true)
94105
if not ok then
@@ -108,6 +119,7 @@ local function process_json(hsr, msg)
108119
error(string.format("json\tnamespace: %s schema: %s version: %d error: %s",
109120
msg.Logger, msg.Fields.docType, msg.Fields.docVersion, err), 0)
110121
end
122+
msg.Fields.submission = submissionField
111123
end
112124

113125

@@ -119,8 +131,18 @@ function transform_message(hsr, msg)
119131
process_json(hsr, msg)
120132

121133
-- Migrate the original message data after the validation (avoids Field duplication in the error message)
122-
msg.Hostname = hsr:read_message("Hostname")
123-
msg.Fields.Host = hsr:read_message("Fields[Host]")
134+
msg.Hostname = hsr:read_message("Hostname")
135+
msg.Fields.Host = hsr:read_message("Fields[Host]")
136+
msg.Fields["User-Agent"] = hsr:read_message("Fields[User-Agent]")
137+
138+
if msg.Fields["User-Agent"] and cfg.user_agent_transform then
139+
msg.Fields.user_agent_browser,
140+
msg.Fields.user_agent_version,
141+
msg.Fields.user_agent_os = clf.normalize_user_agent(msg.Fields["User-Agent"])
142+
if not ((cfg.user_agent_conditional and not msg.Fields.user_agent_browser) or cfg.user_agent_keep) then
143+
msg.Fields["User-Agent"] = nil
144+
end
145+
end
124146

125147
local ok, err = pcall(inject_message, msg)
126148
if not ok then

moz_ingest/tests/hindsight/input.hpb

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,32 @@
11
`
2-
:�E�ӛIߣs�S>u�����۽��
2+
�GG���E�-���R��撝���
33
common.raw"
44
moz_ingestJ example.comR
55
remote_addr"192.30.255.112p
6-
6�.��mCu�pN�~������۽��
6+
�?d��B��{m#�H`�ɩ璝���
77
common.raw"
88
moz_ingestJ example.comR
99
uri"/foobarR
1010
remote_addr"192.30.255.112�
11-
�틩��I,���V�-j4����۽��
11+
��*0(C�3�&��xL�璝���
1212
common.raw"
1313
moz_ingestJ example.comRC
1414
uri"</submit/common/foobar/1/0055FAC4-8A1A-4FCA-B380-EBFDC8571A01R
1515
remote_addr"192.30.255.112�
16-
ӘӥKߧi��X�[����۽��
16+
�`��"C���x ���璝���
1717
common.raw"
1818
moz_ingestJ example.comRC
1919
uri"</submit/common/foobar/1/0055FAC4-8A1A-4FCA-B380-EBFDC8571A01R
2020
remote_addr"192.30.255.112�
21-
FJ��K�{]��I|���۽��
21+
&3�NB�D�������璝���
2222
common.raw"
2323
moz_ingestJ example.comR
2424
geoCity"HalifaxR
2525
remote_addr"192.30.255.112RD
2626
uri"=/submit/common/widget/99/0055FAC4-8A1A-4FCA-B380-EBFDC8571A02R
2727

2828
geoCountry"CA�
29-
_!e3<D����+L~������۽��
29+
�����C?�·�A�6g��璝���
3030
common.raw"
3131
moz_ingestJ example.comR
3232
remote_addr"192.30.255.112RD

moz_ingest/tests/hindsight/json.hpb

336 Bytes
Binary file not shown.

moz_ingest/tests/hindsight/run/analysis/verify_json_decoder.lua

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,11 @@ local messages = {
1414
docType = "bar",
1515
geoCity = "New York",
1616
geoCountry = "US",
17-
documentId = "0055FAC4-8A1A-4FCA-B380-EBFDC8571A01"
17+
documentId = "0055FAC4-8A1A-4FCA-B380-EBFDC8571A01",
18+
submission = [[{"exampleString":"string one"}]],
19+
user_agent_browser = "Firefox",
20+
user_agent_version = 59,
21+
user_agent_os = "Linux"
1822
}
1923
},
2024
{Logger = "foo", Type = "error", Hostname = "example.com", Fields = {

moz_ingest/tests/hindsight/run/input/generate_data.lua

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ function process_message()
4848
msg.Type = "json.raw"
4949
msg.Fields.uri = "/submit/foo/bar/1/0055FAC4-8A1A-4FCA-B380-EBFDC8571A01"
5050
msg.Fields.content = [[{"exampleString":"string one"}]]
51+
msg.Fields["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0"
5152
inject_message(msg)
5253

5354
-- fails parsing

moz_ingest/tests/hindsight/run/input/test_json_decoder.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,5 @@ decoders_moz_ingest_common = {
1818

1919
decoders_moz_ingest_json = {
2020
namespace_path = "namespaces",
21+
user_agent_transform = true,
2122
}

0 commit comments

Comments
 (0)