1
1
package json_extract
2
2
3
3
import (
4
- "bytes"
5
-
6
4
"github.com/go-faster/jx"
7
5
"github.com/ozontech/file.d/cfg"
8
6
"github.com/ozontech/file.d/fd"
@@ -11,7 +9,7 @@ import (
11
9
)
12
10
13
11
/*{ introduction
14
- It extracts a field from JSON-encoded event field and adds extracted field to the event root.
12
+ It extracts fields from a JSON-encoded event field and adds the extracted fields to the event root.
15
13
> If an extracted field already exists in the event root, it will be overwritten.
16
14
}*/
17
15
@@ -23,42 +21,65 @@ pipelines:
23
21
actions:
24
22
- type: json_extract
25
23
field: log
26
- extract_field: error.code
24
+ extract_fields:
25
+ - error.code
26
+ - level
27
+ - meta
28
+ - flags
27
29
...
28
30
```
29
31
The original event:
30
32
```json
31
33
{
32
- "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"service\":\"my-service\",\"error\":{\"code\":2,\"args\":[]}}",
34
+ "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"error\":{\"code\":2,\"args\":[]},\"meta\":{\"service\":\"my-service\",\"pod\":\"my-service-5c4dfcdcd4-4v5zw\"},\"flags\":[\"flag1\",\"flag2\"]}",
33
35
"time": "2024-03-01T10:49:28.263317941Z"
34
36
}
35
37
```
36
38
The resulting event:
37
39
```json
38
40
{
39
- "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"service\":\"my-service\",\"error\":{\"code\":2,\"args\":[]}}",
41
+ "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"error\":{\"code\":2,\"args\":[]},\"meta\":{\"service\":\"my-service\",\"pod\":\"my-service-5c4dfcdcd4-4v5zw\"},\"flags\":[\"flag1\",\"flag2\"]}",
40
42
"time": "2024-03-01T10:49:28.263317941Z",
41
- "code": 2
43
+ "code": 2,
44
+ "level": "error",
45
+ "meta": {
46
+ "service": "my-service",
47
+ "pod": "my-service-5c4dfcdcd4-4v5zw"
48
+ },
49
+ "flags": ["flag1", "flag2"]
42
50
}
43
51
```
44
52
}*/
45
53
46
54
/*{ benchmarks
47
55
Performance comparison of `json_extract` and `json_decode` plugins.
48
- `json_extract` on average 3 times faster than `json_decode`.
56
+ `json_extract` is on average 2.5 times faster than `json_decode` and
57
+ doesn't allocate memory during the extract process.
58
+
59
+ ### Extract 1 field
60
+ | json (length) | json_extract (time ns) | json_decode (time ns) |
61
+ |---------------|------------------------|-----------------------|
62
+ | 309 | 300 | 560 |
63
+ | 2109 | 2570 | 7250 |
64
+ | 10909 | 13550 | 34250 |
65
+ | 21909 | 26000 | 67940 |
66
+ | 237909 | 262500 | 741530 |
49
67
68
+ ### Extract 5 fields
50
69
| json (length) | json_extract (time ns) | json_decode (time ns) |
51
70
|---------------|------------------------|-----------------------|
52
- | 129 | 33 | 176 |
53
- | 309 | 264 | 520 |
54
- | 2109 | 2263 | 6778 |
55
- | 10909 | 11289 | 32205 |
56
- | 21909 | 23277 | 62819 |
71
+ | 309 | 450 | 685 |
72
+ | 2109 | 2990 | 7410 |
73
+ | 10909 | 14540 | 35000 |
74
+ | 21909 | 28340 | 69950 |
75
+ | 237909 | 286600 | 741600 |
57
76
}*/
58
77
59
78
type Plugin struct {
60
- config * Config
61
- decoder * jx.Decoder
79
+ config * Config
80
+
81
+ extractFields * pathTree
82
+ decoder * jx.Decoder
62
83
}
63
84
64
85
// ! config-params
@@ -73,8 +94,14 @@ type Config struct {
73
94
// > @3@4@5@6
74
95
// >
75
96
// > Field to extract.
76
- ExtractField cfg.FieldSelector `json:"extract_field" parse:"selector" required:"true"` // *
97
+ // >> ⚠ DEPRECATED. Use `extract_fields` instead.
98
+ ExtractField cfg.FieldSelector `json:"extract_field" parse:"selector"` // *
77
99
ExtractField_ []string
100
+
101
+ // > @3@4@5@6
102
+ // >
103
+ // > Fields to extract.
104
+ ExtractFields []cfg.FieldSelector `json:"extract_fields" slice:"true"` // *
78
105
}
79
106
80
107
func init () {
@@ -88,9 +115,25 @@ func factory() (pipeline.AnyPlugin, pipeline.AnyConfig) {
88
115
return & Plugin {}, & Config {}
89
116
}
90
117
91
- func (p * Plugin ) Start (config pipeline.AnyConfig , _ * pipeline.ActionPluginParams ) {
118
+ func (p * Plugin ) Start (config pipeline.AnyConfig , params * pipeline.ActionPluginParams ) {
92
119
p .config = config .(* Config )
93
120
p .decoder = & jx.Decoder {}
121
+
122
+ p .extractFields = newPathTree ()
123
+ dupl := false
124
+ for _ , f := range p .config .ExtractFields {
125
+ if f == p .config .ExtractField {
126
+ dupl = true
127
+ }
128
+ p .extractFields .add (cfg .ParseFieldSelector (string (f )))
129
+ }
130
+ if ! dupl {
131
+ p .extractFields .add (p .config .ExtractField_ )
132
+ }
133
+
134
+ if len (p .extractFields .root .children ) == 0 {
135
+ params .Logger .Fatal ("extract fields are empty" )
136
+ }
94
137
}
95
138
96
139
func (p * Plugin ) Stop () {}
@@ -102,36 +145,52 @@ func (p *Plugin) Do(event *pipeline.Event) pipeline.ActionResult {
102
145
}
103
146
104
147
p .decoder .ResetBytes (jsonNode .AsBytes ())
105
- extract (event .Root , p .decoder , p .config . ExtractField_ , 0 , false )
148
+ extract (event .Root , p .decoder , p .extractFields . root . children , false )
106
149
return pipeline .ActionPass
107
150
}
108
151
109
- // extract extracts field from decoder and adds it to the root.
110
- // `skipAddField` flag is required for proper benchmarking.
111
- func extract (root * insaneJSON.Root , d * jx.Decoder , field []string , depth int , skipAddField bool ) {
152
+ // extract extracts fields from decoder and adds them to the root.
153
+ //
154
+ // [skipAddField] flag is required for proper benchmarking.
155
+ func extract (root * insaneJSON.Root , d * jx.Decoder , fields pathNodes , skipAddField bool ) {
112
156
objIter , err := d .ObjIter ()
113
157
if err != nil {
114
158
return
115
159
}
116
160
161
+ processed := len (fields )
117
162
for objIter .Next () {
118
- if bytes .Equal (objIter .Key (), pipeline .StringToByteUnsafe (field [depth ])) {
119
- if depth == len (field )- 1 { // add field
120
- if skipAddField {
121
- _ = d .Skip ()
122
- } else {
123
- addField (root , field [depth ], d )
124
- }
125
- } else { // go deep
126
- raw , err := d .Raw ()
127
- if err != nil {
128
- break
129
- }
130
- d .ResetBytes (raw )
131
- extract (root , d , field , depth + 1 , skipAddField )
163
+ // find the field at the current depth
164
+ n := fields .find (string (objIter .Key ()))
165
+ if n == nil {
166
+ if err = d .Skip (); err != nil {
167
+ break
132
168
}
133
- break
134
- } else if err = d .Skip (); err != nil {
169
+ continue
170
+ }
171
+
172
+ if len (n .children ) == 0 { // last field in path, add to root
173
+ if skipAddField {
174
+ _ = d .Skip ()
175
+ } else {
176
+ addField (root , n .data , d )
177
+ }
178
+ } else { // go deep
179
+ // Capture calls f and then rolls back to state before call
180
+ _ = d .Capture (func (d * jx.Decoder ) error {
181
+ // recursively extract child fields
182
+ extract (root , d , n .children , skipAddField )
183
+ return nil
184
+ })
185
+ // skip the current field because we have processed it
186
+ // and rolled back the state of the decoder
187
+ if err = d .Skip (); err != nil {
188
+ break
189
+ }
190
+ }
191
+
192
+ processed --
193
+ if processed == 0 {
135
194
break
136
195
}
137
196
}
@@ -154,6 +213,7 @@ func addField(root *insaneJSON.Root, field string, d *jx.Decoder) {
154
213
s , _ := d .StrBytes ()
155
214
root .AddFieldNoAlloc (root , field ).MutateToBytesCopy (root , s )
156
215
case jx .Null :
216
+ _ = d .Null ()
157
217
root .AddFieldNoAlloc (root , field ).MutateToNull ()
158
218
case jx .Bool :
159
219
b , _ := d .Bool ()
0 commit comments