|
| 1 | +package json_extract |
| 2 | + |
| 3 | +import ( |
| 4 | + "bytes" |
| 5 | + |
| 6 | + "github.com/go-faster/jx" |
| 7 | + "github.com/ozontech/file.d/cfg" |
| 8 | + "github.com/ozontech/file.d/fd" |
| 9 | + "github.com/ozontech/file.d/pipeline" |
| 10 | + insaneJSON "github.com/vitkovskii/insane-json" |
| 11 | +) |
| 12 | + |
| 13 | +/*{ introduction |
| 14 | +It extracts a field from a JSON-encoded event field and adds the extracted field to the event root. |
| 15 | +> If the extracted field already exists in the event root, it will be overwritten. |
| 16 | +}*/ |
| 17 | + |
| 18 | +/*{ examples |
| 19 | +```yaml |
| 20 | +pipelines: |
| 21 | + example_pipeline: |
| 22 | + ... |
| 23 | + actions: |
| 24 | + - type: json_extract |
| 25 | + field: log |
| 26 | + extract_field: error.code |
| 27 | + ... |
| 28 | +``` |
| 29 | +The original event: |
| 30 | +```json |
| 31 | +{ |
| 32 | + "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"service\":\"my-service\",\"error\":{\"code\":2,\"args\":[]}}", |
| 33 | + "time": "2024-03-01T10:49:28.263317941Z" |
| 34 | +} |
| 35 | +``` |
| 36 | +The resulting event: |
| 37 | +```json |
| 38 | +{ |
| 39 | + "log": "{\"level\":\"error\",\"message\":\"error occurred\",\"service\":\"my-service\",\"error\":{\"code\":2,\"args\":[]}}", |
| 40 | + "time": "2024-03-01T10:49:28.263317941Z", |
| 41 | + "code": 2 |
| 42 | +} |
| 43 | +``` |
| 44 | +}*/ |
| 45 | + |
| 46 | +/*{ benchmarks |
| 47 | +Performance comparison of `json_extract` and `json_decode` plugins. |
| 48 | +`json_extract` is on average 3 times faster than `json_decode`. |
| 49 | + |
| 50 | +| json (length) | json_extract (time ns) | json_decode (time ns) | |
| 51 | +|---------------|------------------------|-----------------------| |
| 52 | +| 129 | 33 | 176 | |
| 53 | +| 309 | 264 | 520 | |
| 54 | +| 2109 | 2263 | 6778 | |
| 55 | +| 10909 | 11289 | 32205 | |
| 56 | +| 21909 | 23277 | 62819 | |
| 57 | +}*/ |
| 58 | + |
// Plugin is the json_extract action plugin. It keeps the action config and a
// single JSON decoder that is created in Start and re-pointed at each event's
// source field in Do.
type Plugin struct {
	// config is the action configuration assigned in Start.
	config *Config
	// decoder is reset with the bytes of the configured field for every event.
	decoder *jx.Decoder
}
| 63 | + |
// ! config-params
// ^ config-params
type Config struct {
	// > @3@4@5@6
	// >
	// > The event field from which to extract. Must be a string.
	Field cfg.FieldSelector `json:"field" parse:"selector" required:"true"` // *
	// Field_ is the path passed to Root.Dig in Do to locate the source field.
	// NOTE(review): presumably filled by the config parser from Field — confirm.
	Field_ []string

	// > @3@4@5@6
	// >
	// > Field to extract.
	ExtractField cfg.FieldSelector `json:"extract_field" parse:"selector" required:"true"` // *
	// ExtractField_ is the key path walked by extract inside the encoded JSON.
	// NOTE(review): presumably filled by the config parser from ExtractField — confirm.
	ExtractField_ []string
}
| 79 | + |
| 80 | +func init() { |
| 81 | + fd.DefaultPluginRegistry.RegisterAction(&pipeline.PluginStaticInfo{ |
| 82 | + Type: "json_extract", |
| 83 | + Factory: factory, |
| 84 | + }) |
| 85 | +} |
| 86 | + |
| 87 | +func factory() (pipeline.AnyPlugin, pipeline.AnyConfig) { |
| 88 | + return &Plugin{}, &Config{} |
| 89 | +} |
| 90 | + |
| 91 | +func (p *Plugin) Start(config pipeline.AnyConfig, _ *pipeline.ActionPluginParams) { |
| 92 | + p.config = config.(*Config) |
| 93 | + p.decoder = &jx.Decoder{} |
| 94 | +} |
| 95 | + |
| 96 | +func (p *Plugin) Stop() {} |
| 97 | + |
| 98 | +func (p *Plugin) Do(event *pipeline.Event) pipeline.ActionResult { |
| 99 | + jsonNode := event.Root.Dig(p.config.Field_...) |
| 100 | + if jsonNode == nil { |
| 101 | + return pipeline.ActionPass |
| 102 | + } |
| 103 | + |
| 104 | + p.decoder.ResetBytes(jsonNode.AsBytes()) |
| 105 | + extract(event.Root, p.decoder, p.config.ExtractField_, 0, false) |
| 106 | + return pipeline.ActionPass |
| 107 | +} |
| 108 | + |
| 109 | +// extract extracts field from decoder and adds it to the root. |
| 110 | +// `skipAddField` flag is required for proper benchmarking. |
| 111 | +func extract(root *insaneJSON.Root, d *jx.Decoder, field []string, depth int, skipAddField bool) { |
| 112 | + objIter, err := d.ObjIter() |
| 113 | + if err != nil { |
| 114 | + return |
| 115 | + } |
| 116 | + |
| 117 | + for objIter.Next() { |
| 118 | + if bytes.Equal(objIter.Key(), pipeline.StringToByteUnsafe(field[depth])) { |
| 119 | + if depth == len(field)-1 { // add field |
| 120 | + if skipAddField { |
| 121 | + _ = d.Skip() |
| 122 | + } else { |
| 123 | + addField(root, field[depth], d) |
| 124 | + } |
| 125 | + } else { // go deep |
| 126 | + raw, err := d.Raw() |
| 127 | + if err != nil { |
| 128 | + break |
| 129 | + } |
| 130 | + d.ResetBytes(raw) |
| 131 | + extract(root, d, field, depth+1, skipAddField) |
| 132 | + } |
| 133 | + break |
| 134 | + } else if err = d.Skip(); err != nil { |
| 135 | + break |
| 136 | + } |
| 137 | + } |
| 138 | +} |
| 139 | + |
| 140 | +func addField(root *insaneJSON.Root, field string, d *jx.Decoder) { |
| 141 | + switch d.Next() { |
| 142 | + case jx.Number: |
| 143 | + num, _ := d.Num() |
| 144 | + intVal, err := num.Int64() |
| 145 | + if err == nil { |
| 146 | + root.AddFieldNoAlloc(root, field).MutateToInt64(intVal) |
| 147 | + } else { |
| 148 | + floatVal, err := num.Float64() |
| 149 | + if err == nil { |
| 150 | + root.AddFieldNoAlloc(root, field).MutateToFloat(floatVal) |
| 151 | + } |
| 152 | + } |
| 153 | + case jx.String: |
| 154 | + s, _ := d.StrBytes() |
| 155 | + root.AddFieldNoAlloc(root, field).MutateToBytesCopy(root, s) |
| 156 | + case jx.Null: |
| 157 | + root.AddFieldNoAlloc(root, field).MutateToNull() |
| 158 | + case jx.Bool: |
| 159 | + b, _ := d.Bool() |
| 160 | + root.AddFieldNoAlloc(root, field).MutateToBool(b) |
| 161 | + case jx.Object, jx.Array: |
| 162 | + raw, _ := d.Raw() |
| 163 | + root.AddFieldNoAlloc(root, field).MutateToJSON(root, raw.String()) |
| 164 | + default: |
| 165 | + _ = d.Skip() |
| 166 | + } |
| 167 | +} |
0 commit comments