From b39cd4ebda691a17d16c19a0e4f22d2db9348f71 Mon Sep 17 00:00:00 2001 From: Chris Barker Date: Fri, 17 May 2019 10:22:29 -0400 Subject: [PATCH] Add face (#9) This commit adds the splunk_hec Puppet face/app, allowing for a `cat json | puppet splunk_hec`-style workflow. The first functionality of this code is to enable sending PE metrics data to Splunk using the current CS best practices for collecting the CS data. This will need changes in the puppet metrics collector module managed by CS to enable that specific workflow: puppetlabs/puppetlabs-puppet_metrics_collector#6 This also adds the transaction_uuid to the fact event submission, so correlating the submitted facts, the catalog, and the report is now possible. This introduces a sourcetypetime function (epoch time with millisecond precision) to ensure that across all of our event submission code, we're always generating a timestamp of the same precision and in the same way. --- examples/foo.json | 8 + examples/orchestrator_metrics.json | 601 ++++++++++++++++++++++ lib/puppet/application/splunk_hec.rb | 61 +++ lib/puppet/indirector/facts/splunk_hec.rb | 6 +- lib/puppet/reports/splunk_hec.rb | 6 +- lib/puppet/util/splunk_hec.rb | 7 + 6 files changed, 684 insertions(+), 5 deletions(-) create mode 100644 examples/foo.json create mode 100644 examples/orchestrator_metrics.json create mode 100644 lib/puppet/application/splunk_hec.rb diff --git a/examples/foo.json b/examples/foo.json new file mode 100644 index 00000000..2ac2e0b0 --- /dev/null +++ b/examples/foo.json @@ -0,0 +1,8 @@ +{ + "timestamp": "2019-05-03T05:55:30+00:00", + "servers": { + "127-0-0-1":{ + "asdfsafsdf": { "bas":"bin"} + } + } +} \ No newline at end of file diff --git a/examples/orchestrator_metrics.json b/examples/orchestrator_metrics.json new file mode 100644 index 00000000..337f1727 --- /dev/null +++ b/examples/orchestrator_metrics.json @@ -0,0 +1,601 @@ +{ + "timestamp": "2019-05-03T02:59:33Z", + "servers": { + "puppet-c-splunk-217321-internal": { + "orchestrator": { + "broker-service": { +
"service_version": "1.5.3", + "service_status_version": 1, + "detail_level": "debug", + "state": "running", + "status": { + "metrics": { + "puppetlabs.pcp.on-close": { + "rates": { + "1": 2.788760636356196e-20, + "5": 3.3877101432950975e-05, + "15": 0.011065951062135926, + "total": 1 + }, + "mean": 417353931.0, + "std-dev": 0.0, + "percentiles": { + "0.75": 417353931.0, + "0.95": 417353931.0, + "0.99": 417353931.0, + "0.999": 417353931.0, + "1.0": 417353931.0 + }, + "largest": 417353931, + "smallest": 417353931 + }, + "puppetlabs.pcp.on-connect": { + "rates": { + "1": 6.888187206729161e-20, + "5": 4.0696306647189904e-05, + "15": 0.011763559299922392, + "total": 12 + }, + "mean": 41894024.417897455, + "std-dev": 109757370.42593586, + "percentiles": { + "0.75": 28301604.0, + "0.95": 491414497.0, + "0.99": 491414497.0, + "0.999": 491414497.0, + "1.0": 491414497.0 + }, + "largest": 491414497, + "smallest": 4684587 + }, + "puppetlabs.pcp.on-message": { + "rates": { + "1": 2.81218054725494e-07, + "5": 0.002505776531083986, + "15": 0.005911137972860372, + "total": 22 + }, + "mean": 2758247.0595337525, + "std-dev": 1528318.9077125278, + "percentiles": { + "0.75": 3784909.0, + "0.95": 4057499.0, + "0.99": 4543275.0, + "0.999": 4543275.0, + "1.0": 50167287.0 + }, + "largest": 50167287, + "smallest": 424824 + }, + "puppetlabs.pcp.on-send": { + "rates": { + "1": 2.8121805472550845e-07, + "5": 0.0025082327356744194, + "15": 0.006163991430922259, + "total": 26 + }, + "mean": 349634.0596412754, + "std-dev": 87947.06774548466, + "percentiles": { + "0.75": 385273.0, + "0.95": 471062.0, + "0.99": 539459.0, + "0.999": 539459.0, + "1.0": 30715567.0 + }, + "largest": 30715567, + "smallest": 189908 + } + }, + "threads": { + "ThreadCount": 93, + "ObjectMonitorUsageSupported": true, + "PeakThreadCount": 99, + "ThreadAllocatedMemoryEnabled": true, + "ThreadContentionMonitoringSupported": true, + "DaemonThreadCount": 18, + "CurrentThreadCpuTime": 1260545, + "ThreadCpuTimeEnabled": true, + 
"ThreadCpuTimeSupported": true, + "SynchronizerUsageSupported": true, + "TotalStartedThreadCount": 365, + "ThreadAllocatedMemorySupported": true, + "ThreadContentionMonitoringEnabled": false, + "CurrentThreadUserTime": 0, + "CurrentThreadCpuTimeSupported": true + }, + "memory": { + "Verbose": true, + "ObjectPendingFinalizationCount": 0, + "HeapMemoryUsage": { + "committed": 725090304, + "init": 738197504, + "max": 725090304, + "used": 109287864 + }, + "NonHeapMemoryUsage": { + "committed": 155369472, + "init": 2555904, + "max": -1, + "used": 152582728 + } + } + }, + "active_alerts": [ + + ] + }, + "orchestrator-service": { + "service_version": "2019.1.0.62", + "service_status_version": 1, + "detail_level": "debug", + "state": "running", + "status": { + "db_up": true, + "classifier_up": true, + "rbac_up": true, + "puppetserver_up": true, + "puppetdb_up": true, + "pxp_up": true, + "replication": { + "mode": "source", + "status": "none" + }, + "metrics": { + "routes": { + "routes": { + "orchestrator-v1-tasks-:module-:task-name": { + "route-id": "orchestrator-v1-tasks-:module-:task-name", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-tasks-:module-:task-name-permitted": { + "route-id": "orchestrator-v1-tasks-:module-:task-name-permitted", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-jobs-:job-id-nodes": { + "route-id": "orchestrator-v1-jobs-:job-id-nodes", + "count": 5, + "mean": 56, + "aggregate": 280 + }, + "orchestrator-v1-scheduled_jobs": { + "route-id": "orchestrator-v1-scheduled_jobs", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-environments-:environment": { + "route-id": "orchestrator-v1-environments-:environment", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-environments": { + "route-id": "orchestrator-v1-environments", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-environments-:environment-applications": { + "route-id": 
"orchestrator-v1-environments-:environment-applications", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "other": { + "route-id": "other", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-plan_jobs-:job-id-events": { + "route-id": "orchestrator-v1-plan_jobs-:job-id-events", + "count": 6264, + "mean": 18, + "aggregate": 112752 + }, + "orchestrator-v1-scheduled_jobs-:job-id": { + "route-id": "orchestrator-v1-scheduled_jobs-:job-id", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-usage": { + "route-id": "orchestrator-v1-usage", + "count": 1, + "mean": 1272, + "aggregate": 1272 + }, + "orchestrator-v1-inventory-:node": { + "route-id": "orchestrator-v1-inventory-:node", + "count": 1, + "mean": 102, + "aggregate": 102 + }, + "orchestrator-v1-tasks": { + "route-id": "orchestrator-v1-tasks", + "count": 2, + "mean": 206, + "aggregate": 412 + }, + "orchestrator-v1-internal-:command-name": { + "route-id": "orchestrator-v1-internal-:command-name", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-jobs-:job-id-report": { + "route-id": "orchestrator-v1-jobs-:job-id-report", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-dumplings-/1-9_d*/": { + "route-id": "orchestrator-v1-dumplings-/1-9_d*/", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "total": { + "route-id": "total", + "count": 6527, + "mean": 18, + "aggregate": 117486 + }, + "orchestrator-v1-plan_jobs-:job-id": { + "route-id": "orchestrator-v1-plan_jobs-:job-id", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-jobs-:job-id-catalog": { + "route-id": "orchestrator-v1-jobs-:job-id-catalog", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-jobs": { + "route-id": "orchestrator-v1-jobs", + "count": 4, + "mean": 84, + "aggregate": 336 + }, + "orchestrator-v1-jobs-:job-id": { + "route-id": "orchestrator-v1-jobs-:job-id", + "count": 14, + "mean": 57, + "aggregate": 798 + }, + "orchestrator-v1-swagger_json": 
{ + "route-id": "orchestrator-v1-swagger_json", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-command-:command-name": { + "route-id": "orchestrator-v1-command-:command-name", + "count": 4, + "mean": 476, + "aggregate": 1904 + }, + "orchestrator-v1-plan_jobs": { + "route-id": "orchestrator-v1-plan_jobs", + "count": 17, + "mean": 47, + "aggregate": 799 + }, + "orchestrator-v1-dumplings": { + "route-id": "orchestrator-v1-dumplings", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "/*/": { + "route-id": "/*/", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-environments-:environment-instances": { + "route-id": "orchestrator-v1-environments-:environment-instances", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-//": { + "route-id": "orchestrator-v1-//", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-status": { + "route-id": "orchestrator-v1-status", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + "orchestrator-v1-jobs-:job-id-events": { + "route-id": "orchestrator-v1-jobs-:job-id-events", + "count": 214, + "mean": 38, + "aggregate": 8132 + }, + "orchestrator-v1-inventory": { + "route-id": "orchestrator-v1-inventory", + "count": 1, + "mean": 88, + "aggregate": 88 + } + }, + "sorted-routes": [ + { + "route-id": "total", + "count": 6527, + "mean": 18, + "aggregate": 117486 + }, + { + "route-id": "orchestrator-v1-plan_jobs-:job-id-events", + "count": 6264, + "mean": 18, + "aggregate": 112752 + }, + { + "route-id": "orchestrator-v1-jobs-:job-id-events", + "count": 214, + "mean": 38, + "aggregate": 8132 + }, + { + "route-id": "orchestrator-v1-command-:command-name", + "count": 4, + "mean": 476, + "aggregate": 1904 + }, + { + "route-id": "orchestrator-v1-usage", + "count": 1, + "mean": 1272, + "aggregate": 1272 + }, + { + "route-id": "orchestrator-v1-plan_jobs", + "count": 17, + "mean": 47, + "aggregate": 799 + }, + { + "route-id": "orchestrator-v1-jobs-:job-id", + "count": 14, + "mean": 
57, + "aggregate": 798 + }, + { + "route-id": "orchestrator-v1-tasks", + "count": 2, + "mean": 206, + "aggregate": 412 + }, + { + "route-id": "orchestrator-v1-jobs", + "count": 4, + "mean": 84, + "aggregate": 336 + }, + { + "route-id": "orchestrator-v1-jobs-:job-id-nodes", + "count": 5, + "mean": 56, + "aggregate": 280 + }, + { + "route-id": "orchestrator-v1-inventory-:node", + "count": 1, + "mean": 102, + "aggregate": 102 + }, + { + "route-id": "orchestrator-v1-inventory", + "count": 1, + "mean": 88, + "aggregate": 88 + }, + { + "route-id": "orchestrator-v1-tasks-:module-:task-name", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-tasks-:module-:task-name-permitted", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-scheduled_jobs", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-environments-:environment", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-environments", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-environments-:environment-applications", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "other", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-scheduled_jobs-:job-id", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-internal-:command-name", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-jobs-:job-id-report", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-dumplings-/1-9_d*/", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-plan_jobs-:job-id", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-jobs-:job-id-catalog", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-swagger_json", + "count": 0, + "mean": 0, + 
"aggregate": 0 + }, + { + "route-id": "orchestrator-v1-dumplings", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "/*/", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-environments-:environment-instances", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-//", + "count": 0, + "mean": 0, + "aggregate": 0 + }, + { + "route-id": "orchestrator-v1-status", + "count": 0, + "mean": 0, + "aggregate": 0 + } + ] + }, + "app": { + "deploy-queue.length": 0, + "jobs-created": 3, + "puppet-run-time": 0.0 + } + } + }, + "active_alerts": [ + + ] + }, + "status-service": { + "service_version": "1.1.0", + "service_status_version": 1, + "detail_level": "debug", + "state": "running", + "status": { + "experimental": { + "jvm-metrics": { + "cpu-usage": 1.5996801, + "up-time-ms": 2632100, + "gc-cpu-usage": 0.0, + "threading": { + "thread-count": 90, + "peak-thread-count": 99 + }, + "heap-memory": { + "committed": 725090304, + "init": 738197504, + "max": 725090304, + "used": 108667120 + }, + "gc-stats": { + "PS Scavenge": { + "count": 57, + "total-time-ms": 1504, + "last-gc-info": { + "duration-ms": 11 + } + }, + "PS MarkSweep": { + "count": 3, + "total-time-ms": 503, + "last-gc-info": { + "duration-ms": 287 + } + } + }, + "start-time-ms": 1556849741982, + "file-descriptors": { + "used": 165, + "max": 100000 + }, + "non-heap-memory": { + "committed": 155369472, + "init": 2555904, + "max": -1, + "used": 152588464 + } + } + } + }, + "active_alerts": [ + + ] + }, + "error": [ + + ], + "error_count": 0, + "api-query-start": "2019-05-03T02:59:33Z", + "api-query-duration": 0.299507934 + } + }, + "foo":{ + "bar": {} + } + } +} \ No newline at end of file diff --git a/lib/puppet/application/splunk_hec.rb b/lib/puppet/application/splunk_hec.rb new file mode 100644 index 00000000..58ba9550 --- /dev/null +++ b/lib/puppet/application/splunk_hec.rb @@ -0,0 +1,61 @@ +require 'puppet/application' +require 
'puppet/util/splunk_hec' + +class Puppet::Application::Splunk_hec < Puppet::Application + include Puppet::Util::Splunk_hec + + RUN_HELP = _("Run 'puppet splunk_hec --help' for more details").freeze + + run_mode :master + + # Options for splunk_hec + + option('--sourcetype SOURCETYPE') do |format| + options[:sourcetype] = format.downcase.to_sym + end + + option('--pe_metrics') + + def get_name(servername) + if servername.to_s == '127-0-0-1' + name = Puppet[:certname].to_s + else + name = servername + end + name.to_s + end + + def send_pe_metrics(data, sourcetype) + timestamp = sourcetypetime(data['timestamp']) + event_template = { + 'time' => timestamp, + 'sourcetype' => sourcetype.to_s, + 'event' => {}, + } + data['servers'].keys.each do |server| + name = get_name(server.to_s) + content = data['servers'][server.to_s] + content.keys.each do |serv| + event = event_template.clone + event['host'] = name + event['event'] = content[serv.to_s] + event['event']['pe_service'] = serv.to_s + Puppet.info "Submitting metrics to Splunk at #{splunk_url}" + submit_request(event) + end + end + end + + def main + data = JSON.parse(STDIN.read) + + sourcetype = options[:sourcetype].to_s + + if options[:pe_metrics] + send_pe_metrics(data, sourcetype) + end + end +end + + + diff --git a/lib/puppet/indirector/facts/splunk_hec.rb b/lib/puppet/indirector/facts/splunk_hec.rb index cd85261e..eeecb564 100644 --- a/lib/puppet/indirector/facts/splunk_hec.rb +++ b/lib/puppet/indirector/facts/splunk_hec.rb @@ -16,6 +16,7 @@ def save(request) begin host = request.instance.name.dup incoming_facts = request.instance.values.dup + transaction_uuid = request.options[:transaction_uuid] hardcoded = [ 'os', @@ -36,8 +37,9 @@ def save(request) facts['trusted'] = get_trusted_info(request.node) facts['environment'] = request.options[:environment] || request.environment.to_s - facts['producer'] = Puppet[:node_name_value] + facts['producer'] = Puppet[:certname] facts['pe_console'] = pe_console + 
facts['transaction_uuid'] = transaction_uuid event = { 'host' => host, @@ -48,7 +50,7 @@ def save(request) Puppet.info "Submitting facts to Splunk at #{splunk_url}" submit_request event rescue StandardError => e - Puppet.err "Could not send facts to Satellite: #{e}\n#{e.backtrace}" + Puppet.err "Could not send facts to Splunk: #{e}\n#{e.backtrace}" end end end diff --git a/lib/puppet/reports/splunk_hec.rb b/lib/puppet/reports/splunk_hec.rb index 7677af3a..bb35e73e 100644 --- a/lib/puppet/reports/splunk_hec.rb +++ b/lib/puppet/reports/splunk_hec.rb @@ -7,8 +7,8 @@ include Puppet::Util::Splunk_hec def process # now we can create the event with the timestamp from the report - time = DateTime.parse(self.time.to_s) - epoch = time.strftime('%Q').to_s.insert(-4, '.') + + epoch = sourcetypetime(time.iso8601(3)) # pass simple metrics for report processing later # STATES = [:skipped, :failed, :failed_to_restart, :restarted, :changed, :out_of_sync, :scheduled, :corrective_change] @@ -48,7 +48,7 @@ def process 'puppet_version' => puppet_version, 'report_format' => report_format, 'status' => status, - 'time' => time, + 'time' => time.iso8601(3), 'transaction_uuid' => transaction_uuid, }, } diff --git a/lib/puppet/util/splunk_hec.rb b/lib/puppet/util/splunk_hec.rb index 3a4b24c5..e91c3614 100644 --- a/lib/puppet/util/splunk_hec.rb +++ b/lib/puppet/util/splunk_hec.rb @@ -5,6 +5,7 @@ require 'uri' require 'yaml' require 'json' +require 'time' # splunk_hec.rb module Puppet::Util::Splunk_hec @@ -59,4 +60,10 @@ def splunk_url def pe_console settings['pe_console'] || Puppet[:certname] end + + # standard function to make sure we're using the same time format our sourcetypes are set to parse + def sourcetypetime(timestamp) + time = Time.parse(timestamp) + "%10.3f" % time.to_f + end end