Skip to content
This repository was archived by the owner on Jun 1, 2023. It is now read-only.

Commit e7a0478

Browse files
author
suckatrash
committed
Add profiles, move puppetdb metric defaults
1 parent 91186d7 commit e7a0478

16 files changed

+484
-312
lines changed

.fixtures.yml

+3
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,7 @@ fixtures:
1515
yumrepo:
1616
repo: https://github.com/puppetlabs/puppetlabs-yumrepo_core.git
1717
ref: 1.0.2
18+
telegraf:
19+
repo: https://github.com/voxpupuli/puppet-telegraf.git
20+
ref: v2.1.0
1821
forge_modules:

Gemfile

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ group :development do
2323
gem "json", '= 1.8.1', require: false if Gem::Version.new(RUBY_VERSION.dup) == Gem::Version.new('2.1.9')
2424
gem "json", '= 2.0.4', require: false if Gem::Requirement.create('~> 2.4.2').satisfied_by?(Gem::Version.new(RUBY_VERSION.dup))
2525
gem "json", '= 2.1.0', require: false if Gem::Requirement.create(['>= 2.5.0', '< 2.7.0']).satisfied_by?(Gem::Version.new(RUBY_VERSION.dup))
26+
gem "toml-rb", '= 1.1.2', require: false, platforms: [:ruby]
2627
gem "puppet-module-posix-default-r#{minor_version}", '~> 0.3', require: false, platforms: [:ruby]
2728
gem "puppet-module-posix-dev-r#{minor_version}", '~> 0.3', require: false, platforms: [:ruby]
2829
gem "puppet-module-win-default-r#{minor_version}", '~> 0.3', require: false, platforms: [:mswin, :mingw, :x64_mingw]

manifests/init.pp

+14-9
Original file line numberDiff line numberDiff line change
@@ -86,16 +86,19 @@
8686
# Defaults to `[$trusted['certname']]`
8787
#
8888
# @param influxdb_urls
89-
# The string for telegraf's config defining where influxdb is
89+
# An array for telegraf's config defining where influxdb instances are
9090
#
9191
# @param telegraf_db_name
9292
# The database in influxdb where telegraf metrics are stored
9393
#
9494
# @param telegraf_agent_interval
95-
# How often the telefraf agent queries for metrics
95+
# How often the telegraf agent queries for metrics. Defaults to "5s"
9696
#
9797
# @param http_response_timeout
98-
# How long to wait for the queries by telegraf to finish before giving up
98+
# How long to wait for the queries by telegraf to finish before giving up. Defaults to "5s"
99+
#
100+
# @param pg_query_interval
101+
# How often postgres queries will run when monitoring a postgres host. Defaults to "10m"
99102
#
100103
# @param overwrite_dashboards
101104
# Whether to overwrite the example Grafana dashboards.
@@ -203,13 +206,15 @@
203206
Boolean $enable_telegraf = $puppet_metrics_dashboard::params::enable_telegraf,
204207
Boolean $configure_telegraf = $puppet_metrics_dashboard::params::configure_telegraf,
205208
Boolean $consume_graphite = $puppet_metrics_dashboard::params::consume_graphite,
206-
Puppet_metrics_dashboard::HostList $master_list = $puppet_metrics_dashboard::params::master_list,
207-
Puppet_metrics_dashboard::HostList $puppetdb_list = $puppet_metrics_dashboard::params::puppetdb_list,
208-
Puppet_metrics_dashboard::HostList $postgres_host_list = $puppet_metrics_dashboard::params::postgres_host_list,
209-
String $influxdb_urls = $puppet_metrics_dashboard::params::influxdb_urls,
209+
Puppet_metrics_dashboard::HostList $master_list = $puppet_metrics_dashboard::params::master_list,
210+
Puppet_metrics_dashboard::HostList $puppetdb_list = $puppet_metrics_dashboard::params::puppetdb_list,
211+
Puppet_metrics_dashboard::HostList $postgres_host_list = $puppet_metrics_dashboard::params::postgres_host_list,
212+
Puppet_metrics_dashboard::Puppetdb_metric $puppetdb_metrics = $puppet_metrics_dashboard::params::puppetdb_metrics,
213+
Array[String] $influxdb_urls = $puppet_metrics_dashboard::params::influxdb_urls,
210214
String $telegraf_db_name = $puppet_metrics_dashboard::params::telegraf_db_name,
211-
Integer[1] $telegraf_agent_interval = $puppet_metrics_dashboard::params::telegraf_agent_interval,
212-
Integer[1] $http_response_timeout = $puppet_metrics_dashboard::params::http_response_timeout,
215+
String[2] $telegraf_agent_interval = $puppet_metrics_dashboard::params::telegraf_agent_interval,
216+
String[2] $http_response_timeout = $puppet_metrics_dashboard::params::http_response_timeout,
217+
String[2] $pg_query_interval = $puppet_metrics_dashboard::params::pg_query_interval,
213218
) inherits puppet_metrics_dashboard::params {
214219
if $manage_repos {
215220
contain puppet_metrics_dashboard::repos

manifests/params.pp

+171-3
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,181 @@
2727
$master_list = [$trusted['certname']]
2828
$puppetdb_list = [$trusted['certname']]
2929
$postgres_host_list = [$trusted['certname']]
30-
$influxdb_urls = "['http://localhost:8086']"
30+
$influxdb_urls = ['http://localhost:8086']
3131
$telegraf_db_name = 'telegraf'
32-
$telegraf_agent_interval = 5
33-
$http_response_timeout = 5 # this is the default value for the HTTP JSON Input
32+
$telegraf_agent_interval = '5s'
33+
$http_response_timeout = '5s' # this is the default value for the HTTP JSON Input
34+
$pg_query_interval = '10m'
3435

3536
$overwrite_dashboards_file = '/opt/puppetlabs/puppet/cache/state/overwrite_dashboards_disabled'
3637

38+
$activemq_metrics = [
39+
{ 'name' => 'amq_metrics',
40+
'url' => 'org.apache.activemq:type=Broker,brokerName=localhost,destinationType=Queue,destinationName=puppetlabs.puppetdb.commands' },
41+
]
42+
43+
$base_metrics = [
44+
{ 'name' => 'global_command-parse-time',
45+
'url' => 'puppetlabs.puppetdb.mq:name=global.command-parse-time' },
46+
{ 'name' => 'global_discarded',
47+
'url' => 'puppetlabs.puppetdb.mq:name=global.discarded' },
48+
{ 'name' => 'global_fatal',
49+
'url' => 'puppetlabs.puppetdb.mq:name=global.fatal' },
50+
{ 'name' => 'global_message-persistence-time',
51+
'url' => 'puppetlabs.puppetdb.mq:name=global.message-persistence-time' },
52+
{ 'name' => 'global_retried',
53+
'url' => 'puppetlabs.puppetdb.mq:name=global.retried' },
54+
{ 'name' => 'global_retry-counts',
55+
'url' => 'puppetlabs.puppetdb.mq:name=global.retry-counts' },
56+
{ 'name' => 'global_seen',
57+
'url' => 'puppetlabs.puppetdb.mq:name=global.seen' },
58+
{ 'name' => 'global_processed',
59+
'url' => 'puppetlabs.puppetdb.mq:name=global.processed' },
60+
{ 'name' => 'global_processing-time',
61+
'url' => 'puppetlabs.puppetdb.mq:name=global.processing-time' },
62+
]
63+
64+
$base_metrics_through_4_2 = [
65+
{ 'name' => 'global_generate-retry-message-time',
66+
'url' => 'puppetlabs.puppetdb.mq:name=global.generate-retry-message-time' },
67+
{ 'name' => 'global_retry-persistence-time',
68+
'url' => 'puppetlabs.puppetdb.mq:name=global.retry-persistence-time' },
69+
]
70+
71+
$storage_metrics = [
72+
{ 'name' => 'storage_add-edges',
73+
'url' => 'puppetlabs.puppetdb.storage:name=add-edges' },
74+
{ 'name' => 'storage_add-resources',
75+
'url' => 'puppetlabs.puppetdb.storage:name=add-resources' },
76+
{ 'name' => 'storage_catalog-hash',
77+
'url' => 'puppetlabs.puppetdb.storage:name=catalog-hash' },
78+
{ 'name' => 'storage_catalog-hash-match-time',
79+
'url' => 'puppetlabs.puppetdb.storage:name=catalog-hash-match-time' },
80+
{ 'name' => 'storage_catalog-hash-miss-time',
81+
'url' => 'puppetlabs.puppetdb.storage:name=catalog-hash-miss-time' },
82+
{ 'name' => 'storage_gc-catalogs-time',
83+
'url' => 'puppetlabs.puppetdb.storage:name=gc-catalogs-time' },
84+
{ 'name' => 'storage_gc-environments-time',
85+
'url' => 'puppetlabs.puppetdb.storage:name=gc-environments-time' },
86+
{ 'name' => 'storage_gc-fact-paths',
87+
'url' => 'puppetlabs.puppetdb.storage:name=gc-fact-paths' },
88+
{ 'name' => 'storage_gc-params-time',
89+
'url' => 'puppetlabs.puppetdb.storage:name=gc-params-time' },
90+
{ 'name' => 'storage_gc-report-statuses',
91+
'url' => 'puppetlabs.puppetdb.storage:name=gc-report-statuses' },
92+
{ 'name' => 'storage_gc-time',
93+
'url' => 'puppetlabs.puppetdb.storage:name=gc-time' },
94+
{ 'name' => 'storage_new-catalog-time',
95+
'url' => 'puppetlabs.puppetdb.storage:name=new-catalog-time' },
96+
{ 'name' => 'storage_new-catalogs',
97+
'url' => 'puppetlabs.puppetdb.storage:name=new-catalogs' },
98+
{ 'name' => 'storage_replace-catalog-time',
99+
'url' => 'puppetlabs.puppetdb.storage:name=replace-catalog-time' },
100+
{ 'name' => 'storage_replace-facts-time',
101+
'url' => 'puppetlabs.puppetdb.storage:name=replace-facts-time' },
102+
{ 'name' => 'storage_resource-hashes',
103+
'url' => 'puppetlabs.puppetdb.storage:name=resource-hashes' },
104+
{ 'name' => 'storage_store-report-time',
105+
'url' => 'puppetlabs.puppetdb.storage:name=store-report-time' },
106+
]
107+
108+
#TODO: Track these on a less frequent cadence because they are slow to run
109+
$storage_metrics_db_queries = [
110+
{ 'name' => 'storage_catalog-volitilty',
111+
'url' => 'puppetlabs.puppetdb.storage:name=catalog-volitilty' },
112+
{ 'name' => 'storage_duplicate-catalogs',
113+
'url' => 'puppetlabs.puppetdb.storage:name=duplicate-catalogs' },
114+
{ 'name' => 'storage_duplicate-pct',
115+
'url' => 'puppetlabs.puppetdb.storage:name=duplicate-pct' },
116+
]
117+
118+
$numbers = $facts['pe_server_version'] ? {
119+
/^2015.2/ => {'catalogs' => 6, 'facts' => 4, 'reports' => 6},
120+
/^2015.3/ => {'catalogs' => 7, 'facts' => 4, 'reports' => 6},
121+
/^2016.(1|2)/ => {'catalogs' => 8, 'facts' => 4, 'reports' => 7},
122+
/^2016.(4|5)/ => {'catalogs' => 9, 'facts' => 5, 'reports' => 8},
123+
/^2017.(1|2)/ => {'catalogs' => 9, 'facts' => 5, 'reports' => 8},
124+
default => {'catalogs' => 9, 'facts' => 5, 'reports' => 8},
125+
}
126+
127+
$version_specific_metrics = [
128+
{ 'name' => 'mq_replace_catalog_retried',
129+
'url' => "puppetlabs.puppetdb.mq:name=replace catalog.${numbers['catalogs']}.retried" },
130+
{ 'name' => 'mq_replace_catalog_retry-counts',
131+
'url' => "puppetlabs.puppetdb.mq:name=replace catalog.${numbers['catalogs']}.retry-counts" },
132+
{ 'name' => 'mq_replace_facts_retried',
133+
'url' => "puppetlabs.puppetdb.mq:name=replace facts.${numbers['facts']}.retried" },
134+
{ 'name' => 'mq_replace_facts_retry-counts',
135+
'url' => "puppetlabs.puppetdb.mq:name=replace facts.${numbers['facts']}.retry-counts" },
136+
{ 'name' => 'mq_store_report_retried',
137+
'url' => "puppetlabs.puppetdb.mq:name=store report.${numbers['reports']}.retried" },
138+
{ 'name' => 'mq_store_reports_retry-counts',
139+
'url' => "puppetlabs.puppetdb.mq:name=store report.${numbers['reports']}.retry-counts" },
140+
]
141+
142+
$connection_pool_metrics = [
143+
{ 'name' => 'PDBReadPool_pool_ActiveConnections',
144+
'url' => 'puppetlabs.puppetdb.database:name=PDBReadPool.pool.ActiveConnections' },
145+
{ 'name' => 'PDBReadPool_pool_IdleConnections',
146+
'url' => 'puppetlabs.puppetdb.database:name=PDBReadPool.pool.IdleConnections' },
147+
{ 'name' => 'PDBReadPool_pool_PendingConnections',
148+
'url' => 'puppetlabs.puppetdb.database:name=PDBReadPool.pool.PendingConnections' },
149+
{ 'name' => 'PDBReadPool_pool_TotalConnections',
150+
'url' => 'puppetlabs.puppetdb.database:name=PDBReadPool.pool.TotalConnections' },
151+
{ 'name' => 'PDBReadPool_pool_Usage',
152+
'url' => 'puppetlabs.puppetdb.database:name=PDBReadPool.pool.Usage' },
153+
{ 'name' => 'PDBReadPool_pool_Wait',
154+
'url' => 'puppetlabs.puppetdb.database:name=PDBReadPool.pool.Wait' },
155+
{ 'name' => 'PDBWritePool_pool_ActiveConnections',
156+
'url' => 'puppetlabs.puppetdb.database:name=PDBWritePool.pool.ActiveConnections' },
157+
{ 'name' => 'PDBWritePool_pool_IdleConnections',
158+
'url' => 'puppetlabs.puppetdb.database:name=PDBWritePool.pool.IdleConnections' },
159+
{ 'name' => 'PDBWritePool_pool_PendingConnections',
160+
'url' => 'puppetlabs.puppetdb.database:name=PDBWritePool.pool.PendingConnections' },
161+
{ 'name' => 'PDBWritePool_pool_TotalConnections',
162+
'url' => 'puppetlabs.puppetdb.database:name=PDBWritePool.pool.TotalConnections' },
163+
{ 'name' => 'PDBWritePool_pool_Usage',
164+
'url' => 'puppetlabs.puppetdb.database:name=PDBWritePool.pool.Usage' },
165+
{ 'name' => 'PDBWritePool_pool_Wait',
166+
'url' => 'puppetlabs.puppetdb.database:name=PDBWritePool.pool.Wait' },
167+
]
168+
169+
$ha_sync_metrics = [
170+
{ 'name' => 'ha_last-sync-succeeded',
171+
'url' => 'puppetlabs.puppetdb.ha:name=last-sync-succeeded' },
172+
{ 'name' => 'ha_seconds-since-last-successful-sync',
173+
'url' => 'puppetlabs.puppetdb.ha:name=seconds-since-last-successful-sync' },
174+
{ 'name' => 'ha_failed-request-counter',
175+
'url' => 'puppetlabs.puppetdb.ha:name=failed-request-counter' },
176+
{ 'name' => 'ha_sync-duration',
177+
'url' => 'puppetlabs.puppetdb.ha:name=sync-duration' },
178+
{ 'name' => 'ha_catalogs-sync-duration',
179+
'url' => 'puppetlabs.puppetdb.ha:name=catalogs-sync-duration' },
180+
{ 'name' => 'ha_reports-sync-duration',
181+
'url' => 'puppetlabs.puppetdb.ha:name=reports-sync-duration' },
182+
{ 'name' => 'ha_factsets-sync-duration',
183+
'url' => 'puppetlabs.puppetdb.ha:name=factsets-sync-duration' },
184+
{ 'name' => 'ha_nodes-sync-duration',
185+
'url' => 'puppetlabs.puppetdb.ha:name=nodes-sync-duration' },
186+
{ 'name' => 'ha_record-transfer-duration',
187+
'url' => 'puppetlabs.puppetdb.ha:name=record-transfer-duration' },
188+
]
189+
190+
# lint:ignore:140chars
191+
$puppetdb_metrics = $facts['pe_server_version'] ? {
192+
/^2015./ =>
193+
$activemq_metrics,
194+
/^2016\.[45]\./ =>
195+
$activemq_metrics + $base_metrics + $base_metrics_through_4_2 + $storage_metrics + $connection_pool_metrics + $version_specific_metrics + $ha_sync_metrics,
196+
/^2016./ =>
197+
$activemq_metrics + $base_metrics + $base_metrics_through_4_2 + $storage_metrics + $connection_pool_metrics + $version_specific_metrics,
198+
/^201[78]\./ =>
199+
$activemq_metrics + $base_metrics + $storage_metrics + $connection_pool_metrics + $version_specific_metrics + $ha_sync_metrics,
200+
default =>
201+
$base_metrics + $storage_metrics + $connection_pool_metrics + $version_specific_metrics,
202+
}
203+
# lint:endignore
204+
37205
case $facts['os']['family'] {
38206
'RedHat': {
39207
$influx_db_service_name = 'influxdb'

manifests/profile/compiler.pp

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# @summary Declare this defined type for a master or compiler to collect puppetserver metrics
2+
#
3+
# @param timeout
4+
# Default timeout of http calls. Defaults to 5 seconds
5+
#
6+
# @param compiler
7+
# The FQDN of the compiler / master. Defaults to the FQDN of the server where the profile is applied
8+
#
9+
define puppet_metrics_dashboard::profile::compiler (
10+
String[2] $timeout = $puppet_metrics_dashboard::params::http_response_timeout,
11+
Variant[String,Tuple[String, Integer]] $compiler = $facts['networking']['fqdn'],
12+
Integer[1] $port = 8140,
13+
String[2] $interval = '5s',
14+
){
15+
16+
telegraf::input { "puppetserver_metrics_${compiler}":
17+
plugin_type => 'httpjson',
18+
options => [{
19+
'name' => 'puppet_stats',
20+
'servers' => [ "https://${compiler}:${port}/status/v1/services?level=debug" ],
21+
'method' => 'GET',
22+
'insecure_skip_verify' => true,
23+
'response_timeout' => $timeout,
24+
}],
25+
notify => Service['telegraf'],
26+
require => Package['telegraf'],
27+
}
28+
29+
telegraf::input { "pe_last_file_sync_${compiler}":
30+
plugin_type => 'http',
31+
options => [{
32+
'urls' => [ "https://${compiler}:${port}/status/v1/services/file-sync-client-service?level=debug" ],
33+
'insecure_skip_verify' => true,
34+
'data_format' => 'json',
35+
'json_string_fields' => ['status_repos_puppet-code_latest_commit_date'],
36+
'timeout' => $timeout,
37+
}],
38+
notify => Service['telegraf'],
39+
require => Package['telegraf'],
40+
}
41+
}

manifests/profile/master/postgres.pp

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# @summary Apply this defined type to an agent running pe-postgresql to collect postgres metrics
2+
#
3+
# @param query_interval
4+
# How often to run the queries, given as a duration string. Defaults to "10m" (10 minutes).
5+
#
6+
# @param postgres_host
7+
# The FQDN of the postgres host. Defaults to the FQDN of the server where the profile is applied
8+
#
9+
define puppet_metrics_dashboard::profile::master::postgres (
10+
Variant[String,Tuple[String, Integer]] $postgres_host = $facts['networking']['fqdn'],
11+
String[2] $query_interval = $puppet_metrics_dashboard::params::pg_query_interval,
12+
Integer[1] $port = 5432,
13+
){
14+
15+
if ! defined(Puppet_metrics_dashboard::Certs['telegraf']) {
16+
puppet_metrics_dashboard::certs{'telegraf':
17+
notify => Service['telegraf'],
18+
require => Package['telegraf'],
19+
before => Service['telegraf'],
20+
}
21+
}
22+
23+
telegraf::input { "pe_postgres_${postgres_host}":
24+
plugin_type => 'postgresql_extensible',
25+
options => [{
26+
'interval' => $query_interval,
27+
'address' => "postgres://telegraf@${postgres_host}:${port}/pe-puppetdb?sslmode=require&sslkey=/etc/telegraf/${trusted['certname']}_key.pem&sslcert=/etc/telegraf/${trusted['certname']}_cert.pem&sslrootcert=/etc/telegraf/ca.pem",
28+
'outputaddress' => $facts['networking']['fqdn'],
29+
'databases' => ['pe-puppetdb','pe-rbac','pe-activity','pe-classifier'],
30+
'query' => [{
31+
'sqlquery' => 'SELECT * FROM pg_stat_database',
32+
'version' => 901,
33+
'withdbname' => false,
34+
},{
35+
'sqlquery' => 'SELECT relname as s_table, pg_relation_size(relid) as size FROM pg_catalog.pg_statio_user_tables ORDER BY pg_total_relation_size(relid) DESC',
36+
'version' => 901,
37+
'withdbname' => false,
38+
'tagvalue' => 's_table',
39+
},{
40+
'sqlquery' => 'SELECT relname as v_table, autovacuum_count, vacuum_count, n_live_tup, n_dead_tup FROM pg_stat_user_tables',
41+
'version' => 901,
42+
'withdbname' => false,
43+
'tagvalue' => 'v_table',
44+
},{
45+
'sqlquery' => 'SELECT relname as io_table, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables',
46+
'version' => 901,
47+
'withdbname' => false,
48+
'tagvalue' => 'io_table',
49+
}]
50+
}],
51+
notify => Service['telegraf'],
52+
require => Package['telegraf'],
53+
}
54+
}

0 commit comments

Comments
 (0)