From 8e591212e491eb27e0c17f5cf5951889bd83ec87 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 23 Sep 2022 13:32:49 +0200 Subject: [PATCH 01/46] code layout --- functions/assert_supported_architecture.pp | 32 ++++----- functions/assert_supported_bolt_version.pp | 4 +- functions/assert_supported_pe_version.pp | 6 +- functions/convert_status.pp | 11 ++- functions/determine_status.pp | 24 +++---- functions/file_or_content.pp | 2 - functions/get_targets.pp | 4 +- plans/add_compiler.pp | 29 ++++---- plans/add_database.pp | 84 +++++++++++----------- plans/install.pp | 3 +- types/ldap_config.pp | 46 ++++++------ 11 files changed, 118 insertions(+), 127 deletions(-) diff --git a/functions/assert_supported_architecture.pp b/functions/assert_supported_architecture.pp index 79739bcc..3d426227 100644 --- a/functions/assert_supported_architecture.pp +++ b/functions/assert_supported_architecture.pp @@ -13,26 +13,26 @@ function peadm::assert_supported_architecture ( !!($replica_postgresql_host), ] { [true, false, false, false]: { # Standard or Large, no DR - ({ 'disaster-recovery' => false, 'architecture' => $compiler_hosts ? { - undef => 'standard', - default => 'large', - }}) + ( { 'disaster-recovery' => false, 'architecture' => $compiler_hosts ? { + undef => 'standard', + default => 'large', + } }) } - [true, true, false, false]: { # Standard or Large, DR - ({ 'disaster-recovery' => true, 'architecture' => $compiler_hosts ? { - undef => 'standard', - default => 'large', - }}) + [true, true, false, false]: { # Standard or Large, DR + ( { 'disaster-recovery' => true, 'architecture' => $compiler_hosts ? 
{ + undef => 'standard', + default => 'large', + } }) } - [true, false, true, false]: { # Extra Large, no DR - ({ 'disaster-recovery' => false, 'architecture' => 'extra-large' }) + [true, false, true, false]: { # Extra Large, no DR + ( { 'disaster-recovery' => false, 'architecture' => 'extra-large' }) } - [true, true, true, true]: { # Extra Large, DR - ({ 'disaster-recovery' => true, 'architecture' => 'extra-large' }) + [true, true, true, true]: { # Extra Large, DR + ( { 'disaster-recovery' => true, 'architecture' => 'extra-large' }) } - default: { # Invalid + default: { # Invalid out::message(inline_epp(@(HEREDOC))) - Invalid architecture! Recieved: + Invalid architecture! Recieved: - primary <% if $replica_host { -%> - primary-replica @@ -77,5 +77,5 @@ function peadm::assert_supported_architecture ( } # Return value - return({ 'supported' => true } + $result) + return( { 'supported' => true } + $result) } diff --git a/functions/assert_supported_bolt_version.pp b/functions/assert_supported_bolt_version.pp index a138ef50..e4ba92c9 100644 --- a/functions/assert_supported_bolt_version.pp +++ b/functions/assert_supported_bolt_version.pp @@ -5,7 +5,7 @@ # Accepts a parameter for the $supported_bolt_version for unit testing purposes function peadm::assert_supported_bolt_version ( # No arguments -) >> Struct[{'supported' => Boolean}] { +) >> Struct[{ 'supported' => Boolean }] { $supported_bolt_version = '>= 3.17.0 < 4.0.0' $supported = (peadm::bolt_version() =~ SemVerRange($supported_bolt_version)) @@ -20,5 +20,5 @@ function peadm::assert_supported_bolt_version ( | REASON } - return({ 'supported' => $supported }) + return( { 'supported' => $supported }) } diff --git a/functions/assert_supported_pe_version.pp b/functions/assert_supported_pe_version.pp index 38cb9038..071ea850 100644 --- a/functions/assert_supported_pe_version.pp +++ b/functions/assert_supported_pe_version.pp @@ -11,14 +11,14 @@ function peadm::assert_supported_pe_version ( if $permit_unsafe_versions { 
warning(@("WARN"/L)) - WARNING: Permitting unsafe PE versions. This is not supported or tested. + WARNING: Permitting unsafe PE versions. This is not supported or tested. Proceeding with this action could result in a broken PE Infrastructure. | WARN } if (!$supported and $permit_unsafe_versions) { warning(@("WARN"/L)) - WARNING: PE version ${version} is NOT SUPPORTED! + WARNING: PE version ${version} is NOT SUPPORTED! | WARN } elsif (!$supported) { @@ -34,5 +34,5 @@ function peadm::assert_supported_pe_version ( | REASON } - return({ 'supported' => $supported }) + return( { 'supported' => $supported }) } diff --git a/functions/convert_status.pp b/functions/convert_status.pp index 27f7322f..70c02331 100644 --- a/functions/convert_status.pp +++ b/functions/convert_status.pp @@ -1,4 +1,3 @@ - # @summary Transforms a value in a human readable status with or without colors # @param status A value of true, false, degraded, or an Integer that represents number of non operationally services # If using an integer, you must also supply the total amount of services @@ -19,7 +18,7 @@ function peadm::convert_status( Variant[String,Boolean, Integer] $status, Optional[Integer] $total = 0, Optional[Boolean] $use_colors = true - ) >> String { +) >> String { if $status =~ Integer { if ( $status < 1 ) { $result = 'operational' @@ -30,10 +29,10 @@ function peadm::convert_status( } } else { $result = $status ? 
{ - true => 'operational', - false => 'failed', - /degraded/ => 'degraded', - default => 'unknown' + true => 'operational', + false => 'failed', + /degraded/ => 'degraded', + default => 'unknown' } } if $use_colors { diff --git a/functions/determine_status.pp b/functions/determine_status.pp index 38551314..71a687ba 100644 --- a/functions/determine_status.pp +++ b/functions/determine_status.pp @@ -47,16 +47,16 @@ # } function peadm::determine_status(Array $status_data, Boolean $use_colors = true) >> Hash { # convert the data into a hash with the sevice names as the keys - $hash_data = $status_data.reduce({}) | $res, $data | { - $res.merge({$data[service] => $data }) - } - $out = $hash_data.reduce({}) | $res, $svc_data | { - $service_name = $svc_data[0] - $server = $svc_data[1][server] - $res.merge("${service_name}/${$server}" => $svc_data[1][state] == 'running') - } - $bad_status = $out.filter | $item | { ! $item[1] } - $passed_status = $out.filter | $item | { $item[1] } - $overall_status = peadm::convert_status($bad_status.count, $out.count, $use_colors) - return { status => $overall_status, state => $out, failed => $bad_status, passed => $passed_status } + $hash_data = $status_data.reduce( {}) | $res, $data | { + $res.merge( { $data[service] => $data }) + } + $out = $hash_data.reduce( {}) | $res, $svc_data | { + $service_name = $svc_data[0] + $server = $svc_data[1][server] + $res.merge("${service_name}/${$server}" => $svc_data[1][state] == 'running') + } + $bad_status = $out.filter | $item | { ! 
$item[1] } + $passed_status = $out.filter | $item | { $item[1] } + $overall_status = peadm::convert_status($bad_status.count, $out.count, $use_colors) + return { status => $overall_status, state => $out, failed => $bad_status, passed => $passed_status } } diff --git a/functions/file_or_content.pp b/functions/file_or_content.pp index ed5be20a..e7e35b0d 100644 --- a/functions/file_or_content.pp +++ b/functions/file_or_content.pp @@ -3,7 +3,6 @@ function peadm::file_or_content( Variant[String, Undef] $file, Variant[String, Undef] $content, ) { - $value = [ $file, $content, @@ -15,5 +14,4 @@ function peadm::file_or_content( undef => $content, # content supplied directly, use as-is }, } - } diff --git a/functions/get_targets.pp b/functions/get_targets.pp index 2e34239e..98fb61e8 100644 --- a/functions/get_targets.pp +++ b/functions/get_targets.pp @@ -10,8 +10,8 @@ function peadm::get_targets( # $count is 1, return the result of get_target() in an array. If $count is # undef, return get_targets(). case $spec { - Undef, [ ]: { - [ ] # Return empty array + Undef, []: { + [] # Return empty array } default: { $count ? { diff --git a/plans/add_compiler.pp b/plans/add_compiler.pp index 023cce62..7e2fbece 100644 --- a/plans/add_compiler.pp +++ b/plans/add_compiler.pp @@ -33,8 +33,8 @@ # Stop puppet.service run_command('systemctl stop puppet.service', peadm::flatten_compact([ - $primary_postgresql_target, - $replica_puppetdb_target + $primary_postgresql_target, + $replica_puppetdb_target, ])) apply($replica_puppetdb_target) { @@ -78,11 +78,11 @@ # Check for and merge csr_attributes. 
run_plan('peadm::util::insert_csr_extension_requests', $compiler_target, - extension_requests => { - peadm::oid('pp_auth_role') => 'pe_compiler', - peadm::oid('peadm_availability_group') => $avail_group_letter - } - ) + extension_requests => { + peadm::oid('pp_auth_role') => 'pe_compiler', + peadm::oid('peadm_availability_group') => $avail_group_letter, + } + ) # we first assume that there is no agent installed on the node. If there is, nothing will happen. run_task('peadm::agent_install', $compiler_target, @@ -95,10 +95,10 @@ # If necessary, manually submit a CSR # ignoring errors to simplify logic - run_task('peadm::submit_csr', $compiler_target, {'_catch_errors' => true}) + run_task('peadm::submit_csr', $compiler_target, { '_catch_errors' => true }) # On primary, if necessary, sign the certificate request - run_task('peadm::sign_csr', $primary_target, { 'certnames' => [$compiler_target.peadm::certname()] } ) + run_task('peadm::sign_csr', $primary_target, { 'certnames' => [$compiler_target.peadm::certname()] }) # If there was already a signed cert, force the certificate extensions we want # TODO: update peadm::util::add_cert_extensions to take care of dns alt names @@ -121,17 +121,16 @@ # On run the puppet agent run_task('peadm::puppet_runonce', peadm::flatten_compact([ - $primary_postgresql_target, - $replica_puppetdb_target + $primary_postgresql_target, + $replica_puppetdb_target, ])) # On start puppet.service run_command('systemctl start puppet.service', peadm::flatten_compact([ - $primary_postgresql_target, - $replica_puppetdb_target, - $compiler_target, + $primary_postgresql_target, + $replica_puppetdb_target, + $compiler_target, ])) return("Adding or replacing compiler ${$compiler_target.peadm::certname()} succeeded.") - } diff --git a/plans/add_database.pp b/plans/add_database.pp index 3527c2eb..f1986591 100644 --- a/plans/add_database.pp +++ b/plans/add_database.pp @@ -3,14 +3,13 @@ Peadm::SingleTargetSpec $primary_host, Optional[Enum['init', 'pair']] 
$mode = undef, Optional[Enum[ - 'init-db-node', - 'replicate-db', - 'update-classification', - 'update-db-settings', - 'cleanup-db', - 'finalize']] $begin_at_step = undef, + 'init-db-node', + 'replicate-db', + 'update-classification', + 'update-db-settings', + 'cleanup-db', + 'finalize']] $begin_at_step = undef, ) { - $primary_target = peadm::get_targets($primary_host, 1) $postgresql_target = peadm::get_targets($targets, 1) @@ -44,8 +43,8 @@ } else { # If array is empty then no external databases were previously configured $no_external_db = peadm::flatten_compact([ - $postgresql_a_host, - $postgresql_b_host + $postgresql_a_host, + $postgresql_b_host, ]).empty # Pick operating mode based on array check @@ -60,9 +59,9 @@ if $operating_mode == 'init' { # If no other PSQL node then match primary group letter $avail_group_letter = peadm::flatten_compact($roles['server'].map |$k,$v| { - if $v == $primary_host { - $k - } + if $v == $primary_host { + $k + } })[0] # Assume PuppetDB backend hosted on Primary if in init mode $source_db_host = $primary_host @@ -70,14 +69,14 @@ # The letter which doesn't yet have a server assigned or in the event this # is a replacement operation, the letter this node was assigned to previously $avail_group_letter = peadm::flatten_compact($roles['postgresql'].map |$k,$v| { - if (! $v) or ($v == $postgresql_host) { - $k - } + if (! 
$v) or ($v == $postgresql_host) { + $k + } })[0] # When in pair mode we assume the other PSQL node will serve as our source $source_db_host = peadm::flatten_compact([ - $postgresql_a_host, - $postgresql_b_host + $postgresql_a_host, + $postgresql_b_host, ]).reject($postgresql_host)[0] } @@ -98,11 +97,11 @@ # Stop Puppet to ensure catalogs are not being compiled for PE infrastructure nodes run_command('systemctl stop puppet.service', peadm::flatten_compact([ - $postgresql_target, - $compilers, - $primary_target, - $replica_target, - $source_db_target + $postgresql_target, + $compilers, + $primary_target, + $replica_target, + $source_db_target, ])) # Stop frontend compiler services that causes changes to PuppetDB backend when @@ -120,7 +119,6 @@ # Update classification and database.ini settings, assume a replica PSQL # does not exist peadm::plan_step('update-classification') || { - # To ensure everything is functional when a replica exists but only a single # PostgreSQL node has been deployed, configure alternate availability group # to connect to other group's new node @@ -145,23 +143,22 @@ peadm::plan_step('update-db-settings') || { run_plan('peadm::util::update_db_setting', peadm::flatten_compact([ - $compilers, - $primary_target, - $replica_target - ]), + $compilers, + $primary_target, + $replica_target, + ]), postgresql_host => $postgresql_host, peadm_config => $peadm_config ) # (Re-)Start PuppetDB now that we are done making modifications run_command('systemctl restart pe-puppetdb.service', peadm::flatten_compact([ - $primary_target, - $replica_target + $primary_target, + $replica_target, ])) } peadm::plan_step('cleanup-db') || { - if $operating_mode == 'init' { # Clean up old puppetdb database on primary and those which were copied to # new host. 
@@ -170,7 +167,7 @@ 'pe-classifier', 'pe-inventory', 'pe-orchestrator', - 'pe-rbac' + 'pe-rbac', ] # If a primary replica exists then pglogical is enabled and will prevent @@ -181,9 +178,9 @@ # Clean up old databases $clean_source = peadm::flatten_compact([ - $source_db_target, - $primary_target, - $replica_target + $source_db_target, + $primary_target, + $replica_target, ]) run_plan('peadm::util::db_purge', $clean_source, databases => ['pe-puppetdb']) @@ -197,23 +194,22 @@ # agents run_command('systemctl start pe-puppetserver.service pe-puppetdb.service', $compilers) - peadm::plan_step('finalize') || { # Run Puppet to sweep up but no restarts should occur so do them in parallel run_task('peadm::puppet_runonce', peadm::flatten_compact([ - $postgresql_target, - $primary_target, - $compilers, - $replica_target + $postgresql_target, + $primary_target, + $compilers, + $replica_target, ])) # Start Puppet agent run_command('systemctl start puppet.service', peadm::flatten_compact([ - $postgresql_target, - $compilers, - $primary_target, - $replica_target, - $source_db_target + $postgresql_target, + $compilers, + $primary_target, + $replica_target, + $source_db_target, ])) } } diff --git a/plans/install.pp b/plans/install.pp index 2a77672c..9393d3cb 100644 --- a/plans/install.pp +++ b/plans/install.pp @@ -42,7 +42,7 @@ Optional[String] $compiler_pool_address = undef, Optional[String] $internal_compiler_a_pool_address = undef, Optional[String] $internal_compiler_b_pool_address = undef, - Optional[Hash] $pe_conf_data = { }, + Optional[Hash] $pe_conf_data = {}, Optional[Peadm::Ldap_config] $ldap_config = undef, # Code Manager @@ -126,4 +126,3 @@ # Return a string banner reporting on what was done return([$install_result, $configure_result]) } - diff --git a/types/ldap_config.pp b/types/ldap_config.pp index ab75d005..924ec128 100644 --- a/types/ldap_config.pp +++ b/types/ldap_config.pp @@ -1,25 +1,25 @@ type Peadm::Ldap_config = Struct[{ - base_dn => String, - connect_timeout 
=> Integer, - disable_ldap_matching_rule_in_chain => Boolean, - display_name => String, - group_lookup_attr => String, - group_member_attr => String, - group_name_attr => String, - group_object_class => String, - Optional[group_rdn] => Optional[String], - Optional[help_link] => Optional[String], - hostname => String, - Optional[login] => Optional[String], - Optional[password] => Optional[String], - port => Integer, - search_nested_groups => Boolean, - ssl => Boolean, - ssl_hostname_validation => Boolean, - ssl_wildcard_validation => Boolean, - start_tls => Boolean, - user_display_name_attr => String, - user_email_attr => String, - user_lookup_attr => String, - Optional[user_rdn] => Optional[String], + base_dn => String, + connect_timeout => Integer, + disable_ldap_matching_rule_in_chain => Boolean, + display_name => String, + group_lookup_attr => String, + group_member_attr => String, + group_name_attr => String, + group_object_class => String, + Optional[group_rdn] => Optional[String], + Optional[help_link] => Optional[String], + hostname => String, + Optional[login] => Optional[String], + Optional[password] => Optional[String], + port => Integer, + search_nested_groups => Boolean, + ssl => Boolean, + ssl_hostname_validation => Boolean, + ssl_wildcard_validation => Boolean, + start_tls => Boolean, + user_display_name_attr => String, + user_email_attr => String, + user_lookup_attr => String, + Optional[user_rdn] => Optional[String], }] From 5827944f821f0dea67debac689dfb38b710bb8f7 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 23 Sep 2022 13:42:06 +0200 Subject: [PATCH 02/46] code layout --- plans/status.pp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/plans/status.pp b/plans/status.pp index ceaee2f0..ba24e982 100644 --- a/plans/status.pp +++ b/plans/status.pp @@ -12,17 +12,17 @@ Enum[json,table] $format = 'table', Boolean $verbose = false, Boolean $summarize = true, - Boolean $colors = $format 
? { json => false, default => true } + Boolean $colors = $format ? { 'json' => false, default => true } ) { peadm::assert_supported_bolt_version() - $results = run_task('peadm::infrastatus', $targets, { format => 'json'}) + $results = run_task('peadm::infrastatus', $targets, { format => 'json' }) # returns the data in a hash - $stack_status = $results.reduce({}) | $res, $item | { + $stack_status = $results.reduce( {}) | $res, $item | { $data = $item.value[output] $stack_name = $item.target.peadm::certname() $status = peadm::determine_status($data, $colors).merge(stack_name => $stack_name ) - $res.merge({ $stack_name => $status }) + $res.merge( { $stack_name => $status }) } $overall_degraded_stacks = $stack_status.filter | $item | { $item[1][status] =~ /degraded/ } @@ -62,27 +62,27 @@ if $format == 'table' { # Summary table out::message( - format::table({ - title => $table_title, - head => $table_head, - rows => $stack_table_rows})) + format::table( { + title => $table_title, + head => $table_head, + rows => $stack_table_rows })) # Failed services table unless $bad_svc_rows.empty { out::message( - format::table({ - title => 'Failed Service Status', - head => $service_table_head, - rows => $bad_svc_rows.reduce([]) |$memo,$rows| { $memo + $rows }})) + format::table( { + title => 'Failed Service Status', + head => $service_table_head, + rows => $bad_svc_rows.reduce([]) |$memo,$rows| { $memo + $rows } })) } # Operational services table if $verbose and ! 
$good_svc_rows.empty { out::message( - format::table({ - title => 'Operational Service Status', - head => $service_table_head, - rows => $good_svc_rows.reduce([]) |$memo,$rows| { $memo + $rows }})) + format::table( { + title => 'Operational Service Status', + head => $service_table_head, + rows => $good_svc_rows.reduce([]) |$memo,$rows| { $memo + $rows } })) } } else { if $summarize { @@ -91,10 +91,10 @@ $summary_json = { 'summary' => { 'status' => $overall_status, - 'stacks' => $stack_table_rows.hash + 'stacks' => $stack_table_rows.hash, }, 'failed' => $failed, - 'operational' => $passed + 'operational' => $passed, } return $summary_json } else { From 6b25eba8a16f07dcb219d30fb962a774f2ae2a70 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 23 Sep 2022 13:43:12 +0200 Subject: [PATCH 03/46] typo --- functions/assert_supported_architecture.pp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/functions/assert_supported_architecture.pp b/functions/assert_supported_architecture.pp index 3d426227..c267bc95 100644 --- a/functions/assert_supported_architecture.pp +++ b/functions/assert_supported_architecture.pp @@ -32,7 +32,7 @@ function peadm::assert_supported_architecture ( } default: { # Invalid out::message(inline_epp(@(HEREDOC))) - Invalid architecture! Recieved: + Invalid architecture! Received: - primary <% if $replica_host { -%> - primary-replica From f43e363c30572debc91048e8e1fcfb99f3a6fac2 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Tue, 27 Sep 2022 14:50:41 +0200 Subject: [PATCH 04/46] docs and formatting --- plans/add_replica.pp | 22 +++---- .../plans/provision_test_cluster.pp | 60 ++++++++++--------- 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/plans/add_replica.pp b/plans/add_replica.pp index b30e581e..c8b95d33 100644 --- a/plans/add_replica.pp +++ b/plans/add_replica.pp @@ -5,10 +5,11 @@ # @summary Replace a replica host for a Standard or Large architecture. 
# Supported use cases: # 1: The existing replica is broken, we have a fresh new VM we want to provision the replica to. -# The new replica should have the same certname as the broken one. # @param primary_host - The hostname and certname of the primary Puppet server # @param replica_host - The hostname and certname of the replica VM -# @param replica_postgresql_host - The hostname and certname of the host with the replica PE-PosgreSQL database. +# @param replica_postgresql_host - The hostname and certname of the host with the replica PE-PosgreSQL database. +# @param token_file - (optional) the token file in a different location than the default. +# # Can be a separate host in an XL architecture, or undef in Standard or Large. plan peadm::add_replica( # Standard or Large @@ -21,14 +22,13 @@ # Common Configuration Optional[String] $token_file = undef, ) { - $primary_target = peadm::get_targets($primary_host, 1) $replica_target = peadm::get_targets($replica_host, 1) $replica_postgresql_target = peadm::get_targets($replica_postgresql_host, 1) run_command('systemctl stop puppet.service', peadm::flatten_compact([ - $primary_target, - $replica_postgresql_target, + $primary_target, + $replica_postgresql_target, ])) # Get current peadm config to ensure we forget active replicas @@ -36,8 +36,8 @@ # Make list of all possible replicas, configured and provided $replicas = peadm::flatten_compact([ - $replica_host, - $peadm_config['params']['replica_host'] + $replica_host, + $peadm_config['params']['replica_host'], ]).unique $certdata = run_task('peadm::cert_data', $primary_target).first.value @@ -103,7 +103,7 @@ '/etc/puppetlabs/orchestration-services/conf.d/secrets/keys.json', '/etc/puppetlabs/orchestration-services/conf.d/secrets/orchestrator-encryption-keys.json', '/etc/puppetlabs/console-services/conf.d/secrets/keys.json', - '/etc/puppetlabs/puppet/hiera.yaml' + '/etc/puppetlabs/puppet/hiera.yaml', ] parallelize($content_sources) |$path| { run_plan('peadm::util::copy_file', 
$replica_target, @@ -125,9 +125,9 @@ # start puppet service run_command('systemctl start puppet.service', peadm::flatten_compact([ - $primary_target, - $replica_postgresql_target, - $replica_target + $primary_target, + $replica_postgresql_target, + $replica_target, ])) return("Added replica ${replica_target}") diff --git a/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp b/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp index 5dc5f970..9494c533 100644 --- a/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp +++ b/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp @@ -3,41 +3,43 @@ $architecture, $image, ) { - $nodes = case $architecture { - 'standard': { - ['primary'] - } - 'standard-with-dr': { - ['primary', 'replica'] - } - 'large': { - ['primary', 'compiler'] - } - 'large-with-dr': { - ['primary', 'compiler', - 'replica', 'compiler'] - } - 'extra-large': { - ['primary', 'primary-pdb-postgresql', 'compiler'] - } - 'extra-large-with-dr': { - ['primary', 'primary-pdb-postgresql', 'compiler', - 'replica', 'replica-pdb-postgresql', 'compiler'] - } + 'standard': { + ['primary'] + } + 'standard-with-dr': { + ['primary', 'replica'] + } + 'large': { + ['primary', 'compiler'] + } + 'large-with-dr': { + ['primary', 'compiler', + 'replica', 'compiler'] } + 'extra-large': { + ['primary', 'primary-pdb-postgresql', 'compiler'] + } + 'extra-large-with-dr': { + ['primary', 'primary-pdb-postgresql', 'compiler', + 'replica', 'replica-pdb-postgresql', 'compiler'] + } + default: { + fail_plan("Unknown architecture: ${architecture}") + } + } $provision_results = # This SHOULD be `parallelize() || {}`. However, provision::* is entirely - # side-effect based, and not at all parallel-safe. - $nodes.each |$role| { - run_task("provision::${provider}", 'localhost', - action => 'provision', - platform => $image, - vars => "role: ${role}" - ) - } + # side-effect based, and not at all parallel-safe. 
+ $nodes.each |$role| { + run_task("provision::${provider}", 'localhost', + action => 'provision', + platform => $image, + vars => "role: ${role}" + ) + } return($provision_results) } From 93a212ec722cee8fdc823dcf3abb6051f35a8b9f Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Wed, 28 Sep 2022 13:56:14 +0200 Subject: [PATCH 05/46] formatting --- plans/backup.pp | 14 +-- plans/convert.pp | 22 ++--- plans/subplans/component_install.pp | 2 +- plans/subplans/configure.pp | 28 +++--- plans/subplans/install.pp | 88 +++++++++---------- plans/subplans/prepare_agent.pp | 7 +- .../peadm_spec/plans/add_replica.pp | 4 +- .../peadm_spec/plans/install_test_cluster.pp | 29 +++--- .../peadm_spec/plans/upgrade_test_cluster.pp | 31 ++++--- 9 files changed, 110 insertions(+), 115 deletions(-) diff --git a/plans/backup.pp b/plans/backup.pp index 6672f3dd..6e8e187a 100644 --- a/plans/backup.pp +++ b/plans/backup.pp @@ -49,7 +49,7 @@ ensure => 'directory', owner => 'root', group => 'root', - mode => '0700' + mode => '0700', } # Create a subdir for each backup type selected @@ -58,7 +58,7 @@ ensure => 'directory', owner => 'root', group => 'root', - mode => '0700' + mode => '0700', } } } @@ -81,7 +81,7 @@ if getvar('recovery_opts.rbac') { out::message('# Backing up ldap secret key if it exists') run_command(@("CMD"/L), $primary_target) - test -f /etc/puppetlabs/console-services/conf.d/secrets/keys.json \ + test -f /etc/puppetlabs/console-services/conf.d/secrets/keys.json \ && cp -rp /etc/puppetlabs/console-services/conf.d/secrets ${shellquote($backup_directory)}/rbac/ \ || echo secret ldap key doesnt exist | CMD @@ -91,13 +91,13 @@ if getvar('recovery_opts.orchestrator') { out::message('# Backing up orchestrator secret keys') run_command(@("CMD"), $primary_target) - cp -rp /etc/puppetlabs/orchestration-services/conf.d/secrets ${shellquote($backup_directory)}/orchestrator/ + cp -rp /etc/puppetlabs/orchestration-services/conf.d/secrets ${shellquote($backup_directory)}/orchestrator/ 
| CMD } $backup_databases.each |$name,$database_target| { run_command(@("CMD"/L), $primary_target) - /opt/puppetlabs/server/bin/pg_dump -Fd -Z3 -j4 \ + /opt/puppetlabs/server/bin/pg_dump -Fd -Z3 -j4 \ -f ${shellquote($backup_directory)}/${shellquote($name)}/pe-${shellquote($name)}.dump.d \ "sslmode=verify-ca \ host=${shellquote($database_target.peadm::certname())} \ @@ -110,11 +110,11 @@ } run_command(@("CMD"/L), $primary_target) - umask 0077 \ + umask 0077 \ && cd ${shellquote(dirname($backup_directory))} \ && tar -czf ${shellquote($backup_directory)}.tar.gz ${shellquote(basename($backup_directory))} \ && rm -rf ${shellquote($backup_directory)} | CMD - return({'path' => "${backup_directory}.tar.gz"}) + return( { 'path' => "${backup_directory}.tar.gz" }) } diff --git a/plans/convert.pp b/plans/convert.pp index 146d6dea..02f3088e 100644 --- a/plans/convert.pp +++ b/plans/convert.pp @@ -19,13 +19,13 @@ String $compiler_pool_address = $primary_host, Optional[String] $internal_compiler_a_pool_address = undef, Optional[String] $internal_compiler_b_pool_address = undef, - Array[String] $dns_alt_names = [ ], + Array[String] $dns_alt_names = [], Optional[Enum[ - 'modify-primary-certs', - 'modify-infra-certs', - 'convert-node-groups', - 'finalize']] $begin_at_step = undef, + 'modify-primary-certs', + 'modify-infra-certs', + 'convert-node-groups', + 'finalize']] $begin_at_step = undef, ) { peadm::assert_supported_bolt_version() @@ -39,11 +39,11 @@ $primary_postgresql_target = peadm::get_targets($primary_postgresql_host, 1) $all_targets = peadm::flatten_compact([ - $primary_target, - $replica_target, - $replica_postgresql_target, - $compiler_targets, - $primary_postgresql_target, + $primary_target, + $replica_target, + $replica_postgresql_target, + $compiler_targets, + $primary_postgresql_target, ]) # Ensure input valid for a supported architecture @@ -60,7 +60,7 @@ # Get trusted fact information for all compilers. 
Use peadm::certname() as # the hash key because the apply block below will break trying to parse the # $compiler_extensions variable if it has Target-type hash keys. - $cert_extensions = run_task('peadm::cert_data', $all_targets).reduce({}) |$memo,$result| { + $cert_extensions = run_task('peadm::cert_data', $all_targets).reduce( {}) |$memo,$result| { $memo + { $result.target.peadm::certname() => $result['extensions'] } } diff --git a/plans/subplans/component_install.pp b/plans/subplans/component_install.pp index 774b1403..80f2cd7e 100644 --- a/plans/subplans/component_install.pp +++ b/plans/subplans/component_install.pp @@ -12,7 +12,7 @@ Enum['A', 'B'] $avail_group_letter, Optional[Variant[String[1], Array]] $dns_alt_names = undef, Optional[String[1]] $role = undef -){ +) { $component_target = peadm::get_targets($targets, 1) $primary_target = peadm::get_targets($primary_host, 1) diff --git a/plans/subplans/configure.pp b/plans/subplans/configure.pp index 3d5a506a..ff4e1170 100644 --- a/plans/subplans/configure.pp +++ b/plans/subplans/configure.pp @@ -68,9 +68,9 @@ ] run_plan('peadm::util::copy_file', peadm::flatten_compact([ - $replica_target, - $compiler_targets, - ]), + $replica_target, + $compiler_targets, + ]), source_host => $primary_target, path => $common_content_source ) @@ -116,7 +116,7 @@ } if $ldap_config { - # Run the task to configure ldap + # Run the task to configure ldap $ldap_result = run_task('peadm::pe_ldap_config', $primary_target, pe_main => $primary_target.peadm::certname(), ldap_config => $ldap_config, @@ -132,11 +132,11 @@ # Run Puppet everywhere to pick up last remaining config tweaks run_task('peadm::puppet_runonce', peadm::flatten_compact([ - $primary_target, - $primary_postgresql_target, - $compiler_targets, - $replica_target, - $replica_postgresql_target, + $primary_target, + $primary_postgresql_target, + $compiler_targets, + $replica_target, + $replica_postgresql_target, ])) # Deploy an environment if a deploy environment is specified 
@@ -148,11 +148,11 @@ # Ensure Puppet agent service is running now that configuration is complete run_command('systemctl start puppet', peadm::flatten_compact([ - $primary_target, - $replica_target, - $primary_postgresql_target, - $replica_postgresql_target, - $compiler_targets, + $primary_target, + $replica_target, + $primary_postgresql_target, + $replica_postgresql_target, + $compiler_targets, ])) return("Configuration of Puppet Enterprise ${arch['architecture']} succeeded.") diff --git a/plans/subplans/install.pp b/plans/subplans/install.pp index 41b41073..e237334e 100644 --- a/plans/subplans/install.pp +++ b/plans/subplans/install.pp @@ -42,8 +42,8 @@ String $console_password, Peadm::Pe_version $version, Optional[String] $pe_installer_source = undef, - Array[String] $dns_alt_names = [ ], - Hash $pe_conf_data = { }, + Array[String] $dns_alt_names = [], + Hash $pe_conf_data = {}, # Code Manager Optional[String] $r10k_remote = undef, @@ -79,32 +79,32 @@ ) $all_targets = peadm::flatten_compact([ - $primary_target, - $primary_postgresql_target, - $replica_target, - $replica_postgresql_target, - $compiler_targets, + $primary_target, + $primary_postgresql_target, + $replica_target, + $replica_postgresql_target, + $compiler_targets, ]) $primary_targets = peadm::flatten_compact([ - $primary_target, - $replica_target, + $primary_target, + $replica_target, ]) $database_targets = peadm::flatten_compact([ - $primary_postgresql_target, - $replica_postgresql_target, + $primary_postgresql_target, + $replica_postgresql_target, ]) $pe_installer_targets = peadm::flatten_compact([ - $primary_target, - $primary_postgresql_target, - $replica_postgresql_target, + $primary_target, + $primary_postgresql_target, + $replica_postgresql_target, ]) $agent_installer_targets = peadm::flatten_compact([ - $compiler_targets, - $replica_target, + $compiler_targets, + $replica_target, ]) # Clusters A and B are used to divide PuppetDB availability for compilers @@ -137,7 +137,7 @@ if ($name != 
$result['hostname']) { warning(@("HEREDOC")) WARNING: Target name / hostname mismatch: target ${name} reports ${result['hostname']} - Certificate name will be set to target name. Please ensure target name is correct and resolvable + Certificate name will be set to target name. Please ensure target name is correct and resolvable |-HEREDOC } if ($result['platform'] != $platform) { @@ -156,29 +156,29 @@ ).map |$t| { $t.peadm::certname() }, } - $primary_pe_conf = peadm::generate_pe_conf({ - 'console_admin_password' => $console_password, - 'puppet_enterprise::puppet_master_host' => $primary_target.peadm::certname(), - 'pe_install::puppet_master_dnsaltnames' => $dns_alt_names, - 'puppet_enterprise::puppetdb_database_host' => $primary_postgresql_target.peadm::certname(), - 'puppet_enterprise::profile::master::code_manager_auto_configure' => true, - 'puppet_enterprise::profile::master::r10k_remote' => $r10k_remote, - 'puppet_enterprise::profile::master::r10k_private_key' => $r10k_private_key ? { - undef => undef, - default => '/etc/puppetlabs/puppetserver/ssh/id-control_repo.rsa', - }, + $primary_pe_conf = peadm::generate_pe_conf( { + 'console_admin_password' => $console_password, + 'puppet_enterprise::puppet_master_host' => $primary_target.peadm::certname(), + 'pe_install::puppet_master_dnsaltnames' => $dns_alt_names, + 'puppet_enterprise::puppetdb_database_host' => $primary_postgresql_target.peadm::certname(), + 'puppet_enterprise::profile::master::code_manager_auto_configure' => true, + 'puppet_enterprise::profile::master::r10k_remote' => $r10k_remote, + 'puppet_enterprise::profile::master::r10k_private_key' => $r10k_private_key ? 
{ + undef => undef, + default => '/etc/puppetlabs/puppetserver/ssh/id-control_repo.rsa', + }, } + $puppetdb_database_temp_config + $pe_conf_data) - $primary_postgresql_pe_conf = peadm::generate_pe_conf({ - 'console_admin_password' => 'not used', - 'puppet_enterprise::puppet_master_host' => $primary_target.peadm::certname(), - 'puppet_enterprise::database_host' => $primary_postgresql_target.peadm::certname(), + $primary_postgresql_pe_conf = peadm::generate_pe_conf( { + 'console_admin_password' => 'not used', + 'puppet_enterprise::puppet_master_host' => $primary_target.peadm::certname(), + 'puppet_enterprise::database_host' => $primary_postgresql_target.peadm::certname(), } + $puppetdb_database_temp_config + $pe_conf_data) - $replica_postgresql_pe_conf = peadm::generate_pe_conf({ - 'console_admin_password' => 'not used', - 'puppet_enterprise::puppet_master_host' => $primary_target.peadm::certname(), - 'puppet_enterprise::database_host' => $replica_postgresql_target.peadm::certname(), + $replica_postgresql_pe_conf = peadm::generate_pe_conf( { + 'console_admin_password' => 'not used', + 'puppet_enterprise::puppet_master_host' => $primary_target.peadm::certname(), + 'puppet_enterprise::database_host' => $replica_postgresql_target.peadm::certname(), } + $puppetdb_database_temp_config + $pe_conf_data) # Upload the pe.conf files to the hosts that need them, and ensure correctly @@ -228,7 +228,7 @@ run_plan('peadm::util::insert_csr_extension_requests', $primary_target, extension_requests => { peadm::oid('peadm_role') => 'puppet/server', - peadm::oid('peadm_availability_group') => 'A' + peadm::oid('peadm_availability_group') => 'A', } ) }, @@ -236,7 +236,7 @@ run_plan('peadm::util::insert_csr_extension_requests', $replica_target, extension_requests => { peadm::oid('peadm_role') => 'puppet/server', - peadm::oid('peadm_availability_group') => 'B' + peadm::oid('peadm_availability_group') => 'B', } ) }, @@ -244,7 +244,7 @@ run_plan('peadm::util::insert_csr_extension_requests', 
$compiler_a_targets, extension_requests => { peadm::oid('pp_auth_role') => 'pe_compiler', - peadm::oid('peadm_availability_group') => 'A' + peadm::oid('peadm_availability_group') => 'A', } ) }, @@ -252,7 +252,7 @@ run_plan('peadm::util::insert_csr_extension_requests', $compiler_b_targets, extension_requests => { peadm::oid('pp_auth_role') => 'pe_compiler', - peadm::oid('peadm_availability_group') => 'B' + peadm::oid('peadm_availability_group') => 'B', } ) }, @@ -260,7 +260,7 @@ run_plan('peadm::util::insert_csr_extension_requests', $primary_postgresql_target, extension_requests => { peadm::oid('peadm_role') => 'puppet/puppetdb-database', - peadm::oid('peadm_availability_group') => 'A' + peadm::oid('peadm_availability_group') => 'A', } ) }, @@ -268,10 +268,10 @@ run_plan('peadm::util::insert_csr_extension_requests', $replica_postgresql_target, extension_requests => { peadm::oid('peadm_role') => 'puppet/puppetdb-database', - peadm::oid('peadm_availability_group') => 'B' + peadm::oid('peadm_availability_group') => 'B', } ) - } + }, ] wait($csr_yaml_jobs) @@ -367,7 +367,7 @@ # Ensure certificate requests have been submitted, then run Puppet run_task('peadm::submit_csr', $target) - run_task('peadm::sign_csr', $primary_target, { 'certnames' => [$target.peadm::certname] } ) + run_task('peadm::sign_csr', $primary_target, { 'certnames' => [$target.peadm::certname] }) run_task('peadm::puppet_runonce', $target) } diff --git a/plans/subplans/prepare_agent.pp b/plans/subplans/prepare_agent.pp index 507c33ac..fff49e6e 100644 --- a/plans/subplans/prepare_agent.pp +++ b/plans/subplans/prepare_agent.pp @@ -5,7 +5,6 @@ Hash $certificate_extensions, Optional[Array] $dns_alt_names = undef, ) { - $agent_target = peadm::get_targets($targets, 1) $primary_target = peadm::get_targets($primary_host, 1) @@ -42,7 +41,7 @@ # Obtain data about certificate from primary $certstatus = run_task('peadm::cert_valid_status', $primary_target, - certname => $agent_target.peadm::certname()).first.value + 
certname => $agent_target.peadm::certname()).first.value # Obtain data about certificate from agent $certdata = run_task('peadm::cert_data', $agent_target).first.value @@ -73,10 +72,10 @@ # If necessary, manually submit a CSR # ignoring errors to simplify logic unless $skip_csr { - run_task('peadm::submit_csr', $agent_target, {'_catch_errors' => true}) + run_task('peadm::submit_csr', $agent_target, { '_catch_errors' => true }) # On primary, if necessary, sign the certificate request - run_task('peadm::sign_csr', $primary_target, { 'certnames' => [$agent_target.peadm::certname()] } ) + run_task('peadm::sign_csr', $primary_target, { 'certnames' => [$agent_target.peadm::certname()] }) } # If agent certificate is good but lacks appropriate extensions, plan will still diff --git a/spec/acceptance/peadm_spec/plans/add_replica.pp b/spec/acceptance/peadm_spec/plans/add_replica.pp index 89cefc48..b2b053d8 100644 --- a/spec/acceptance/peadm_spec/plans/add_replica.pp +++ b/spec/acceptance/peadm_spec/plans/add_replica.pp @@ -1,6 +1,5 @@ plan peadm_spec::add_replica( -){ - +) { $t = get_targets('*') wait_until_available($t) @@ -22,5 +21,4 @@ replica_host => $replica_host, replica_postgresql_host => $replica_postgresql_host ? 
{ [] => undef, default => $replica_postgresql_host }, ) - } diff --git a/spec/acceptance/peadm_spec/plans/install_test_cluster.pp b/spec/acceptance/peadm_spec/plans/install_test_cluster.pp index 0b3b77d5..440b449a 100644 --- a/spec/acceptance/peadm_spec/plans/install_test_cluster.pp +++ b/spec/acceptance/peadm_spec/plans/install_test_cluster.pp @@ -3,7 +3,6 @@ String[1] $version, Enum['enable', 'disable'] $fips = 'disable' ) { - $t = get_targets('*') wait_until_available($t) @@ -29,36 +28,36 @@ $arch_params = case $architecture { - 'standard': {{ + 'standard': { { primary_host => $t.filter |$n| { $n.vars['role'] == 'primary' }, - }} - 'standard-with-dr': {{ + } } + 'standard-with-dr': { { primary_host => $t.filter |$n| { $n.vars['role'] == 'primary' }, replica_host => $t.filter |$n| { $n.vars['role'] == 'replica' }, - }} - 'large': {{ + } } + 'large': { { primary_host => $t.filter |$n| { $n.vars['role'] == 'primary' }, compiler_hosts => $t.filter |$n| { $n.vars['role'] == 'compiler' }, - }} - 'large-with-dr': {{ + } } + 'large-with-dr': { { primary_host => $t.filter |$n| { $n.vars['role'] == 'primary' }, replica_host => $t.filter |$n| { $n.vars['role'] == 'replica' }, compiler_hosts => $t.filter |$n| { $n.vars['role'] == 'compiler' }, - }} - 'extra-large': {{ + } } + 'extra-large': { { primary_host => $t.filter |$n| { $n.vars['role'] == 'primary' }, primary_postgresql_host => $t.filter |$n| { $n.vars['role'] == 'primary-pdb-postgresql' }, compiler_hosts => $t.filter |$n| { $n.vars['role'] == 'compiler' }, - }} - 'extra-large-with-dr': {{ + } } + 'extra-large-with-dr': { { primary_host => $t.filter |$n| { $n.vars['role'] == 'primary' }, primary_postgresql_host => $t.filter |$n| { $n.vars['role'] == 'primary-pdb-postgresql' }, replica_host => $t.filter |$n| { $n.vars['role'] == 'replica' }, replica_postgresql_host => $t.filter |$n| { $n.vars['role'] == 'replica-pdb-postgresql' }, compiler_hosts => $t.filter |$n| { $n.vars['role'] == 'compiler' }, - }} - default: { 
fail('Invalid architecture!') } - } + } } + default: { fail('Invalid architecture!') } + } $install_result = run_plan('peadm::install', $arch_params + $common_params) diff --git a/spec/acceptance/peadm_spec/plans/upgrade_test_cluster.pp b/spec/acceptance/peadm_spec/plans/upgrade_test_cluster.pp index af08abee..e3ab8fc3 100644 --- a/spec/acceptance/peadm_spec/plans/upgrade_test_cluster.pp +++ b/spec/acceptance/peadm_spec/plans/upgrade_test_cluster.pp @@ -2,8 +2,7 @@ $architecture, $version, $download_mode -){ - +) { $t = get_targets('*') wait_until_available($t) @@ -19,36 +18,36 @@ $arch_params = case $architecture { - 'standard': {{ + 'standard': { { primary_host => $t.filter |$n| { $n.vars['role'] == 'primary' }, - }} - 'standard-with-dr': {{ + } } + 'standard-with-dr': { { primary_host => $t.filter |$n| { $n.vars['role'] == 'primary' }, replica_host => $t.filter |$n| { $n.vars['role'] == 'replica' }, - }} - 'large': {{ + } } + 'large': { { primary_host => $t.filter |$n| { $n.vars['role'] == 'primary' }, compiler_hosts => $t.filter |$n| { $n.vars['role'] == 'compiler' }, - }} - 'large-with-dr': {{ + } } + 'large-with-dr': { { primary_host => $t.filter |$n| { $n.vars['role'] == 'primary' }, replica_host => $t.filter |$n| { $n.vars['role'] == 'replica' }, compiler_hosts => $t.filter |$n| { $n.vars['role'] == 'compiler' }, - }} - 'extra-large': {{ + } } + 'extra-large': { { primary_host => $t.filter |$n| { $n.vars['role'] == 'primary' }, primary_postgresql_host => $t.filter |$n| { $n.vars['role'] == 'primary-pdb-postgresql' }, compiler_hosts => $t.filter |$n| { $n.vars['role'] == 'compiler' }, - }} - 'extra-large-with-dr': {{ + } } + 'extra-large-with-dr': { { primary_host => $t.filter |$n| { $n.vars['role'] == 'primary' }, primary_postgresql_host => $t.filter |$n| { $n.vars['role'] == 'primary-pdb-postgresql' }, replica_host => $t.filter |$n| { $n.vars['role'] == 'replica' }, replica_postgresql_host => $t.filter |$n| { $n.vars['role'] == 'replica-pdb-postgresql' }, 
compiler_hosts => $t.filter |$n| { $n.vars['role'] == 'compiler' }, - }} - default: { fail('Invalid architecture!') } - } + } } + default: { fail('Invalid architecture!') } + } $params = $arch_params + $common_params run_plan('peadm::upgrade', $params) From 18c851ccce8631853e599cf38b0e7cfc6e65b3af Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 30 Sep 2022 12:24:13 +0200 Subject: [PATCH 06/46] formatting --- plans/subplans/modify_certificate.pp | 32 +++++++++++++--------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/plans/subplans/modify_certificate.pp b/plans/subplans/modify_certificate.pp index 1b571aee..8159fb65 100644 --- a/plans/subplans/modify_certificate.pp +++ b/plans/subplans/modify_certificate.pp @@ -3,8 +3,8 @@ Peadm::SingleTargetSpec $targets, TargetSpec $primary_host, String $primary_certname, - Hash $add_extensions = { }, - Array $remove_extensions = [ ], + Hash $add_extensions = {}, + Array $remove_extensions = [], Optional[Array] $dns_alt_names = undef, Boolean $force_regenerate = false, ) { @@ -32,11 +32,10 @@ # If the existing certificate meets all the requirements, there's no need # to regenerate it. Skip it and move on to the next. 
if ($certdata['certificate-exists'] and - ($desired_alt_names == $existing_alt_names) and - ($desired_exts.all |$key,$val| { $existing_exts[$key] == $val }) and - !($remove_extensions.any |$key| { $key in $existing_exts.keys }) and - !$force_regenerate) - { + ($desired_alt_names == $existing_alt_names) and + ($desired_exts.all |$key,$val| { $existing_exts[$key] == $val }) and + !($remove_extensions.any |$key| { $key in $existing_exts.keys }) and + !$force_regenerate) { out::message("${certname} already has requested modifications; certificate will not be re-issued") return('Skipped') } @@ -58,8 +57,8 @@ merge => false, ) - $ca_clean_result = run_command(@("HEREDOC"/L), $primary_target, _catch_errors => true).first - /opt/puppetlabs/bin/puppetserver ca clean --certname ${certname} + $ca_clean_result = run_command(@("HEREDOC"/L), $primary_target, _catch_errors => true).first + /opt/puppetlabs/bin/puppetserver ca clean --certname ${certname} |-HEREDOC unless $ca_clean_result.ok { @@ -68,10 +67,9 @@ # Scenario 2: the primary's cert can't be cleaned because it's been deleted. # Scenario 3: any component's cert can't be cleaned because it's been deleted. unless ($target_is_primary and - ($ca_clean_result[merged_output] =~ /certificate revoked/ or - $ca_clean_result[merged_output] =~ /Could not find 'hostcert'/)) or - ($ca_clean_result[merged_output] =~ /Could not find files to clean/) - { + ($ca_clean_result[merged_output] =~ /certificate revoked/ or + $ca_clean_result[merged_output] =~ /Could not find 'hostcert'/)) or + ($ca_clean_result[merged_output] =~ /Could not find files to clean/) { fail_plan($ca_clean_result[merged_output]) } } @@ -95,9 +93,9 @@ else { # PRIMARY cert regeneration # The docs are broken, and the process is unclean. Sadface. 
- run_task('service', $target, {action => 'stop', name => 'pe-puppetserver'}) + run_task('service', $target, { action => 'stop', name => 'pe-puppetserver' }) run_command(@("HEREDOC"/L), $target) - rm -f \ + rm -f \ /etc/puppetlabs/puppet/ssl/certs/${certname}.pem \ /etc/puppetlabs/puppet/ssl/private_keys/${certname}.pem \ /etc/puppetlabs/puppet/ssl/public_keys/${certname}.pem \ @@ -105,12 +103,12 @@ /etc/puppetlabs/puppet/ssl/ca/signed/${certname}.pem \ |-HEREDOC run_command(@("HEREDOC"/L), $target) - /opt/puppetlabs/bin/puppetserver ca generate \ + /opt/puppetlabs/bin/puppetserver ca generate \ --certname ${certname} \ --subject-alt-names ${alt_names.join(',')} \ --ca-client |-HEREDOC - run_task('service', $target, {action => 'start', name => 'pe-puppetserver'}) + run_task('service', $target, { action => 'start', name => 'pe-puppetserver' }) } # Fire puppet back up when done From acbef6b2a01e9b61926b10cb21b2c2f2f84af95a Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 30 Sep 2022 12:24:39 +0200 Subject: [PATCH 07/46] add perform_failover spec plan --- .../peadm_spec/plans/perform_failover.pp | 64 +++++++++++++++++++ .../plans/provision_test_cluster.pp | 4 ++ 2 files changed, 68 insertions(+) create mode 100644 spec/acceptance/peadm_spec/plans/perform_failover.pp diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp new file mode 100644 index 00000000..363868dc --- /dev/null +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -0,0 +1,64 @@ +plan peadm_spec::perform_failover( + String[1] $console_password +) { + # get node certnames + $t = get_targets('*') + # wait_until_available($t) + + parallelize($t) |$target| { + $fqdn = run_command('hostname -f', $target) + $target.set_var('certname', $fqdn.first['stdout'].chomp) + } + + # run infra status on the primary + $primary_host = $t.filter |$n| { $n.vars['role'] == 'primary' } + # out::verbose("Running peadm::status on new primary 
host ${primary_host}") + # run_plan('peadm::status', $primary_host) + + # # bring down the current primary + # out::verbose("Bringing down primary host ${primary_host}") + # run_task('reboot', $primary_host, shutdown_only => true) + + # promote the replica to new primary + $replica_host = $t.filter |$n| { $n.vars['role'] == 'replica' } + # out::verbose("Promoting replica host ${replica_host} to primary") + # run_command(@("HEREDOC"/L), $replica_host) + # /opt/puppetlabs/bin/puppet infra promote replica --topology mono-with-compile --yes + # |-HEREDOC + + # generate access token + out::verbose("Generating access token on replica host ${replica_host}") + run_task('peadm::rbac_token', $replica_host, + password => $console_password, + token_lifetime => '1y', + ) + + # $primary_certname = $primary_host.peadm::certname() + # purge the "failed" primary node + run_command(@("HEREDOC"/L), $replica_host) + # /opt/puppetlabs/bin/puppet node purge ${peadm::certname($primary_host)} + /opt/puppetlabs/bin/puppet node purge ip-10-138-1-143.eu-central-1.compute.internal + |-HEREDOC + + # add new replica + $replica_postgresql_host = $t.filter |$n| { $n.vars['role'] == 'primary-pdb-postgresql' } + $new_replica_host = $t.filter |$n| { $n.vars['role'] == 'spare-replica' } + + if $new_replica_host == [] { + fail_plan('"spare-replica" role missing from inventory, cannot continue') + } + + out::verbose("Adding new replica host ${new_replica_host} to primary") + run_plan('peadm::add_replica', + primary_host => $replica_host.first(), + replica_host => $new_replica_host.first(), + replica_postgresql_host => $replica_postgresql_host ? { [] => undef, default => $replica_postgresql_host.first() }, + ) + + # run infra status on the new primary + out::verbose("Running peadm::status on new primary host ${replica_host}") + run_plan('peadm::status', $replica_host) + + out::message('Failover process complete. 
New configuration:') + run_task('peadm::get_peadm_config', $replica_host) +} diff --git a/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp b/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp index 9494c533..b6c4f167 100644 --- a/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp +++ b/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp @@ -25,6 +25,10 @@ ['primary', 'primary-pdb-postgresql', 'compiler', 'replica', 'replica-pdb-postgresql', 'compiler'] } + 'extra-large-with-dr-and-spare-replica': { + ['primary', 'primary-pdb-postgresql', 'compiler', + 'replica', 'replica-pdb-postgresql', 'compiler', 'spare-replica'] + } default: { fail_plan("Unknown architecture: ${architecture}") } From 85a3334abd3ce3e69f030caf5d0221caa18bc728 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 30 Sep 2022 12:24:57 +0200 Subject: [PATCH 08/46] output error when rbac token request fails --- tasks/rbac_token.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/rbac_token.rb b/tasks/rbac_token.rb index 93c32bec..9ad76f1f 100755 --- a/tasks/rbac_token.rb +++ b/tasks/rbac_token.rb @@ -29,7 +29,7 @@ request.body = body response = http.request(request) -raise unless response.is_a? Net::HTTPSuccess +raise "Error requesting token, #{response.body}" unless response.is_a? 
Net::HTTPSuccess token = JSON.parse(response.body)['token'] FileUtils.mkdir_p('/root/.puppetlabs') From ab85bc9189e1b64489f1cc09e7b454cfcada062f Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 30 Sep 2022 13:45:00 +0200 Subject: [PATCH 09/46] fix failover test plan and GA --- .github/workflows/test-failover.yaml | 221 ++++++++++++++++++ .../peadm_spec/plans/perform_failover.pp | 37 ++- 2 files changed, 239 insertions(+), 19 deletions(-) create mode 100644 .github/workflows/test-failover.yaml diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml new file mode 100644 index 00000000..89cfbd4e --- /dev/null +++ b/.github/workflows/test-failover.yaml @@ -0,0 +1,221 @@ +--- +name: "Failover test" + +on: + workflow_dispatch: + inputs: + image: + description: 'GCP image for test cluster' + required: true + default: 'almalinux-cloud/almalinux-8' + version: + description: 'PE version to install' + required: true + default: '2019.8.12' + version_to_upgrade: + description: 'PE version to upgrade to' + required: false + default: '2021.7.0' + ssh-debugging: + description: 'Boolean; whether or not to pause for ssh debugging' + required: true + default: 'false' + +env: + HONEYCOMB_WRITEKEY: 7f3c63a70eecc61d635917de46bea4e6 + HONEYCOMB_DATASET: litmus tests + +jobs: + test-failover: + name: "PE ${{ matrix.version }} ${{ matrix.architecture }} on ${{ matrix.image }}" + runs-on: ubuntu-20.04 + env: + BOLT_GEM: true + BOLT_DISABLE_ANALYTICS: true + BUILDEVENT_FILE: '../buildevents.txt' + LANG: 'en_US.UTF-8' + strategy: + fail-fast: false + matrix: + architecture: + - "extra-large-with-dr-and-spare-replica" + version: + - "${{ github.event.inputs.version }}" + image: + - "${{ github.event.inputs.image }}" + + steps: + - name: 'Start SSH session' + if: ${{ github.event.inputs.ssh-debugging == 'true' }} + uses: luchihoratiu/debug-via-ssh@main + with: + NGROK_AUTH_TOKEN: ${{ secrets.NGROK_AUTH_TOKEN }} + SSH_PASS: ${{ secrets.SSH_PASS }} + + 
- name: "Honeycomb: Start recording" + uses: puppetlabs/kvrhdn-gha-buildevents@pdk-templates-v1 + with: + apikey: ${{ env.HONEYCOMB_WRITEKEY }} + dataset: ${{ env.HONEYCOMB_DATASET }} + job-status: ${{ job.status }} + + - name: "Honeycomb: Start first step" + run: | + echo STEP_ID=setup-test-cluster >> $GITHUB_ENV + echo STEP_START=$(date +%s) >> $GITHUB_ENV + + - name: "Checkout Source" + uses: actions/checkout@v2 + + - name: "Activate Ruby 2.7" + uses: ruby/setup-ruby@v1 + with: + ruby-version: "2.7" + bundler-cache: true + + - name: "Print bundle environment" + if: ${{ github.repository_owner == 'puppetlabs' }} + run: | + echo ::group::info:bundler + buildevents cmd $TRACE_ID $STEP_ID 'bundle env' -- bundle env + echo ::endgroup:: + + - name: "Honeycomb: Record environment setup time" + if: ${{ always() }} + run: | + echo ::group::honeycomb + buildevents step $TRACE_ID $STEP_ID $STEP_START 'Set up environment' + echo STEP_ID=${{ matrix.architecture }}-${{ matrix.image }}-provision >> $GITHUB_ENV + echo STEP_START=$(date +%s) >> $GITHUB_ENV + echo ::endgroup:: + + - name: 'Provision test cluster (specified architecture with added DR)' + timeout-minutes: 15 + run: | + echo ::group::prepare + mkdir -p $HOME/.ssh + echo 'Host *' > $HOME/.ssh/config + echo ' ServerAliveInterval 150' >> $HOME/.ssh/config + echo ' ServerAliveCountMax 2' >> $HOME/.ssh/config + buildevents cmd $TRACE_ID $STEP_ID 'rake spec_prep' -- bundle exec rake spec_prep + echo ::endgroup:: + + echo ::group::provision + buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::provision_test_cluster' -- \ + bundle exec bolt plan run peadm_spec::provision_test_cluster \ + --modulepath spec/fixtures/modules \ + provider=provision_service \ + image=${{ matrix.image }} \ + architecture=${{ matrix.architecture }}-with-dr + buildevents cmd $TRACE_ID $STEP_ID 'bolt task run provision::provision_service' -- \ + bundle exec bolt bolt task run provision::provision_service \ + --modulepath 
spec/fixtures/modules \ + action=provision + platform=${{ matrix.image }} \ + vars="role: primary" + echo ::endgroup:: + + echo ::group::info:request + cat request.json || true; echo + echo ::endgroup:: + + echo ::group::info:inventory + sed -e 's/password: .*/password: "[redacted]"/' < spec/fixtures/litmus_inventory.yaml || true + echo ::endgroup:: + + - name: "Honeycomb: Record provision time" + if: ${{ always() }} + run: | + echo ::group::honeycomb + buildevents step $TRACE_ID $STEP_ID $STEP_START 'Provision test cluster' + echo STEP_ID=${{ matrix.architecture }}-${{ matrix.image }}-install >> $GITHUB_ENV + echo STEP_START=$(date +%s) >> $GITHUB_ENV + echo ::endgroup:: + + - name: 'Install PE on test cluster' + timeout-minutes: 120 + run: | + buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::install_test_cluster' -- \ + bundle exec bolt plan run peadm_spec::install_test_cluster \ + --inventoryfile spec/fixtures/litmus_inventory.yaml \ + --modulepath spec/fixtures/modules \ + architecture=${{ matrix.architecture }} \ + version=${{ matrix.version }} + + - name: "Honeycomb: Record install time" + if: ${{ always() }} + run: | + echo ::group::honeycomb + buildevents step $TRACE_ID $STEP_ID $STEP_START 'Install PE on test cluster' + echo STEP_ID=${{ matrix.architecture }}-${{ matrix.image }}-tear_down >> $GITHUB_ENV + echo STEP_START=$(date +%s) >> $GITHUB_ENV + echo ::endgroup:: + + - name: 'Perform failover' + run: | + buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::perform_failover' -- \ + bundle exec bolt plan run peadm_spec::perform_failover \ + --inventoryfile spec/fixtures/litmus_inventory.yaml \ + --modulepath spec/fixtures/modules \ + platform=${{ matrix.image }} \ + vars="role: primary" + + - name: "Honeycomb: Record falover time" + if: ${{ always() }} + run: | + echo ::group::honeycomb + buildevents step $TRACE_ID $STEP_ID $STEP_START 'Perform failover' + echo STEP_ID=${{ matrix.architecture }}-${{ matrix.image }}-tear_down >> 
$GITHUB_ENV + echo STEP_START=$(date +%s) >> $GITHUB_ENV + echo ::endgroup:: + + - name: 'Wait as long as the file ${HOME}/pause file is present' + if: ${{ always() && github.event.inputs.ssh-debugging == 'true' }} + run: | + while [ -f "${HOME}/pause" ] ; do + echo "${HOME}/pause present, sleeping for 60 seconds..." + sleep 60 + done + echo "${HOME}/pause absent, continuing workflow." + + - name: 'Upgrade PE on test cluster' + if: ${{ always() && github.event.inputs.version_to_upgrade != '' }} + timeout-minutes: 120 + run: | + buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::upgrade_test_cluster' -- \ + bundle exec bolt plan run peadm_spec::upgrade_test_cluster \ + --inventoryfile spec/fixtures/litmus_inventory.yaml \ + --modulepath spec/fixtures/modules \ + architecture='extra-large-with-dr' \ + download_mode='direct' \ + version=${{ matrix.version_to_upgrade }} + + - name: "Honeycomb: Record upgrade time" + if: ${{ always() && github.event.inputs.version_to_upgrade != '' }} + run: | + echo ::group::honeycomb + buildevents step $TRACE_ID $STEP_ID $STEP_START 'Upgrade PE on test cluster' + echo STEP_ID=${{ matrix.architecture }}-${{ matrix.version_to_upgrade }}-tear_down >> $GITHUB_ENV + echo STEP_START=$(date +%s) >> $GITHUB_ENV + echo ::endgroup:: + + - name: 'Tear down test cluster' + if: ${{ always() }} + continue-on-error: true + run: | + if [ -f spec/fixtures/litmus_inventory.yaml ]; then + echo ::group::tear_down + buildevents cmd $TRACE_ID $STEP_ID 'rake litmus:tear_down' -- bundle exec rake 'litmus:tear_down' + echo ::endgroup:: + + echo ::group::info:request + cat request.json || true; echo + echo ::endgroup:: + fi + + - name: "Honeycomb: Record tear down time" + if: ${{ always() }} + run: | + echo ::group::honeycomb + buildevents step $TRACE_ID $STEP_ID $STEP_START 'Tear down test cluster' + echo ::endgroup:: diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 
363868dc..923545d1 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -1,9 +1,10 @@ +# This plan performs a failover procedure on an XL architecture +# It assumes an inventory files with roles specified including a `spare-replica` role plan peadm_spec::perform_failover( - String[1] $console_password ) { # get node certnames $t = get_targets('*') - # wait_until_available($t) + wait_until_available($t) parallelize($t) |$target| { $fqdn = run_command('hostname -f', $target) @@ -12,32 +13,30 @@ # run infra status on the primary $primary_host = $t.filter |$n| { $n.vars['role'] == 'primary' } - # out::verbose("Running peadm::status on new primary host ${primary_host}") - # run_plan('peadm::status', $primary_host) + out::verbose("Running peadm::status on new primary host ${primary_host}") + run_plan('peadm::status', $primary_host) - # # bring down the current primary - # out::verbose("Bringing down primary host ${primary_host}") - # run_task('reboot', $primary_host, shutdown_only => true) + # bring down the current primary + out::verbose("Bringing down primary host ${primary_host}") + run_task('reboot', $primary_host, shutdown_only => true) # promote the replica to new primary $replica_host = $t.filter |$n| { $n.vars['role'] == 'replica' } - # out::verbose("Promoting replica host ${replica_host} to primary") - # run_command(@("HEREDOC"/L), $replica_host) - # /opt/puppetlabs/bin/puppet infra promote replica --topology mono-with-compile --yes - # |-HEREDOC + out::verbose("Promoting replica host ${replica_host} to primary") + run_command(@("HEREDOC"/L), $replica_host) + /opt/puppetlabs/bin/puppet infra promote replica --topology mono-with-compile --yes + |-HEREDOC - # generate access token + # generate access token on new primary out::verbose("Generating access token on replica host ${replica_host}") run_task('peadm::rbac_token', $replica_host, - password => $console_password, + password => 
'puppetlabs', token_lifetime => '1y', ) - # $primary_certname = $primary_host.peadm::certname() # purge the "failed" primary node run_command(@("HEREDOC"/L), $replica_host) - # /opt/puppetlabs/bin/puppet node purge ${peadm::certname($primary_host)} - /opt/puppetlabs/bin/puppet node purge ip-10-138-1-143.eu-central-1.compute.internal + /opt/puppetlabs/bin/puppet node purge ${peadm::certname($primary_host)} |-HEREDOC # add new replica @@ -50,9 +49,9 @@ out::verbose("Adding new replica host ${new_replica_host} to primary") run_plan('peadm::add_replica', - primary_host => $replica_host.first(), - replica_host => $new_replica_host.first(), - replica_postgresql_host => $replica_postgresql_host ? { [] => undef, default => $replica_postgresql_host.first() }, + primary_host => peadm::certname($replica_host), + replica_host => peadm::certname($new_replica_host), + replica_postgresql_host => $replica_postgresql_host ? { [] => undef, default => peadm::certname($replica_postgresql_host) }, ) # run infra status on the new primary From 64b0322ce10c28a2196ba2fde49ee73881c7f3f7 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 30 Sep 2022 13:52:03 +0200 Subject: [PATCH 10/46] fix architecture --- .github/workflows/test-failover.yaml | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml index 89cfbd4e..40bcf977 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -27,7 +27,7 @@ env: jobs: test-failover: - name: "PE ${{ matrix.version }} ${{ matrix.architecture }} on ${{ matrix.image }}" + name: "PE ${{ matrix.version }} extra-large-with-dr-and-spare-replica on ${{ matrix.image }}" runs-on: ubuntu-20.04 env: BOLT_GEM: true @@ -37,13 +37,12 @@ jobs: strategy: fail-fast: false matrix: - architecture: - - "extra-large-with-dr-and-spare-replica" version: - "${{ github.event.inputs.version }}" image: - "${{ 
github.event.inputs.image }}" - + architecture: + - "extra-large-with-dr-and-spare-replica" steps: - name: 'Start SSH session' if: ${{ github.event.inputs.ssh-debugging == 'true' }} @@ -85,11 +84,11 @@ jobs: run: | echo ::group::honeycomb buildevents step $TRACE_ID $STEP_ID $STEP_START 'Set up environment' - echo STEP_ID=${{ matrix.architecture }}-${{ matrix.image }}-provision >> $GITHUB_ENV + echo STEP_ID=extra-large-with-dr-and-spare-replica-${{ matrix.image }}-provision >> $GITHUB_ENV echo STEP_START=$(date +%s) >> $GITHUB_ENV echo ::endgroup:: - - name: 'Provision test cluster (specified architecture with added DR)' + - name: 'Provision test cluster (XL with spare replica)' timeout-minutes: 15 run: | echo ::group::prepare @@ -106,7 +105,7 @@ jobs: --modulepath spec/fixtures/modules \ provider=provision_service \ image=${{ matrix.image }} \ - architecture=${{ matrix.architecture }}-with-dr + architecture=${{ matrix.architecture }} buildevents cmd $TRACE_ID $STEP_ID 'bolt task run provision::provision_service' -- \ bundle exec bolt bolt task run provision::provision_service \ --modulepath spec/fixtures/modules \ @@ -128,7 +127,7 @@ jobs: run: | echo ::group::honeycomb buildevents step $TRACE_ID $STEP_ID $STEP_START 'Provision test cluster' - echo STEP_ID=${{ matrix.architecture }}-${{ matrix.image }}-install >> $GITHUB_ENV + echo STEP_ID=extra-large-with-dr-and-spare-replica-${{ matrix.image }}-install >> $GITHUB_ENV echo STEP_START=$(date +%s) >> $GITHUB_ENV echo ::endgroup:: @@ -147,7 +146,7 @@ jobs: run: | echo ::group::honeycomb buildevents step $TRACE_ID $STEP_ID $STEP_START 'Install PE on test cluster' - echo STEP_ID=${{ matrix.architecture }}-${{ matrix.image }}-tear_down >> $GITHUB_ENV + echo STEP_ID=extra-large-with-dr-and-spare-replica-${{ matrix.image }}-tear_down >> $GITHUB_ENV echo STEP_START=$(date +%s) >> $GITHUB_ENV echo ::endgroup:: @@ -165,7 +164,7 @@ jobs: run: | echo ::group::honeycomb buildevents step $TRACE_ID $STEP_ID $STEP_START 'Perform 
failover' - echo STEP_ID=${{ matrix.architecture }}-${{ matrix.image }}-tear_down >> $GITHUB_ENV + echo STEP_ID=extra-large-with-dr-and-spare-replica-${{ matrix.image }}-tear_down >> $GITHUB_ENV echo STEP_START=$(date +%s) >> $GITHUB_ENV echo ::endgroup:: @@ -186,7 +185,7 @@ jobs: bundle exec bolt plan run peadm_spec::upgrade_test_cluster \ --inventoryfile spec/fixtures/litmus_inventory.yaml \ --modulepath spec/fixtures/modules \ - architecture='extra-large-with-dr' \ + architecture=${{ matrix.architecture }} \ download_mode='direct' \ version=${{ matrix.version_to_upgrade }} @@ -195,7 +194,7 @@ jobs: run: | echo ::group::honeycomb buildevents step $TRACE_ID $STEP_ID $STEP_START 'Upgrade PE on test cluster' - echo STEP_ID=${{ matrix.architecture }}-${{ matrix.version_to_upgrade }}-tear_down >> $GITHUB_ENV + echo STEP_ID=extra-large-with-dr-and-spare-replica-${{ matrix.version_to_upgrade }}-tear_down >> $GITHUB_ENV echo STEP_START=$(date +%s) >> $GITHUB_ENV echo ::endgroup:: From 3a3b3ef8e48994ee38b7935ab21c6571e152fcdf Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 30 Sep 2022 14:01:06 +0200 Subject: [PATCH 11/46] fix provision --- .github/workflows/test-failover.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml index 40bcf977..2c29c480 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -106,12 +106,6 @@ jobs: provider=provision_service \ image=${{ matrix.image }} \ architecture=${{ matrix.architecture }} - buildevents cmd $TRACE_ID $STEP_ID 'bolt task run provision::provision_service' -- \ - bundle exec bolt bolt task run provision::provision_service \ - --modulepath spec/fixtures/modules \ - action=provision - platform=${{ matrix.image }} \ - vars="role: primary" echo ::endgroup:: echo ::group::info:request From 331a8101d520e002c6ebdd137891d76488904af1 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 30 Sep 
2022 14:12:35 +0200 Subject: [PATCH 12/46] fix architecture --- .github/workflows/test-failover.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml index 2c29c480..e5706b7f 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -27,7 +27,7 @@ env: jobs: test-failover: - name: "PE ${{ matrix.version }} extra-large-with-dr-and-spare-replica on ${{ matrix.image }}" + name: "PE ${{ matrix.version }} ${{ matrix.architecture }} on ${{ matrix.image }}" runs-on: ubuntu-20.04 env: BOLT_GEM: true @@ -42,7 +42,7 @@ jobs: image: - "${{ github.event.inputs.image }}" architecture: - - "extra-large-with-dr-and-spare-replica" + - "extra-large-with-dr" steps: - name: 'Start SSH session' if: ${{ github.event.inputs.ssh-debugging == 'true' }} @@ -84,7 +84,7 @@ jobs: run: | echo ::group::honeycomb buildevents step $TRACE_ID $STEP_ID $STEP_START 'Set up environment' - echo STEP_ID=extra-large-with-dr-and-spare-replica-${{ matrix.image }}-provision >> $GITHUB_ENV + echo STEP_ID=${{ matrix.architecture }}-${{ matrix.image }}-provision >> $GITHUB_ENV echo STEP_START=$(date +%s) >> $GITHUB_ENV echo ::endgroup:: @@ -105,7 +105,7 @@ jobs: --modulepath spec/fixtures/modules \ provider=provision_service \ image=${{ matrix.image }} \ - architecture=${{ matrix.architecture }} + architecture=${{ matrix.architecture }}-and-spare-replica echo ::endgroup:: echo ::group::info:request @@ -121,7 +121,7 @@ jobs: run: | echo ::group::honeycomb buildevents step $TRACE_ID $STEP_ID $STEP_START 'Provision test cluster' - echo STEP_ID=extra-large-with-dr-and-spare-replica-${{ matrix.image }}-install >> $GITHUB_ENV + echo STEP_ID=${{ matrix.architecture }}-${{ matrix.image }}-install >> $GITHUB_ENV echo STEP_START=$(date +%s) >> $GITHUB_ENV echo ::endgroup:: @@ -140,7 +140,7 @@ jobs: run: | echo ::group::honeycomb buildevents step $TRACE_ID $STEP_ID 
$STEP_START 'Install PE on test cluster' - echo STEP_ID=extra-large-with-dr-and-spare-replica-${{ matrix.image }}-tear_down >> $GITHUB_ENV + echo STEP_ID=${{ matrix.architecture }}-${{ matrix.image }}-tear_down >> $GITHUB_ENV echo STEP_START=$(date +%s) >> $GITHUB_ENV echo ::endgroup:: @@ -158,7 +158,7 @@ jobs: run: | echo ::group::honeycomb buildevents step $TRACE_ID $STEP_ID $STEP_START 'Perform failover' - echo STEP_ID=extra-large-with-dr-and-spare-replica-${{ matrix.image }}-tear_down >> $GITHUB_ENV + echo STEP_ID=${{ matrix.architecture }}-${{ matrix.image }}-tear_down >> $GITHUB_ENV echo STEP_START=$(date +%s) >> $GITHUB_ENV echo ::endgroup:: @@ -188,7 +188,7 @@ jobs: run: | echo ::group::honeycomb buildevents step $TRACE_ID $STEP_ID $STEP_START 'Upgrade PE on test cluster' - echo STEP_ID=extra-large-with-dr-and-spare-replica-${{ matrix.version_to_upgrade }}-tear_down >> $GITHUB_ENV + echo STEP_ID=${{ matrix.architecture }}-${{ matrix.version_to_upgrade }}-tear_down >> $GITHUB_ENV echo STEP_START=$(date +%s) >> $GITHUB_ENV echo ::endgroup:: From 4c29ac501f3094ba6aeeec127b1359e31e5a5a5a Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 30 Sep 2022 15:45:23 +0200 Subject: [PATCH 13/46] swap primary and replica; correct params --- .github/workflows/test-failover.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml index e5706b7f..b8e287a5 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -149,9 +149,7 @@ jobs: buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::perform_failover' -- \ bundle exec bolt plan run peadm_spec::perform_failover \ --inventoryfile spec/fixtures/litmus_inventory.yaml \ - --modulepath spec/fixtures/modules \ - platform=${{ matrix.image }} \ - vars="role: primary" + --modulepath spec/fixtures/modules - name: "Honeycomb: Record falover time" if: ${{ always() }} @@ -171,6 
+169,11 @@ jobs: done echo "${HOME}/pause absent, continuing workflow." + - name: 'Swap primary and replica nodes' + run: | + sed -i .sedbak 's/ primary/ ___replica___/;s/ replica/ ___primary___/;s/___//g' \ + spec/fixtures/litmus_inventory.yaml + - name: 'Upgrade PE on test cluster' if: ${{ always() && github.event.inputs.version_to_upgrade != '' }} timeout-minutes: 120 From 6ad3cd05f2dfefabf49e25aed209d4a9d2e03345 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 30 Sep 2022 17:24:17 +0200 Subject: [PATCH 14/46] add trace --- .github/workflows/test-failover.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml index b8e287a5..5932898c 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -129,7 +129,7 @@ jobs: timeout-minutes: 120 run: | buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::install_test_cluster' -- \ - bundle exec bolt plan run peadm_spec::install_test_cluster \ + bundle exec bolt plan run peadm_spec::install_test_cluster --log_level trace \ --inventoryfile spec/fixtures/litmus_inventory.yaml \ --modulepath spec/fixtures/modules \ architecture=${{ matrix.architecture }} \ @@ -175,7 +175,7 @@ jobs: spec/fixtures/litmus_inventory.yaml - name: 'Upgrade PE on test cluster' - if: ${{ always() && github.event.inputs.version_to_upgrade != '' }} + if: ${{ success() && github.event.inputs.version_to_upgrade != '' }} timeout-minutes: 120 run: | buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::upgrade_test_cluster' -- \ @@ -184,10 +184,10 @@ jobs: --modulepath spec/fixtures/modules \ architecture=${{ matrix.architecture }} \ download_mode='direct' \ - version=${{ matrix.version_to_upgrade }} + version=${{ github.event.inputs.version_to_upgrade }} - name: "Honeycomb: Record upgrade time" - if: ${{ always() && github.event.inputs.version_to_upgrade != '' }} + if: ${{ success() && 
github.event.inputs.version_to_upgrade != '' }} run: | echo ::group::honeycomb buildevents step $TRACE_ID $STEP_ID $STEP_START 'Upgrade PE on test cluster' From 87c16adaba5944684fe77acd6790d75023346dbe Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Sat, 1 Oct 2022 17:12:25 +0200 Subject: [PATCH 15/46] add logging --- .github/workflows/test-failover.yaml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml index 5932898c..42c6dce7 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -20,6 +20,10 @@ on: description: 'Boolean; whether or not to pause for ssh debugging' required: true default: 'false' + log_level: + description: 'Bolt log level' + required: false + default: 'trace' env: HONEYCOMB_WRITEKEY: 7f3c63a70eecc61d635917de46bea4e6 @@ -129,7 +133,7 @@ jobs: timeout-minutes: 120 run: | buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::install_test_cluster' -- \ - bundle exec bolt plan run peadm_spec::install_test_cluster --log_level trace \ + bundle exec bolt plan run peadm_spec::install_test_cluster --log_level ${{ github.event.inputs.log_level }} \ --inventoryfile spec/fixtures/litmus_inventory.yaml \ --modulepath spec/fixtures/modules \ architecture=${{ matrix.architecture }} \ @@ -147,7 +151,7 @@ jobs: - name: 'Perform failover' run: | buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::perform_failover' -- \ - bundle exec bolt plan run peadm_spec::perform_failover \ + bundle exec bolt plan run peadm_spec::perform_failover --log_level ${{ github.event.inputs.log_level }} \ --inventoryfile spec/fixtures/litmus_inventory.yaml \ --modulepath spec/fixtures/modules @@ -179,7 +183,7 @@ jobs: timeout-minutes: 120 run: | buildevents cmd $TRACE_ID $STEP_ID 'bolt plan run peadm_spec::upgrade_test_cluster' -- \ - bundle exec bolt plan run peadm_spec::upgrade_test_cluster \ + bundle exec bolt plan run 
peadm_spec::upgrade_test_cluster --log_level ${{ github.event.inputs.log_level }} \ --inventoryfile spec/fixtures/litmus_inventory.yaml \ --modulepath spec/fixtures/modules \ architecture=${{ matrix.architecture }} \ From c8267008559847f6cd3664011c6324327c9a0231 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Tue, 4 Oct 2022 14:19:42 +0200 Subject: [PATCH 16/46] revert back to using targets --- .github/workflows/test-failover.yaml | 2 +- .../peadm_spec/plans/perform_failover.pp | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml index 42c6dce7..9c1f517b 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -173,7 +173,7 @@ jobs: done echo "${HOME}/pause absent, continuing workflow." - - name: 'Swap primary and replica nodes' + - name: 'Swap primary and replica nodes in inventory' run: | sed -i .sedbak 's/ primary/ ___replica___/;s/ replica/ ___primary___/;s/___//g' \ spec/fixtures/litmus_inventory.yaml diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 923545d1..bee1c0ea 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -12,7 +12,7 @@ } # run infra status on the primary - $primary_host = $t.filter |$n| { $n.vars['role'] == 'primary' } + $primary_host = $t.filter |$n| { $n.vars['role'] == 'primary' }[0] out::verbose("Running peadm::status on new primary host ${primary_host}") run_plan('peadm::status', $primary_host) @@ -21,7 +21,7 @@ run_task('reboot', $primary_host, shutdown_only => true) # promote the replica to new primary - $replica_host = $t.filter |$n| { $n.vars['role'] == 'replica' } + $replica_host = $t.filter |$n| { $n.vars['role'] == 'replica' }[0] out::verbose("Promoting replica host ${replica_host} to primary") run_command(@("HEREDOC"/L), 
$replica_host) /opt/puppetlabs/bin/puppet infra promote replica --topology mono-with-compile --yes @@ -40,8 +40,8 @@ |-HEREDOC # add new replica - $replica_postgresql_host = $t.filter |$n| { $n.vars['role'] == 'primary-pdb-postgresql' } - $new_replica_host = $t.filter |$n| { $n.vars['role'] == 'spare-replica' } + $replica_postgresql_host = $t.filter |$n| { $n.vars['role'] == 'primary-pdb-postgresql' }[0] + $new_replica_host = $t.filter |$n| { $n.vars['role'] == 'spare-replica' }[0] if $new_replica_host == [] { fail_plan('"spare-replica" role missing from inventory, cannot continue') @@ -49,9 +49,9 @@ out::verbose("Adding new replica host ${new_replica_host} to primary") run_plan('peadm::add_replica', - primary_host => peadm::certname($replica_host), - replica_host => peadm::certname($new_replica_host), - replica_postgresql_host => $replica_postgresql_host ? { [] => undef, default => peadm::certname($replica_postgresql_host) }, + primary_host => $replica_host, + replica_host => $new_replica_host, + replica_postgresql_host => $replica_postgresql_host ? 
{ [] => undef, default => $replica_postgresql_host }, ) # run infra status on the new primary From 26d7e17c58b33634aecb21fb6b1330a8fc37cc46 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Wed, 5 Oct 2022 14:58:11 +0200 Subject: [PATCH 17/46] formatting --- plans/modify_certificate.pp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plans/modify_certificate.pp b/plans/modify_certificate.pp index 1e8d796a..9286c1a8 100644 --- a/plans/modify_certificate.pp +++ b/plans/modify_certificate.pp @@ -5,8 +5,8 @@ plan peadm::modify_certificate ( TargetSpec $targets, Peadm::SingleTargetSpec $primary_host, - Hash $add_extensions = { }, - Array $remove_extensions = [ ], + Hash $add_extensions = {}, + Array $remove_extensions = [], Optional[Array] $dns_alt_names = undef, Boolean $force_regenerate = false, ) { From 437cf23c5382f642b292510b852f04edef3b05a9 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Wed, 5 Oct 2022 14:58:25 +0200 Subject: [PATCH 18/46] use .name to convert target to string --- spec/acceptance/peadm_spec/plans/perform_failover.pp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index bee1c0ea..38025afc 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -49,9 +49,9 @@ out::verbose("Adding new replica host ${new_replica_host} to primary") run_plan('peadm::add_replica', - primary_host => $replica_host, - replica_host => $new_replica_host, - replica_postgresql_host => $replica_postgresql_host ? { [] => undef, default => $replica_postgresql_host }, + primary_host => $replica_host.name, + replica_host => $new_replica_host.name, + replica_postgresql_host => $replica_postgresql_host ? 
{ [] => undef, default => $replica_postgresql_host.name }, ) # run infra status on the new primary From 3e7955e68e6777324acfa94fed83290746e4ea09 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Wed, 5 Oct 2022 17:33:42 +0200 Subject: [PATCH 19/46] change name to uri --- spec/acceptance/peadm_spec/plans/perform_failover.pp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 38025afc..505f864c 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -49,9 +49,9 @@ out::verbose("Adding new replica host ${new_replica_host} to primary") run_plan('peadm::add_replica', - primary_host => $replica_host.name, - replica_host => $new_replica_host.name, - replica_postgresql_host => $replica_postgresql_host ? { [] => undef, default => $replica_postgresql_host.name }, + primary_host => $replica_host.uri, + replica_host => $new_replica_host.uri, + replica_postgresql_host => $replica_postgresql_host ? 
{ [] => undef, default => $replica_postgresql_host.uri }, ) # run infra status on the new primary From 6f06cab9dc45e1313ab3288ea5ffa639accedcb0 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Thu, 6 Oct 2022 15:10:44 +0200 Subject: [PATCH 20/46] debugging --- plans/modify_certificate.pp | 1 + 1 file changed, 1 insertion(+) diff --git a/plans/modify_certificate.pp b/plans/modify_certificate.pp index 9286c1a8..b2770942 100644 --- a/plans/modify_certificate.pp +++ b/plans/modify_certificate.pp @@ -19,6 +19,7 @@ # TODO: convert $add_extensions and $remov_extensions to OIDs, if friendly # names have been given + out::message("peadm::modify_certificate: primary host: ${primary_target} - ${primary_target.name} - ${primary_target.uri}") $primary_certname = run_task('peadm::cert_data', $primary_target).first['certname'] # Do the primary first, if it's in the list From bf027cd72b538c2cb37b58dbff048e7b5391433c Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Mon, 10 Oct 2022 15:57:57 +0200 Subject: [PATCH 21/46] debugging --- plans/subplans/prepare_agent.pp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plans/subplans/prepare_agent.pp b/plans/subplans/prepare_agent.pp index fff49e6e..b86f9c8b 100644 --- a/plans/subplans/prepare_agent.pp +++ b/plans/subplans/prepare_agent.pp @@ -8,6 +8,9 @@ $agent_target = peadm::get_targets($targets, 1) $primary_target = peadm::get_targets($primary_host, 1) + out::message("Preparing agent ${agent_target} to connect to ${primary_target}") + out::message("agent target ${agent_target} to connect to ${primary_target}") + $dns_alt_names_flag = $dns_alt_names? 
{ undef => [], default => ["main:dns_alt_names=${dns_alt_names.join(',')}"], @@ -80,6 +83,7 @@ # If agent certificate is good but lacks appropriate extensions, plan will still # regenerate certificate + out::message("primary target: ${primary_target}, certname: ${primary_target.peadm::certname()}, uri: ${primary_target.uri}") run_plan('peadm::modify_certificate', $agent_target, primary_host => $primary_target.peadm::certname(), add_extensions => $certificate_extensions, From 04611f637c218fcf5ed9b4d97bf02f693d67c197 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Mon, 10 Oct 2022 18:59:12 +0200 Subject: [PATCH 22/46] fix --- plans/subplans/prepare_agent.pp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plans/subplans/prepare_agent.pp b/plans/subplans/prepare_agent.pp index b86f9c8b..c73ef909 100644 --- a/plans/subplans/prepare_agent.pp +++ b/plans/subplans/prepare_agent.pp @@ -83,7 +83,7 @@ # If agent certificate is good but lacks appropriate extensions, plan will still # regenerate certificate - out::message("primary target: ${primary_target}, certname: ${primary_target.peadm::certname()}, uri: ${primary_target.uri}") + out::message("primary target: ${primary_target}, certname: ${primary_target.peadm::certname()}, uri: ${primary_target[0].uri}") run_plan('peadm::modify_certificate', $agent_target, primary_host => $primary_target.peadm::certname(), add_extensions => $certificate_extensions, From 249364c28d6c649b0fb8520ee518b9781c2d2040 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Mon, 10 Oct 2022 19:24:10 +0200 Subject: [PATCH 23/46] remove certname() call --- plans/subplans/prepare_agent.pp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plans/subplans/prepare_agent.pp b/plans/subplans/prepare_agent.pp index c73ef909..aac419d4 100644 --- a/plans/subplans/prepare_agent.pp +++ b/plans/subplans/prepare_agent.pp @@ -85,7 +85,7 @@ # regenerate certificate out::message("primary target: ${primary_target}, certname: 
${primary_target.peadm::certname()}, uri: ${primary_target[0].uri}") run_plan('peadm::modify_certificate', $agent_target, - primary_host => $primary_target.peadm::certname(), + primary_host => $primary_target, add_extensions => $certificate_extensions, force_regenerate => $force_regenerate ) From 54970a880a88a45868666c5315f2ff74cff2a7a9 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Tue, 11 Oct 2022 17:53:14 +0200 Subject: [PATCH 24/46] purge failed primary before promoting --- spec/acceptance/peadm_spec/plans/perform_failover.pp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 505f864c..7d6d40c9 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -20,6 +20,11 @@ out::verbose("Bringing down primary host ${primary_host}") run_task('reboot', $primary_host, shutdown_only => true) + # purge the "failed" primary node + run_command(@("HEREDOC"/L), $replica_host) + /opt/puppetlabs/bin/puppet node purge ${peadm::certname($primary_host)} + |-HEREDOC + # promote the replica to new primary $replica_host = $t.filter |$n| { $n.vars['role'] == 'replica' }[0] out::verbose("Promoting replica host ${replica_host} to primary") @@ -34,11 +39,6 @@ token_lifetime => '1y', ) - # purge the "failed" primary node - run_command(@("HEREDOC"/L), $replica_host) - /opt/puppetlabs/bin/puppet node purge ${peadm::certname($primary_host)} - |-HEREDOC - # add new replica $replica_postgresql_host = $t.filter |$n| { $n.vars['role'] == 'primary-pdb-postgresql' }[0] $new_replica_host = $t.filter |$n| { $n.vars['role'] == 'spare-replica' }[0] From 0b0a707d8694b476d28811071fda39225217aa04 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Tue, 11 Oct 2022 18:54:06 +0200 Subject: [PATCH 25/46] fix --- spec/acceptance/peadm_spec/plans/perform_failover.pp | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 7d6d40c9..3ddbea02 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -21,12 +21,12 @@ run_task('reboot', $primary_host, shutdown_only => true) # purge the "failed" primary node + $replica_host = $t.filter |$n| { $n.vars['role'] == 'replica' }[0] run_command(@("HEREDOC"/L), $replica_host) /opt/puppetlabs/bin/puppet node purge ${peadm::certname($primary_host)} |-HEREDOC # promote the replica to new primary - $replica_host = $t.filter |$n| { $n.vars['role'] == 'replica' }[0] out::verbose("Promoting replica host ${replica_host} to primary") run_command(@("HEREDOC"/L), $replica_host) /opt/puppetlabs/bin/puppet infra promote replica --topology mono-with-compile --yes From 737479722b4d7ca7d2075c48af012d9a45106550 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Wed, 12 Oct 2022 14:24:37 +0200 Subject: [PATCH 26/46] moving purge to after promote --- spec/acceptance/peadm_spec/plans/perform_failover.pp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 3ddbea02..ada3cc9a 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -20,16 +20,16 @@ out::verbose("Bringing down primary host ${primary_host}") run_task('reboot', $primary_host, shutdown_only => true) - # purge the "failed" primary node + # promote the replica to new primary $replica_host = $t.filter |$n| { $n.vars['role'] == 'replica' }[0] + out::verbose("Promoting replica host ${replica_host} to primary") run_command(@("HEREDOC"/L), $replica_host) - /opt/puppetlabs/bin/puppet node purge ${peadm::certname($primary_host)} + /opt/puppetlabs/bin/puppet infra promote replica 
--topology mono-with-compile --yes |-HEREDOC - # promote the replica to new primary - out::verbose("Promoting replica host ${replica_host} to primary") + # purge the "failed" primary node run_command(@("HEREDOC"/L), $replica_host) - /opt/puppetlabs/bin/puppet infra promote replica --topology mono-with-compile --yes + /opt/puppetlabs/bin/puppet node purge ${peadm::certname($primary_host)} |-HEREDOC # generate access token on new primary From b041010cecd2053bacecb7ae3a377fe48f7f60d0 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 14 Oct 2022 14:14:53 +0200 Subject: [PATCH 27/46] add timeout 0 to shutdown command --- spec/acceptance/peadm_spec/plans/perform_failover.pp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index ada3cc9a..3f5caada 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -16,9 +16,9 @@ out::verbose("Running peadm::status on new primary host ${primary_host}") run_plan('peadm::status', $primary_host) - # bring down the current primary + # bring down the current primary right now out::verbose("Bringing down primary host ${primary_host}") - run_task('reboot', $primary_host, shutdown_only => true) + run_task('reboot', $primary_host, shutdown_only => true, timeout => 0) # promote the replica to new primary $replica_host = $t.filter |$n| { $n.vars['role'] == 'replica' }[0] From 9531601bbebda6ab5a6fca70d718cbd192540402 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 14 Oct 2022 18:13:27 +0200 Subject: [PATCH 28/46] destroy ssldir, use infra forget --- .../acceptance/peadm_spec/plans/perform_failover.pp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 3f5caada..23ee7783 100644 --- 
a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -18,6 +18,8 @@ # bring down the current primary right now out::verbose("Bringing down primary host ${primary_host}") + # destroy the ssldir in case the host comes up again + run_command('rm -rf $(puppet config print ssldir)', $primary_host, _catch_errors => true) run_task('reboot', $primary_host, shutdown_only => true, timeout => 0) # promote the replica to new primary @@ -27,9 +29,14 @@ /opt/puppetlabs/bin/puppet infra promote replica --topology mono-with-compile --yes |-HEREDOC - # purge the "failed" primary node - run_command(@("HEREDOC"/L), $replica_host) - /opt/puppetlabs/bin/puppet node purge ${peadm::certname($primary_host)} + # # purge the "failed" primary node + # run_command(@("HEREDOC"/L), $replica_host) + # /opt/puppetlabs/bin/puppet node purge ${peadm::certname($primary_host)} + # |-HEREDOC + + # forget the "failed" primary node + run_command(@("HEREDOC"/L), $replica_host, _catch_errors => true) + /opt/puppetlabs/bin/puppet infrastructure forget ${peadm::certname($primary_host)} |-HEREDOC # generate access token on new primary From ee9de3f78f65f2109615bf721958c5b4a3cf7f10 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 14 Oct 2022 22:55:26 +0200 Subject: [PATCH 29/46] disable start of networking on failed primary --- spec/acceptance/peadm_spec/plans/perform_failover.pp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 23ee7783..254e5a18 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -18,8 +18,8 @@ # bring down the current primary right now out::verbose("Bringing down primary host ${primary_host}") - # destroy the ssldir in case the host comes up again - run_command('rm -rf $(puppet config print ssldir)', 
$primary_host, _catch_errors => true) + # prevent host from starting up networking in case it comes up again + run_command('systemctl set-default basic.target', $primary_host, _catch_errors => true) run_task('reboot', $primary_host, shutdown_only => true, timeout => 0) # promote the replica to new primary From 12bab1476a9ca6b350304495227cc2cc39822c1e Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Mon, 17 Oct 2022 12:42:07 +0200 Subject: [PATCH 30/46] generate rbac token before infra forget --- spec/acceptance/peadm_spec/plans/perform_failover.pp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 254e5a18..922e31ea 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -34,11 +34,6 @@ # /opt/puppetlabs/bin/puppet node purge ${peadm::certname($primary_host)} # |-HEREDOC - # forget the "failed" primary node - run_command(@("HEREDOC"/L), $replica_host, _catch_errors => true) - /opt/puppetlabs/bin/puppet infrastructure forget ${peadm::certname($primary_host)} - |-HEREDOC - # generate access token on new primary out::verbose("Generating access token on replica host ${replica_host}") run_task('peadm::rbac_token', $replica_host, @@ -46,6 +41,12 @@ token_lifetime => '1y', ) + # forget the "failed" primary node + run_command(@("HEREDOC"/L), $replica_host, _catch_errors => true) + /opt/puppetlabs/bin/puppet infrastructure forget ${peadm::certname($primary_host)} + |-HEREDOC + + # add new replica $replica_postgresql_host = $t.filter |$n| { $n.vars['role'] == 'primary-pdb-postgresql' }[0] $new_replica_host = $t.filter |$n| { $n.vars['role'] == 'spare-replica' }[0] From 1f0f3eecf7aa4d91a73381f80237dec2c5863769 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Tue, 18 Oct 2022 15:36:10 +0200 Subject: [PATCH 31/46] list active nodes --- 
spec/acceptance/peadm_spec/plans/perform_failover.pp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 922e31ea..44e8b4b1 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -41,11 +41,18 @@ token_lifetime => '1y', ) + $res1 = run_command('/opt/puppetlabs/bin/puppet query \'nodes [certname] {node_state = "active"}\'', + $replica_host) + out::message("Active nodes 1: ${res1.first['stdout']}") + # forget the "failed" primary node run_command(@("HEREDOC"/L), $replica_host, _catch_errors => true) /opt/puppetlabs/bin/puppet infrastructure forget ${peadm::certname($primary_host)} |-HEREDOC + $res2 = run_command('/opt/puppetlabs/bin/puppet query \'nodes [certname] {node_state = "active"}\'', + $replica_host) + out::message("Active nodes 2: ${res2.first['stdout']}") # add new replica $replica_postgresql_host = $t.filter |$n| { $n.vars['role'] == 'primary-pdb-postgresql' }[0] @@ -62,6 +69,10 @@ replica_postgresql_host => $replica_postgresql_host ? 
{ [] => undef, default => $replica_postgresql_host.uri }, ) + $res3 = run_command('/opt/puppetlabs/bin/puppet query \'nodes [certname] {node_state = "active"}\'', + $replica_host) + out::message("Active nodes 3: ${res3.first['stdout']}") + # run infra status on the new primary out::verbose("Running peadm::status on new primary host ${replica_host}") run_plan('peadm::status', $replica_host) From c92d5e3a06ffe3eba179b62bd0d26f92a7e8a978 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Thu, 3 Nov 2022 14:03:21 +0100 Subject: [PATCH 32/46] add puppetdb queries for debugging --- tasks/provision_replica.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tasks/provision_replica.sh b/tasks/provision_replica.sh index 49633ca3..6397b61c 100755 --- a/tasks/provision_replica.sh +++ b/tasks/provision_replica.sh @@ -25,10 +25,15 @@ if [ "$PT_legacy" = "false" ]; then --enable elif [ "$PT_legacy" = "true" ]; then + echo "query active nodes before provision replica" + puppet query 'nodes [certname] {node_state = "active"}' + puppet infrastructure provision replica "$PT_replica" \ --color false \ --token-file "$TOKEN_FILE" + echo "query active nodes before enable replica" + puppet query 'nodes [certname] {node_state = "active"}' puppet infrastructure enable replica "$PT_replica" \ --color false \ --yes --token-file "$TOKEN_FILE" \ From aab97fe5c25e1348d63dbc26774a255624871fd3 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Thu, 3 Nov 2022 16:21:07 +0100 Subject: [PATCH 33/46] set param "legacy" to false for provision_replica --- plans/add_replica.pp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plans/add_replica.pp b/plans/add_replica.pp index c8b95d33..6465ccca 100644 --- a/plans/add_replica.pp +++ b/plans/add_replica.pp @@ -120,7 +120,7 @@ # Race condition, where the provision command checks PuppetDB status and # probably gets "starting", but fails out because that's not "running". # Can remove flag when that issue is fixed. 
- legacy => true, + legacy => false, ) # start puppet service From a5cd053fa95021a3e26b19d27cf8317ea54f5432 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Thu, 10 Nov 2022 15:02:42 +0100 Subject: [PATCH 34/46] add different query --- .../peadm_spec/plans/perform_failover.pp | 10 ++++------ tasks/provision_replica.sh | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 44e8b4b1..12ec767f 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -41,8 +41,8 @@ token_lifetime => '1y', ) - $res1 = run_command('/opt/puppetlabs/bin/puppet query \'nodes [certname] {node_state = "active"}\'', - $replica_host) + $query = '["from","resources",["extract",["certname"],["and",["=","type","Class"],["=","title","Puppet_enterprise::Profile::Master"]]]]' + $res1 = run_command("/opt/puppetlabs/bin/puppet query \'${query}\'", $replica_host) out::message("Active nodes 1: ${res1.first['stdout']}") # forget the "failed" primary node @@ -50,8 +50,7 @@ /opt/puppetlabs/bin/puppet infrastructure forget ${peadm::certname($primary_host)} |-HEREDOC - $res2 = run_command('/opt/puppetlabs/bin/puppet query \'nodes [certname] {node_state = "active"}\'', - $replica_host) + $res2 = run_command("/opt/puppetlabs/bin/puppet query \'${query}\'", $replica_host) out::message("Active nodes 2: ${res2.first['stdout']}") # add new replica @@ -69,8 +68,7 @@ replica_postgresql_host => $replica_postgresql_host ? 
{ [] => undef, default => $replica_postgresql_host.uri }, ) - $res3 = run_command('/opt/puppetlabs/bin/puppet query \'nodes [certname] {node_state = "active"}\'', - $replica_host) + $res3 = run_command("/opt/puppetlabs/bin/puppet query \'${query}\'", $replica_host) out::message("Active nodes 3: ${res3.first['stdout']}") # run infra status on the new primary diff --git a/tasks/provision_replica.sh b/tasks/provision_replica.sh index 6397b61c..73161810 100755 --- a/tasks/provision_replica.sh +++ b/tasks/provision_replica.sh @@ -3,6 +3,11 @@ # Try and ensure locale is correctly configured [ -z "${LANG}" ] && export LANG=$(localectl status | sed -n 's/.* LANG=\(.*\)/\1/p') +# declare task parameters for linting +declare PT_token_file +declare PT_legacy +declare PT_replica + export USER=$(id -un) export HOME=$(getent passwd "$USER" | cut -d : -f 6) export PATH="/opt/puppetlabs/bin:${PATH}" @@ -17,6 +22,9 @@ fi set -e if [ "$PT_legacy" = "false" ]; then + echo "(legacy=false) query active nodes before provision replica" + puppet query '["from","resources",["extract",["certname"],["and",["=","type","Class"],["=","title","Puppet_enterprise::Profile::Master"]]]]' + puppet infrastructure provision replica "$PT_replica" \ --color false \ --yes --token-file "$TOKEN_FILE" \ @@ -25,15 +33,15 @@ if [ "$PT_legacy" = "false" ]; then --enable elif [ "$PT_legacy" = "true" ]; then - echo "query active nodes before provision replica" - puppet query 'nodes [certname] {node_state = "active"}' + echo "(legacy=true) query active nodes before provision replica" + puppet query '["from","resources",["extract",["certname"],["and",["=","type","Class"],["=","title","Puppet_enterprise::Profile::Master"]]]]' puppet infrastructure provision replica "$PT_replica" \ --color false \ --token-file "$TOKEN_FILE" - echo "query active nodes before enable replica" - puppet query 'nodes [certname] {node_state = "active"}' + echo "(legacy=true) query active nodes before enable replica" + puppet query 
'["from","resources",["extract",["certname"],["and",["=","type","Class"],["=","title","Puppet_enterprise::Profile::Master"]]]]' puppet infrastructure enable replica "$PT_replica" \ --color false \ --yes --token-file "$TOKEN_FILE" \ From d2843b1caf5d75bfd2adc08de6759d3d2820d979 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Thu, 10 Nov 2022 17:36:30 +0100 Subject: [PATCH 35/46] adding catch_errors to the provision_replica task call --- plans/add_replica.pp | 1 + 1 file changed, 1 insertion(+) diff --git a/plans/add_replica.pp b/plans/add_replica.pp index 6465ccca..914c6f91 100644 --- a/plans/add_replica.pp +++ b/plans/add_replica.pp @@ -121,6 +121,7 @@ # probably gets "starting", but fails out because that's not "running". # Can remove flag when that issue is fixed. legacy => false, + _catch_errors => true, # testing ) # start puppet service From 4abfcaa4d1d70481b18c6f61d3834a29a3c01366 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 11 Nov 2022 16:33:07 +0100 Subject: [PATCH 36/46] disable _catch_errors --- plans/add_replica.pp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plans/add_replica.pp b/plans/add_replica.pp index 914c6f91..95bfb5e1 100644 --- a/plans/add_replica.pp +++ b/plans/add_replica.pp @@ -121,7 +121,7 @@ # probably gets "starting", but fails out because that's not "running". # Can remove flag when that issue is fixed. 
legacy => false, - _catch_errors => true, # testing + # _catch_errors => true, # testing ) # start puppet service From 793b7f45e4951ce1c1f19d3a4e7e293b7a55310c Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Mon, 6 Feb 2023 15:57:52 +0100 Subject: [PATCH 37/46] fix syntax and spec tests --- functions/assert_supported_pe_version.pp | 2 +- plans/convert.pp | 2 +- plans/status.pp | 4 +- .../plans/provision_test_cluster.pp | 40 +++++-------------- spec/plans/add_replica_spec.rb | 1 + spec/plans/modify_certificate_spec.rb | 1 + 6 files changed, 16 insertions(+), 34 deletions(-) diff --git a/functions/assert_supported_pe_version.pp b/functions/assert_supported_pe_version.pp index 66e84652..b01daece 100644 --- a/functions/assert_supported_pe_version.pp +++ b/functions/assert_supported_pe_version.pp @@ -40,5 +40,5 @@ function peadm::assert_supported_pe_version ( # lint:endignore } - return( { 'supported' => $supported }) + return({ 'supported' => $supported }) } diff --git a/plans/convert.pp b/plans/convert.pp index d94d6a77..1995a0b0 100644 --- a/plans/convert.pp +++ b/plans/convert.pp @@ -60,7 +60,7 @@ # Get trusted fact information for all compilers. Use peadm::certname() as # the hash key because the apply block below will break trying to parse the # $compiler_extensions variable if it has Target-type hash keys. 
- $cert_extensions = run_task('peadm::cert_data', $all_targets).reduce( {}) |$memo,$result| { + $cert_extensions = run_task('peadm::cert_data', $all_targets).reduce({}) |$memo,$result| { $memo + { $result.target.peadm::certname() => $result['extensions'] } } diff --git a/plans/status.pp b/plans/status.pp index 667855cc..0ebf2708 100644 --- a/plans/status.pp +++ b/plans/status.pp @@ -18,11 +18,11 @@ $results = run_task('peadm::infrastatus', $targets, { format => 'json' }) # returns the data in a hash - $stack_status = $results.reduce( {}) | $res, $item | { + $stack_status = $results.reduce({}) | $res, $item | { $data = $item.value[output] $stack_name = $item.target.peadm::certname() $status = peadm::determine_status($data, $colors).merge(stack_name => $stack_name ) - $res.merge( { $stack_name => $status }) + $res.merge({ $stack_name => $status }) } $overall_degraded_stacks = $stack_status.filter | $item | { $item[1][status] =~ /degraded/ } diff --git a/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp b/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp index a1def171..a3a18d4e 100644 --- a/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp +++ b/spec/acceptance/peadm_spec/plans/provision_test_cluster.pp @@ -15,47 +15,27 @@ ['primary', 'compiler'] } 'large-with-dr': { - ['primary', 'compiler', - 'replica', 'compiler'] + ['primary', 'compiler', 'replica', 'compiler'] } 'extra-large': { ['primary', 'primary-pdb-postgresql', 'compiler'] } 'extra-large-with-dr': { - ['primary', 'primary-pdb-postgresql', 'compiler', - 'replica', 'replica-pdb-postgresql', 'compiler'] + ['primary', 'primary-pdb-postgresql', 'compiler', 'replica', 'replica-pdb-postgresql', 'compiler'] } 'large-with-extra-compiler': { ['primary', 'compiler', 'unconfigured-compiler'] } 'extra-large-with-extra-compiler': { - ['primary', 'primary-pdb-postgresql', 'compiler', 'unconfigured-compiler' ] + ['primary', 'primary-pdb-postgresql', 'compiler', 'unconfigured-compiler'] + } + 
'extra-large-with-dr-and-spare-replica': { + ['primary', 'primary-pdb-postgresql', 'compiler', + 'replica', 'replica-pdb-postgresql', 'compiler', 'spare-replica'] + } + default: { + fail_plan("Unknown architecture: ${architecture}") } - } - 'standard-with-dr': { - ['primary', 'replica'] - } - 'large': { - ['primary', 'compiler'] - } - 'large-with-dr': { - ['primary', 'compiler', - 'replica', 'compiler'] - } - 'extra-large': { - ['primary', 'primary-pdb-postgresql', 'compiler'] - } - 'extra-large-with-dr': { - ['primary', 'primary-pdb-postgresql', 'compiler', - 'replica', 'replica-pdb-postgresql', 'compiler'] - } - 'extra-large-with-dr-and-spare-replica': { - ['primary', 'primary-pdb-postgresql', 'compiler', - 'replica', 'replica-pdb-postgresql', 'compiler', 'spare-replica'] - } - default: { - fail_plan("Unknown architecture: ${architecture}") - } } $provision_results = diff --git a/spec/plans/add_replica_spec.rb b/spec/plans/add_replica_spec.rb index 509ae13c..ed3ae7b2 100644 --- a/spec/plans/add_replica_spec.rb +++ b/spec/plans/add_replica_spec.rb @@ -7,6 +7,7 @@ def allow_standard_non_returning_calls allow_apply allow_any_task allow_any_command + allow_any_out_message end describe 'basic functionality' do diff --git a/spec/plans/modify_certificate_spec.rb b/spec/plans/modify_certificate_spec.rb index c6ab4a6d..6dc1a558 100644 --- a/spec/plans/modify_certificate_spec.rb +++ b/spec/plans/modify_certificate_spec.rb @@ -14,6 +14,7 @@ end it 'runs successfully ' do + allow_any_out_message allow_task('peadm::cert_data').always_return({ 'certname' => 'primary' }) expect_plan('peadm::subplans::modify_certificate').be_called_times(3) From 0585f6a7d037e5affb8f2b0cf25c41446263ddbf Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Thu, 9 Feb 2023 17:59:21 +0100 Subject: [PATCH 38/46] add task to delete certname from psql db --- .gitignore | 1 + .../peadm_spec/plans/perform_failover.pp | 11 ++++++++ .../peadm_spec/tasks/delete_certname.json | 9 ++++++ 
.../peadm_spec/tasks/delete_certname.sh | 28 +++++++++++++++++++ 4 files changed, 49 insertions(+) create mode 100644 spec/acceptance/peadm_spec/tasks/delete_certname.json create mode 100644 spec/acceptance/peadm_spec/tasks/delete_certname.sh diff --git a/.gitignore b/.gitignore index aa93de59..04e37db2 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ spec/docker/**/*.tar.gz spec/docker/**/*.asc spec/docker/**/files/puppet-enterprise* spec/docker/.task_cache.json +.vscode/settings.json diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 12ec767f..49ba3209 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -22,6 +22,11 @@ run_command('systemctl set-default basic.target', $primary_host, _catch_errors => true) run_task('reboot', $primary_host, shutdown_only => true, timeout => 0) + # remove the certname of the failed primary from the primary postgresql database + $primary_postgresql_host = $t.filter |$n| { $n.vars['role'] == 'primary-pdb-postgresql' }[0] + run_task('peadm_spec::delete_certname', $primary_postgresql_host, + certname => peadm::certname($primary_host)) + # promote the replica to new primary $replica_host = $t.filter |$n| { $n.vars['role'] == 'replica' }[0] out::verbose("Promoting replica host ${replica_host} to primary") @@ -61,6 +66,12 @@ fail_plan('"spare-replica" role missing from inventory, cannot continue') } + # run puppet on all infrastructure nodes (except the spare replica) + # to remove the "failed" primary node + run_task('peadm::puppet_runonce', $t - $new_replica_host) + + # TODO: remove the failed primary from the pe.conf file on the primary postgresql node + out::verbose("Adding new replica host ${new_replica_host} to primary") run_plan('peadm::add_replica', primary_host => $replica_host.uri, diff --git a/spec/acceptance/peadm_spec/tasks/delete_certname.json 
b/spec/acceptance/peadm_spec/tasks/delete_certname.json new file mode 100644 index 00000000..cb6d0172 --- /dev/null +++ b/spec/acceptance/peadm_spec/tasks/delete_certname.json @@ -0,0 +1,9 @@ +{ + "description": "Delete a certname from the Postgres DB", + "parameters": { + "certname": { + "type": "String", + "description": "The certname to delete" + } + } +} diff --git a/spec/acceptance/peadm_spec/tasks/delete_certname.sh b/spec/acceptance/peadm_spec/tasks/delete_certname.sh new file mode 100644 index 00000000..7adfecf4 --- /dev/null +++ b/spec/acceptance/peadm_spec/tasks/delete_certname.sh @@ -0,0 +1,28 @@ +#!/bin/bash +echo "Deleting certname ${PT_certname}" +read -r -d '' QUERY < Date: Fri, 10 Feb 2023 10:06:15 +0100 Subject: [PATCH 39/46] don't attempt to run puppet on the failed primary --- spec/acceptance/peadm_spec/plans/perform_failover.pp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/acceptance/peadm_spec/plans/perform_failover.pp b/spec/acceptance/peadm_spec/plans/perform_failover.pp index 49ba3209..0e0afef1 100644 --- a/spec/acceptance/peadm_spec/plans/perform_failover.pp +++ b/spec/acceptance/peadm_spec/plans/perform_failover.pp @@ -66,9 +66,9 @@ fail_plan('"spare-replica" role missing from inventory, cannot continue') } - # run puppet on all infrastructure nodes (except the spare replica) + # run puppet on all infrastructure nodes (except the failed primary and the spare replica) # to remove the "failed" primary node - run_task('peadm::puppet_runonce', $t - $new_replica_host) + run_task('peadm::puppet_runonce', $t - $new_replica_host - $primary_host) # TODO: remove the failed primary from the pe.conf file on the primary postgresql node @@ -76,7 +76,7 @@ run_plan('peadm::add_replica', primary_host => $replica_host.uri, replica_host => $new_replica_host.uri, - replica_postgresql_host => $replica_postgresql_host ? { [] => undef, default => $replica_postgresql_host.uri }, + replica_postgresql_host => $replica_postgresql_host ?
{[] => undef, default => $replica_postgresql_host.uri }, ) $res3 = run_command("/opt/puppetlabs/bin/puppet query \'${query}\'", $replica_host) From bdd0c89e70c27f8e86dde2ef8a3be36bb3d89429 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 10 Feb 2023 14:33:14 +0100 Subject: [PATCH 40/46] fix primary/replica swap sed command --- .github/workflows/test-failover.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml index e238cee6..d5a526a3 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -175,7 +175,7 @@ jobs: - name: 'Swap primary and replica nodes in inventory' run: | - sed -i .sedbak 's/ primary/ ___replica___/;s/ replica/ ___primary___/;s/___//g' \ + sed -i.sedbak 's/primary/__tmp__/g;s/replica/primary/g;s/__tmp__/replica/g' \ spec/fixtures/litmus_inventory.yaml - name: 'Upgrade PE on test cluster' From 2fb1e529d641d2144addb1bedc361e10871b326f Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 10 Feb 2023 15:08:35 +0100 Subject: [PATCH 41/46] fix swap --- .github/workflows/test-failover.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml index d5a526a3..29d58354 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -23,7 +23,7 @@ on: log_level: description: 'Bolt log level' required: false - default: 'trace' + default: 'verbose' env: HONEYCOMB_WRITEKEY: 7f3c63a70eecc61d635917de46bea4e6 @@ -175,7 +175,7 @@ jobs: - name: 'Swap primary and replica nodes in inventory' run: | - sed -i.sedbak 's/primary/__tmp__/g;s/replica/primary/g;s/__tmp__/replica/g' \ + sed -i .sedbak 's/role: primary$/role: failed/;s/primary/__tmp__/;s/spare-replica/__tmp2__/;s/replica/primary/;s/__tmp__/replica/;s/__tmp2__/replica/' \ spec/fixtures/litmus_inventory.yaml - name: 'Upgrade PE on test 
cluster' From 7715a98142d7a11773ae28708a3acd606837954a Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 10 Feb 2023 15:31:27 +0100 Subject: [PATCH 42/46] fix log level --- .github/workflows/test-failover.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml index 29d58354..51484b69 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -23,7 +23,7 @@ on: log_level: description: 'Bolt log level' required: false - default: 'verbose' + default: 'debug' env: HONEYCOMB_WRITEKEY: 7f3c63a70eecc61d635917de46bea4e6 From 7d973dafd723f48bf6b2856cf6c319eb2f3550f8 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Fri, 10 Feb 2023 17:08:55 +0100 Subject: [PATCH 43/46] another sed command fix --- .github/workflows/test-failover.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml index 51484b69..5c0c6bb5 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -175,7 +175,7 @@ jobs: - name: 'Swap primary and replica nodes in inventory' run: | - sed -i .sedbak 's/role: primary$/role: failed/;s/primary/__tmp__/;s/spare-replica/__tmp2__/;s/replica/primary/;s/__tmp__/replica/;s/__tmp2__/replica/' \ + sed -i.sedbak 's/role: primary$/role: failed/;s/primary/__tmp__/;s/spare-replica/__tmp2__/;s/replica/primary/;s/__tmp__/replica/;s/__tmp2__/replica/' \ spec/fixtures/litmus_inventory.yaml - name: 'Upgrade PE on test cluster' From 9ab4deb20004a7c54044da3f9eadaa5231300f83 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Sun, 12 Feb 2023 11:35:41 +0100 Subject: [PATCH 44/46] add updating inventory file --- .github/workflows/test-failover.yaml | 12 ++++++++++-- .gitignore | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-failover.yaml 
b/.github/workflows/test-failover.yaml index 5c0c6bb5..6e474419 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -173,9 +173,17 @@ jobs: done echo "${HOME}/pause absent, continuing workflow." - - name: 'Swap primary and replica nodes in inventory' + - name: Set up yq + uses: frenck/action-setup-yq@v1 + with: + version: v4.30.5 + + - name: 'Update inventory' run: | - sed -i.sedbak 's/role: primary$/role: failed/;s/primary/__tmp__/;s/spare-replica/__tmp2__/;s/replica/primary/;s/__tmp__/replica/;s/__tmp2__/replica/' \ + # Remove failed primary + yq -i 'del(.groups[].targets[] | select(.vars.role == "primary"))' spec/fixtures/litmus_inventory.yaml + # Swap primary and replica nodes + sed -i.sedbak 's/primary/__tmp__/;s/spare-replica/__tmp2__/;s/replica/primary/;s/__tmp__/replica/;s/__tmp2__/replica/' \ spec/fixtures/litmus_inventory.yaml - name: 'Upgrade PE on test cluster' diff --git a/.gitignore b/.gitignore index 04e37db2..1c920eef 100644 --- a/.gitignore +++ b/.gitignore @@ -36,4 +36,4 @@ spec/docker/**/*.tar.gz spec/docker/**/*.asc spec/docker/**/files/puppet-enterprise* spec/docker/.task_cache.json -.vscode/settings.json +.vscode/settings.json \ No newline at end of file From b793f1f50e49923181f574b83605449c0b95a19c Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Tue, 14 Feb 2023 14:42:58 +0100 Subject: [PATCH 45/46] display peadm config before and after node manager config --- .github/workflows/test-failover.yaml | 3 +++ plans/upgrade.pp | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/.github/workflows/test-failover.yaml b/.github/workflows/test-failover.yaml index 6e474419..bc271208 100644 --- a/.github/workflows/test-failover.yaml +++ b/.github/workflows/test-failover.yaml @@ -185,6 +185,9 @@ jobs: # Swap primary and replica nodes sed -i.sedbak 's/primary/__tmp__/;s/spare-replica/__tmp2__/;s/replica/primary/;s/__tmp__/replica/;s/__tmp2__/replica/' \ spec/fixtures/litmus_inventory.yaml + echo 
::group::info:inventory + sed -e 's/password: .*/password: "[redacted]"/' < spec/fixtures/litmus_inventory.yaml || true + echo ::endgroup:: - name: 'Upgrade PE on test cluster' if: ${{ success() && github.event.inputs.version_to_upgrade != '' }} diff --git a/plans/upgrade.pp b/plans/upgrade.pp index 7f6de081..1e202869 100644 --- a/plans/upgrade.pp +++ b/plans/upgrade.pp @@ -255,6 +255,9 @@ }, ) + # Show the peadm configuration before node manager setup + run_task('peadm::get_peadm_config', $primary_target) + # Update classification. This needs to be done now because if we don't, and # the PE Compiler node groups are wrong, then the compilers won't be able to # successfully classify and update @@ -275,6 +278,9 @@ require => Class['peadm::setup::node_manager_yaml'], } } + + # Show the peadm configuration after node manager setup + run_task('peadm::get_peadm_config', $primary_target) } peadm::plan_step('upgrade-primary-compilers') || { From 4e3e2c48f3e6a89ab63491fc8f742ad99eee1a87 Mon Sep 17 00:00:00 2001 From: Dimitri Tischenko Date: Wed, 15 Feb 2023 13:33:19 +0100 Subject: [PATCH 46/46] determine avail groups properly --- plans/upgrade.pp | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/plans/upgrade.pp b/plans/upgrade.pp index 1e202869..e6911874 100644 --- a/plans/upgrade.pp +++ b/plans/upgrade.pp @@ -255,12 +255,34 @@ }, ) - # Show the peadm configuration before node manager setup + # Log the peadm configuration before node manager setup run_task('peadm::get_peadm_config', $primary_target) # Update classification. This needs to be done now because if we don't, and # the PE Compiler node groups are wrong, then the compilers won't be able to # successfully classify and update + + # First, determine the correct hosts for the A and B availability groups + $server_a_host = $cert_extensions.dig($primary_target.peadm::certname(), peadm::oid('peadm_availability_group')) ? 
{ + 'A' => $primary_target.peadm::certname(), + default => $replica_target.peadm::certname(), + } + + $server_b_host = $server_a_host ? { + $primary_target.peadm::certname() => $replica_target.peadm::certname(), + default => $primary_target.peadm::certname(), + } + + $postgresql_a_host = $cert_extensions.dig($primary_postgresql_target.peadm::certname(), peadm::oid('peadm_availability_group')) ? { + 'A' => $primary_postgresql_target.peadm::certname(), + default => $replica_postgresql_target.peadm::certname(), + } + + $postgresql_b_host = $postgresql_a_host ? { + $primary_postgresql_target.peadm::certname() => $replica_postgresql_target.peadm::certname(), + default => $primary_postgresql_target.peadm::certname(), + } + apply($primary_target) { class { 'peadm::setup::node_manager_yaml': primary_host => $primary_target.peadm::certname(), @@ -268,19 +290,16 @@ class { 'peadm::setup::node_manager': primary_host => $primary_target.peadm::certname(), - server_a_host => $primary_target.peadm::certname(), - server_b_host => $replica_target.peadm::certname(), - postgresql_a_host => $primary_postgresql_target.peadm::certname(), - postgresql_b_host => $replica_postgresql_target.peadm::certname(), + server_a_host => $server_a_host, + server_b_host => $server_b_host, + postgresql_a_host => $postgresql_a_host, + postgresql_b_host => $postgresql_b_host, compiler_pool_address => $compiler_pool_address, internal_compiler_a_pool_address => $internal_compiler_a_pool_address, internal_compiler_b_pool_address => $internal_compiler_b_pool_address, require => Class['peadm::setup::node_manager_yaml'], } } - - # Show the peadm configuration after node manager setup - run_task('peadm::get_peadm_config', $primary_target) } peadm::plan_step('upgrade-primary-compilers') || {