# This plan is run with Bolt over SSH. Run it from a primary that is a promoted
# replica; it turns the old primary into a replica. The dns_alt_names parameter
# must be supplied if the old primary has alt names set in puppet.conf.
#
# Steps this plan takes:
# 1.  If dns_alt_names is defined, verify that puppetserver conf files in /etc/puppetlabs/puppetserver/conf.d have certificate-authority.allow-subject-alt-names set to true.
#     This should be managed to true by default.
# 2.  Disable the agent on the current and old primaries with puppet agent --disable.
# 3.  Stop all PE services on the old primary, and remove the puppet infra recover_configuration cron job.
# 4.  On the old primary, rm -rf /etc/puppetlabs/enterprise/*
# 5.  On the current primary, ensure that the previous primary's certs are removed with puppetserver ca clean --certname <old primary>
# 6.  On the old primary, make sure pe-postgresql is running, then run the
#     following with: su -s /bin/bash - pe-postgres -c "/opt/puppetlabs/server/bin/psql"
#         a. SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots;
#         b. UPDATE pg_database SET datallowconn = 'false' WHERE datname IN
#            ('pe-activity', 'pe-classifier', 'pe-orchestrator', 'pe-rbac');
#         c. SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname
#            IN ('pe-activity', 'pe-classifier', 'pe-orchestrator', 'pe-rbac');
#         d. DROP DATABASE IF EXISTS "pe-activity";
#         e. DROP DATABASE IF EXISTS "pe-classifier";
#         f. DROP DATABASE IF EXISTS "pe-orchestrator";
#         g. DROP DATABASE IF EXISTS "pe-rbac";
# 7.  On the old primary, stop pe-postgresql.
# 8.  On the old primary, ensure /etc/puppetlabs/puppet/ssl and /etc/puppetlabs/puppetserver/ca are removed.
# 9.  If dns_alt_names is defined, run puppet config set --section main dns_alt_names "<dns_alt_names value>" on the old primary.
# 10. On the old primary, run puppet ssl submit_request --server_list <current_primary>:8140. This will generate a CSR.
# 11. On the current primary, sign the cert, equivalent to 'puppetserver ca sign --certname <certname>'. Before we do this, we check the CSR with puppetserver ca list --certname <node>.
#     If dns_alt_names was not specified, we verify that no subject alt names appear in the CSR.
# 12. Copy /etc/puppetlabs/enterprise/hiera.yaml from the current primary to the old primary.
# 13. Re-enable the agent via puppet agent --enable.
# 14. Get a valid token via 'puppet access'. This plan uses a special temporary user for this purpose.
# 15. Run puppet infra provision replica <old_primary> --no-streaming.
# 16. Wait until replication and PuppetDB sync are complete.
# 17. Run puppet infra enable replica <old_primary>.
#
# If uninstall_workflow = true, then these are the steps:
# 1.  If dns_alt_names is defined, verify that puppetserver conf files in /etc/puppetlabs/puppetserver/conf.d have certificate-authority.allow-subject-alt-names set to true.
#     This should be managed to true by default.
# 2.  Move /var/log/puppetlabs on the old primary to a timestamped backup directory. The agent is not disabled in this workflow.
# 3.  Run 'puppet-enterprise-uninstaller' on the old primary.
# 4.  Install the agent fresh on the old primary.
# 5.  Run 'puppet infra provision replica --enable' on the current primary.
#
# @param host
#   The old primary that is to be turned into a replica.
# @param topology
#   Forwarded to the provision/enable tasks. With 'mono-with-compile',
#   agent_server_urls and pcp_brokers are required unless skip_agent_config
#   is true.
# @param uninstall_workflow
#   When true, uninstall PE from the old primary and reinstall the agent
#   instead of cleaning the existing installation in place.
# @param primary
#   The current primary (the promoted replica). Defaults to 'localhost'.
# @param replication_timeout_seconds
#   Forwarded to the provision task as replication_timeout. The RBAC token
#   lifetime is this value plus 900 seconds.
# @param skip_agent_config
#   Forwarded to the provision/enable tasks. When true, the closing hint
#   about running Puppet on all agents is not printed.
# @param agent_server_urls
#   Forwarded to the provision/enable tasks.
# @param pcp_brokers
#   Forwarded to the provision/enable tasks.
# @param dns_alt_names
#   Alt names to set for the old primary's new certificate. When set, the
#   plan first runs the is_subject_alt_names_allowed plan.
# @param rbac_account
#   Account handed to enterprise_tasks::with_puppet_access.
# @param rbac_password
#   Password handed to enterprise_tasks::with_puppet_access.
# @param force
#   Forwarded to verify_node, is_subject_alt_names_allowed and configure_agent.
plan enterprise_tasks::enable_ha_failover(
  TargetSpec $host,
  Enum['mono', 'mono-with-compile'] $topology,
  Optional[Boolean] $uninstall_workflow          = false,
  Optional[TargetSpec] $primary                  = 'localhost',
  Optional[Integer] $replication_timeout_seconds = 3600,
  Optional[Boolean] $skip_agent_config           = undef,
  Optional[String] $agent_server_urls            = undef,
  Optional[String] $pcp_brokers                  = undef,
  Optional[String] $dns_alt_names                = undef,
  Optional[Sensitive[String]] $rbac_password     = undef,
  Optional[Boolean] $force                       = false,
) {
  $plan = 'enable_ha_failover'
  $prev_primary = get_target($host)
  $curr_primary = get_target($primary)

  # Every step below needs root on both nodes, so bail out early if the
  # connection test reports anything other than root => 'true'.
  $connect_results = enterprise_tasks::test_connection([$prev_primary, $curr_primary])
  $failed_root_check = $connect_results.filter_set |$result| { $result.value['root'] != 'true' }
  unless $failed_root_check.empty {
    $failed_nodes = $failed_root_check.map |$result| { $result.target }
    fail_plan("Tasks are not running as root on the following nodes. Use the '--run-as root' flag.\n${failed_nodes}")
  }

  # Fail when either agent_server_urls or pcp_brokers is not supplied, when they need to be
  if ($topology == 'mono-with-compile' and !$skip_agent_config and (!$agent_server_urls or !$pcp_brokers)) {
    fail_plan(@(EOT/L))
      agent_server_urls and pcp_brokers are required parameters for the \
      'mono-with-compile' topology, unless skip_agent_config is set to true
      |-EOT
  }

  # Targets may be addressed by something other than their certname, so ask
  # each node's agent configuration for the real value.
  $certname_cmd = "${constants()['puppet_bin']} config print certname --section agent"
  $curr_primary_certname = run_command($certname_cmd, $curr_primary).first['stdout'].strip
  $prev_primary_certname = run_command($certname_cmd, $prev_primary).first['stdout'].strip
  # The token requested further down lives 900 seconds longer than the
  # replication timeout.
  $token_lifetime = $replication_timeout_seconds + 900

  enterprise_tasks::verify_node($curr_primary_certname, 'primary', $force)

  if $dns_alt_names {
    run_plan('enterprise_tasks::is_subject_alt_names_allowed', primary => $primary, force => $force)
  }
  # Only let the signing task accept subject alt names when the caller
  # actually asked for them.
  $allow_subject_alt_names = $dns_alt_names ? {
    undef   => false,
    default => true
  }

  if $uninstall_workflow {
    # Keep log files so we maintain a history of what's happened on this node.
    $timestamp = strftime(Timestamp.new, '%Y-%m-%d_%H-%M-%S')
    $var_log_backup = "/var/log/puppetlabs_${timestamp}"
    enterprise_tasks::message($plan, "Moving log files to ${var_log_backup}")
    run_command("mv /var/log/puppetlabs ${var_log_backup} && chown -R root:root ${var_log_backup}", $prev_primary)

    # We don't run inside a with_agent_disabled block here because we can't
    # when we run the provision command, and the uninstall/reinstall agent
    # process shouldn't really need the agent to be disabled on the primary.
    enterprise_tasks::message($plan, 'Running uninstaller on old primary')
    run_command('/opt/puppetlabs/bin/puppet-enterprise-uninstaller -d -p -y', $prev_primary)

    enterprise_tasks::message($plan, 'Reinstalling agent on old primary')
    # We already checked if we're allowed to set alt names, so don't check again
    run_plan('enterprise_tasks::configure_agent',
      agent                         => $prev_primary_certname,
      dns_alt_names                 => $dns_alt_names,
      check_allow_subject_alt_names => false,
      force                         => $force,
    )

    # The node is freshly installed here, so provisioning (streaming => true)
    # and enabling are done in a single task call.
    enterprise_tasks::message($plan, 'Provisioning and enabling old primary as a new replica')
    enterprise_tasks::with_puppet_access($curr_primary,
        'account'  => $rbac_account,
        'password' => $rbac_password,
        'lifetime' => "${token_lifetime}s",
      ) |$token_file, $_token| {
      # Catch the failure so the task's own error details can be printed
      # before the plan is failed with a short summary.
      $provision_result = catch_errors() || {
        run_task('enterprise_tasks::provision_replica', $curr_primary,
          host                => $prev_primary_certname,
          replication_timeout => $replication_timeout_seconds,
          streaming           => true,
          enable              => true,
          topology            => $topology,
          skip_agent_config   => $skip_agent_config,
          agent_server_urls   => $agent_server_urls,
          pcp_brokers         => $pcp_brokers,
          token_file          => $token_file,
        )
      }
      if $provision_result =~ Error {
        enterprise_tasks::message("${provision_result.details['object']}", "${provision_result.details['result_set'].first.error.details()}")
        fail_plan('ERROR: Failed to provision and enable replica')
      }
    }
  } else {
    enterprise_tasks::with_agent_disabled([$prev_primary, $curr_primary]) || {
      # Don't stop puppet since we are disabling the agent, and this way we
      # don't have to mess with the state of the service and restore it to
      # what it was doing before the plan started.
      enterprise_tasks::message($plan, 'Stopping PE services on old primary')
      run_task('enterprise_tasks::pe_services', $prev_primary,
        role           => 'primary',
        state          => 'stopped',
        include_puppet => false,
      )

      # Remove the recover_configuration cron job, since this should only
      # ever run on the primary, not the replica, and the puppet_enterprise
      # code will not remove the existing cron job.
      enterprise_tasks::message($plan, 'Removing recover_configuration cron job on old primary')
      apply($prev_primary) {
        cron { 'puppet infra recover_configuration':
          ensure => absent,
        }
      }

      enterprise_tasks::message($plan, 'Removing configuration data on old primary')
      run_command('rm -rf /etc/puppetlabs/enterprise/*', $prev_primary)

      enterprise_tasks::message($plan, 'Dropping pglogical databases on old primary')
      run_task('enterprise_tasks::drop_pglogical_databases', $prev_primary)

      enterprise_tasks::message($plan, 'Removing SSL and CA directories on old primary')
      run_command('rm -rf /etc/puppetlabs/puppet/ssl', $prev_primary)
      run_command('rm -rf /etc/puppetlabs/puppetserver/ca', $prev_primary)

      # The old certificate has to be gone from the current primary before
      # the old primary submits a new CSR under the same certname.
      enterprise_tasks::message($plan, 'Ensuring current primary does not contain SSL artifacts from the old primary')
      run_task('enterprise_tasks::puppetserver_ca_clean_api', $curr_primary,
        certname => $prev_primary_certname,
      )

      # dns_alt_names must be in puppet.conf before the CSR is generated so
      # that the request carries the alt names.
      if $dns_alt_names {
        enterprise_tasks::message($plan, 'Setting dns_alt_names in puppet.conf')
        run_command("${constants()['puppet_bin']} config set --section main dns_alt_names \"${dns_alt_names}\"", $prev_primary)
      }

      enterprise_tasks::message($plan, 'Generating CSR')
      run_command("${constants()['puppet_bin']} ssl submit_request --server_list ${curr_primary_certname}:8140", $prev_primary)

      enterprise_tasks::message($plan, 'Signing cert')
      run_task('enterprise_tasks::puppetserver_ca_sign_api', $curr_primary,
        certname                => $prev_primary_certname,
        allow_subject_alt_names => $allow_subject_alt_names,
        dns_alt_names           => $dns_alt_names,
      )

      enterprise_tasks::message($plan, 'Run puppet to download cert and fix up puppet.conf')
      run_task('enterprise_tasks::run_puppet', $prev_primary,
        alternate_host => $curr_primary_certname,
      )

      # NOTE(review): upload_file takes its source path from the machine
      # running this plan, so this appears to rely on the plan being run on
      # the current primary (as the plan header instructs) - confirm.
      enterprise_tasks::message($plan, 'Copying enterprise hiera.yaml from current primary to old primary')
      upload_file('/etc/puppetlabs/enterprise/hiera.yaml', '/etc/puppetlabs/enterprise/hiera.yaml', $prev_primary)
    }

    # Provision replica and running puppet through orchestrator requires the agent
    # to be enabled, so this is not included in the enterprise_tasks::with_agent_disabled block.
    # Should be safe to run this outside the block at this point.
    #
    # We run this with streaming = false because we do not want to use the pg_basebackup
    # workflow, as we are preserving the PuppetDB database to make this process faster.
    # This also means we can not use the --enable flag, so we have to do the enable
    # as a separate step.
    enterprise_tasks::message($plan, 'Provisioning old primary as a new replica')
    enterprise_tasks::with_puppet_access($curr_primary,
        'account'  => $rbac_account,
        'password' => $rbac_password,
        'lifetime' => "${token_lifetime}s",
      ) |$token_file, $_token| {
      # Both tasks share the same token file; each failure is caught so the
      # task's error details are printed before the plan is failed.
      $provision_result = catch_errors() || {
        run_task('enterprise_tasks::provision_replica', $curr_primary,
          host                => $prev_primary_certname,
          replication_timeout => $replication_timeout_seconds,
          streaming           => false,
          enable              => false,
          token_file          => $token_file,
        )
      }
      if $provision_result =~ Error {
        enterprise_tasks::message("${provision_result.details['object']}", "${provision_result.details['result_set'].first.error.details()}")
        fail_plan('ERROR: Failed to provision replica')
      }

      enterprise_tasks::message($plan, 'Enabling new replica')
      $enable_result = catch_errors() || {
        run_task('enterprise_tasks::enable_replica', $curr_primary,
          host              => $prev_primary_certname,
          topology          => $topology,
          skip_agent_config => $skip_agent_config,
          agent_server_urls => $agent_server_urls,
          pcp_brokers       => $pcp_brokers,
          token_file        => $token_file,
        )
      }
      if $enable_result =~ Error {
        enterprise_tasks::message("${enable_result.details['object']}", "${enable_result.details['result_set'].first.error.details()}")
        fail_plan('ERROR: Failed to enable replica')
      }
    }
  }

  enterprise_tasks::message($plan, "Provisioning and enabling of ${host} as a replica is complete.")

  # If skipping agent config, there is no need to run Puppet everywhere
  unless ($skip_agent_config) {
    enterprise_tasks::message($plan, @("EOT"/L))
    Agent configuration is managed with Puppet, and will be updated
    the next time each agent runs to enable replica failover.

    If you wish to immediately run Puppet on all your agents, you can do so
    with this command:

      puppet job run --no-enforce-environment --query 'nodes {deactivated is null and expired is null}'
    |-EOT
  }
}
