require 'puppet_x/puppetlabs/meep/configure/psql'
require 'puppet_x/util/utilities'

module PuppetX
  module Util

    # Provides additional functions for establishing a PE PostgreSQL server's
    # running and pglogical replication status based on services.conf entries.
    module PostgresqlStatus
      extend PuppetX::Util::Utilities

      # Path of the pg_isready executable used when probing a PostgreSQL
      # service. Kept behind a method so tests can override the location.
      #
      # @return [String] absolute path to pg_isready.
      def self.pg_isready_bin
        '/opt/puppetlabs/server/bin/pg_isready'
      end

      # Runs pg_isready against the host and port parsed from +service_url+.
      #
      # @param service_url [String] url of the PostgreSQL service to probe.
      # @param timeout_seconds [Integer] value passed to pg_isready's --timeout.
      # @return [Boolean] true if PostgreSQL service is ready for connections
      # (the command exited with status 0).
      # @raise Puppet::ExecutionFailure if command fails.
      def self.pg_isready(service_url, timeout_seconds)
        target = URI.parse(service_url)
        args = [
          pg_isready_bin,
          "--host=#{target.host}",
          "--port=#{target.port}",
          "--timeout=#{timeout_seconds}",
        ]
        # failonfail asks Puppet to raise when the command fails; combine
        # is passed through to Puppet's executor unchanged.
        exec_options = { failonfail: true, combine: true }

        outcome = Puppet::Util::Execution.execute(args, exec_options)
        outcome.exitstatus == 0
      end

      # Runs +query+ through psql over SSL against the host of +service_url+
      # and returns the rows with symbolized keys.
      #
      # @param service_url [String] url to Postgresql to query from.
      # @param query [String] query to execute on the postgresql node.
      # @param database [String] database to connect to.
      # @param user [String] postgresql user to connect as.
      # @param certs_dir [String] absolute path to the cert and key for ssl.
      # @return [Array] of row hashes from the query results.
      # @raise Puppet::ExecutionFailure if the psql query command fails.
      def self.query_postgresql(service_url:, query:, database:, user:, certs_dir:)
        # Puppet's ssldir has write perm on keys, which psql will not allow, so
        # the client cert and key come from the versioned postgresql certs dir
        # which psql can access. (The certs are the same as Puppet's.)
        # Only the CA cert is read from Puppet's ssldir.
        ssl_files = {
          sslrootcert: "#{Puppet[:ssldir]}/certs/ca.pem",
          sslcert: "#{certs_dir}/_local.cert.pem",
          sslkey: "#{certs_dir}/_local.private_key.pem",
        }

        # A missing file is only logged at debug level; the query is still
        # attempted.
        ssl_files.each_value do |path|
          next if File.exist?(path)

          Puppet.debug("PuppetX::Util::PostgresqlStatus.query_postgresql(): Unable to locate #{path}.")
        end

        connection = PuppetX::Puppetlabs::Meep::Configure::PSQL.new(
          sslmode: 'require',
          **ssl_files,
          host: URI.parse(service_url).host,
          database: database,
          user: user,
        )

        symbolize_keys(connection.query(query))
      end

      # Runs +query+ against the url found in +service_config+, connecting
      # with the database/user configured under +database_key+ (falling back
      # to "pe-<database_key>" for the database, and to the database name for
      # the user).
      #
      # A Puppet::ExecutionFailure from the query is caught and reported as
      # a warning alert instead of being raised.
      #
      # @param query [String] the SQL to run.
      # @param database_key [Symbol] key into service_config[:database_configs].
      # @param service_config [Hash] service details; :url, :certs_dir and
      # :database_configs are read.
      # @param running_on_master [Boolean] selects which user to connect as.
      # @return [Hash] of results and alerts:
      #
      # {
      #   results: [ query results ],
      #   alerts: [ {severity:, message: } ],
      # }
      def self.query_status(query, database_key, service_config:, running_on_master:)
        db_settings = (service_config[:database_configs] || {})[database_key] || {}
        database = db_settings[:database] || "pe-#{database_key}"

        # The primary must connect as a database user.
        # But the replica must connect as pe-ha-replication.
        user = running_on_master ? (db_settings[:user] || database) : 'pe-ha-replication'

        options = {
          service_url: service_config[:url],
          query: query,
          database: database,
          user: user,
          certs_dir: service_config[:certs_dir],
        }

        status = { results: [], alerts: [] }
        begin
          status[:results] = query_postgresql(**options)
        rescue Puppet::ExecutionFailure => e
          Puppet.debug("Error checking replication for #{options}: #{e}\n#{e.backtrace.join("\n")}")
          status[:alerts] << {
            severity: :warning,
            message: "Failed while checking replication status for #{database}; run with --debug for details.",
          }
        end
        status
      end

      # Returns replication slot information queried from the PostgreSQL
      # service described by +service_config+, plus any alerts produced while
      # querying.
      #
      #   {
      #     alerts: [ query errors ],
      #     provider_slots: [ pg_replication_slots rows, left-joined with
      #                       pg_stat_replication, as Hashes ],
      #   }
      #
      # Depending on the type of node (primary or replica), it is expected that
      # the query may return no rows. In a normal HA system with one way
      # replication from the primary to the replica(s), only the primary will
      # have provider_slots.
      #
      # However if a system is configured for two-way replication (experimental)
      # or otherwise misconfigured, there may be additional values returned.
      #
      # The query goes through query_status, which turns a
      # Puppet::ExecutionFailure into an alert rather than raising it.
      #
      # @param service_config [Hash] details of the postgresql service to query
      # obtained from services.conf.
      # @param running_on_master [Boolean] true if we are executing on the primary.
      # Affects what user to connect as.
      # @return [Hash] of query data and alerts.
      def self.get_replication_status(service_config, running_on_master:)
        # The pg_replication_slots view provides a listing of all replication
        # slots that currently exist on the database cluster, along with their
        # current state; the join adds the matching pg_stat_replication row for
        # the slot's active pid, when there is one.
        slot_query =
          'SELECT * ' \
          'FROM pg_replication_slots as slots ' \
          'LEFT OUTER JOIN pg_stat_replication as stats ' \
          'ON slots.active_pid = stats.pid'

        # These are system tables that are the same for the whole cluster,
        # so we only need to query once, but must connect to a database
        # that we have credentials to query.
        slot_results = query_status(
          slot_query,
          :classifier,
          service_config: service_config,
          running_on_master: running_on_master,
        )

        {
          alerts: [] + slot_results[:alerts],
          provider_slots: slot_results[:results],
        }
      end

      # Probes the PostgreSQL instance with pg_isready and records the outcome
      # in +service_status+.
      #
      # * ready: +state+ becomes :running.
      # * not ready (pg_isready returned false): +service_status+ is left as
      #   the caller supplied it.
      # * Puppet::ExecutionFailure: +state+ becomes :unreachable and an :error
      #   alert is appended (the failure is logged at debug level).
      #
      # @param service_url [String] url to the PostgreSQL instance to test.
      # @param service_status [Hash] hash of current status information to be updated
      # based on test results.
      # @param timeout_seconds [Integer] maximum seconds for connection attempt.
      # @return [Hash] updated service_status hash. The +state+ and possibly +alerts+
      # will be updated depending on success.
      def self.determine_postgresql_state(service_url, service_status, timeout_seconds)
        service_status[:state] = :running if pg_isready(service_url, timeout_seconds)

        # Always hand the hash back, including when pg_isready reports false,
        # so the return value matches the documented contract on every path.
        service_status
      rescue Puppet::ExecutionFailure => e
        Puppet.debug("Error executing pg_isready for #{service_url}: #{e}\n#{e.backtrace.join("\n")}")
        service_status[:state] = :unreachable
        service_status[:alerts] ||= []
        service_status[:alerts] << { :severity => :error, :message => "Failed attempting to connect: #{e} (Run with --debug for more information)" }
        service_status
      end

      # Classifies the PostgreSQL node behind +service_url+ by comparing the
      # url's host with the primary and replica certnames.
      #
      # @param service_url [String] url of the postgresql service.
      # @param master [String] the primary certname.
      # @param replicas [Array<String>] array of replica certnames.
      # @return [Symbol] :master when the host equals +master+, :replica when
      # it is listed in +replicas+, otherwise :standalone.
      def self.determine_postgresql_node_type(service_url, master, replicas)
        host = URI.parse(service_url).host

        # The primary check comes first, so a host that is both the primary
        # and listed as a replica is reported as :master.
        return :master if master == host
        return :replica if replicas.include?(host)

        :standalone
      end

      # Given the hash output of {PostgresqlStatus.get_replication_status()},
      # re-key the provider_slots rows by slot name, pairing each row with the
      # certname of its subscriber.
      #
      #  {
      #    slot_1: {
      #      provider_slot: { copy of the slot row },
      #      subscriber: 'certname, or nil when the application_name is unknown',
      #    },
      #    ...
      #  }
      #
      # @param psql_results [Hash] of query results.
      # @param certname_map [Hash] translation of pglogical subscription/node
      # hash strings -> certname.
      # @return [Hash] transformed as above or empty if inputs empty.
      def self.transform_master_replication_state(psql_results, certname_map)
        rows = psql_results[:provider_slots] || []

        keyed_rows = rows.map do |row|
          details = {
            # Shallow copy, so the returned structure does not alias the row.
            provider_slot: row.dup,
            # The slot's application_name is the key looked up in the map.
            subscriber: certname_map[row[:application_name]],
          }
          [row[:slot_name].to_sym, details]
        end

        keyed_rows.to_h
      end

      # Builds one alert per replication slot describing whether replication
      # of the slot's database is active, its streaming state and its
      # subscriber.
      #
      # Severity is :info only for a slot that is active ('t') and in the
      # 'streaming' state; every other combination is a :warning.
      #
      # @param replication_status_provider_slots [Hash] the output of
      # {PostgresqlStatus.transform_master_replication_state()}
      # @return [Array] of alert hashes, each with a severity and message,
      # or an empty array if input is empty.
      def self.alerts_for_master_replication_state(replication_status_provider_slots)
        replication_status_provider_slots.map do |_slot_name, details|
          slot = details[:provider_slot]
          subscriber = details[:subscriber] || ''
          active = slot[:active] == 't'
          state = slot[:state]

          state_part = state.nil? ? ' (streaming state unknown)' : " and #{state}"
          target_part = subscriber.empty? ? ' (to ?)' : " to #{subscriber}"
          # State and subscriber are only reported for an active slot.
          summary = active ? "active#{state_part}#{target_part}" : 'inactive'

          {
            severity: active && state == 'streaming' ? :info : :warning,
            message: "Replication of #{slot[:database]} is #{summary}",
          }
        end
      end

      # @param certname [String] certname to encode.
      # @return [String] the hash used to encode pglogical node and subscription
      # strings for a particular certname: the first 14 characters of the
      # certname's SHA1 hex digest.
      def self.pglogical_hash(certname)
        Digest::SHA1.hexdigest(certname)[0, 14]
      end

      # Builds a lookup table from the pglogical node ("n<hash>") and
      # subscription ("s<hash>") identifiers back to the certname they were
      # generated from.
      #
      # ({PostgresqlStatus.pglogical_hash()} is used to generate the same
      # hashes used by the puppet_enterprise module.)
      #
      # @param certnames [Array<String,Array<String>>] array of certnames and/or
      # arrays of certnames to be flattened and used to generate the map.
      # @return [Hash] mapping of the n123abc... and s123abc... hashes to
      # actual certnames.
      def self.create_pglogical_certname_map(*certnames)
        pairs = certnames.flatten.flat_map do |certname|
          name = certname.to_s
          digest = pglogical_hash(name)
          # Subscription key first, then node key, for each certname.
          [["s#{digest}", name], ["n#{digest}", name]]
        end

        pairs.to_h
      end

      # Queries replication state for the service and records it, together
      # with the alerts derived from it, in +service_status+:
      #
      # * service_status[:status][:replication][:provider_slots] receives the
      #   transformed slot information.
      # * service_status[:alerts] gains the per-slot alerts followed by any
      #   alerts raised while querying. A missing :alerts entry is treated as
      #   an empty list.
      #
      # @param service_url [String] the url of the postgresql service to check
      # (not referenced by this method; the url is read from +service_config+
      # further down the call chain).
      # @param service_status [Hash] the status Hash we are building for the service.
      # @param service_config [Hash] the services.conf hash of details about
      # the service.
      # @param master [String] the primary certname.
      # @param replicas [Array<String>] array of replica certnames.
      # @return [Hash] updated +service_status+ hash with transformed postgres
      # query results about replication state and any alerts derived from that
      # data.
      def self.determine_pglogical_replication_status(service_url:, service_status:, service_config:, master:, replicas:)
        running_on_master = (master == Puppet[:certname])
        certname_map = create_pglogical_certname_map(master, replicas)

        # Attach the (initially empty) replication hash before querying, so
        # the :status entry is present even if a later step raises.
        replication_status = {}
        service_status[:status] = { replication: replication_status }

        psql_results = get_replication_status(service_config, running_on_master: running_on_master)
        provider_slots = transform_master_replication_state(psql_results, certname_map)
        replication_status[:provider_slots] = provider_slots

        # Tolerate a status hash that arrives without an :alerts array rather
        # than failing with NoMethodError on nil.
        existing_alerts = service_status[:alerts] || []
        slot_alerts = alerts_for_master_replication_state(provider_slots)
        query_alerts = psql_results[:alerts] || []
        service_status[:alerts] = existing_alerts + slot_alerts + query_alerts

        service_status
      end

      # The PE PostgreSQL service does not have a TK /status endpoint (or any
      # http endpoint). Looking up service status requires falling back to
      # Postgresql command line utilities.
      #
      # Every configured service starts from the "unreachable" status and is
      # then probed. Replication details are only added when the service is
      # running and the node is a primary or replica.
      #
      # @param service_url [String] the url of the postgresql service to check.
      # @param configured_services [Array<Hash>] an array of postgresql service
      # hashes extracted from services.conf, for a single node. Typically will
      # have only one entry, but services_for() expects to work with and pass
      # on an Array.
      # @param timeout_seconds [Integer] timeout for connection attempts.
      # @param nodes_config [Array<Hash>] Array of PE infrastructure node
      # hashes from services.conf (accepted but not referenced by this method).
      # @param master [String] certname of the primary node.
      # @param replicas [Array<String>] array of replica certnames.
      # @return [Array<Hash>] one status hash per configured service.
      def self.try_get_postgresql_status(service_url, configured_services, timeout_seconds, nodes_config:, master:, replicas:)
        node_type = determine_postgresql_node_type(service_url, master, replicas)
        # Determining pglogical replication status is only relevant for
        # primary/replica nodes.
        replicating_node = [:master, :replica].include?(node_type)

        # TK apps would have produced these details internally as part of their
        # /status response...
        #
        # (Currently there is only one postgres service per node, but we are
        # given an array.)
        configured_services.map do |svc|
          status = unreachable_status_for_service(svc) # until we know otherwise
          determine_postgresql_state(service_url, status, timeout_seconds)

          # PostgreSQL replication status requires PostgreSQL to be running.
          if status[:state] == :running && replicating_node
            determine_pglogical_replication_status(
              service_url: service_url,
              service_status: status,
              service_config: svc,
              master: master,
              replicas: replicas
            )
            status[:replication_mode] = node_type
          end

          status
        end
      end
    end
  end
end
