#!/opt/puppetlabs/server/apps/bolt-server/bin/ruby
#
# This will create puppet-strings docs for every module in JSON format.
#
# It currently creates a directory, ".perforce-generated-docs", in each
# environment desired and within that directory is:
#  - a "caches" directory containing the yardoc DB for each module
#  - "out.log" and "err.log" files with information about the generation run
#  - an "input.json" with the generation run's input
#  - a "list.json" file containing a short description of each entity documented
#    in the environment
#  - a <module name>.list.json file containing a short description of each entity
#    documented in the named module
#  - a <module name>.docs.json file containing the complete documentation for each
#    entity in the named module
#
# It expects its input on stdin and in the shape of
# (all config values are optional, defaults shown):
# {
#   "config": {
#     "environments": ["production"],
#     "repo-id": "puppet-code",
#     "concurrency": 3,
#     "timeout": 300
#   },
#   "repos-status": {
#     ...
#     "puppet-code": {
#       "status": "new-commits",
#       ...
#       "submodules": {
#         "production": {
#           "status": "new-commits",
#           ...
#         }
#         ...additional environments
#       }
#     }
#   }
# }
#
# It will match environments (submodules in file-sync) that have the "new-commits"
# status with those allowed via the environments config parameter. A missing or
# null value for the environments parameter implies the default of ["production"];
# an array containing only the sentinel value "all-puppet-environments" implies that
# all environments are allowed to have documentation generated for them.
#
# Note: this usage of the sentinel value is a workaround for the pe_hocon Puppet
# type, which will not reduce an existing array to an empty array - it treats
# a value of an empty array as merely ensuring that the value exists as an array
# and does not manage its content. (For the setting to actually be an empty array
# it must not have existed in the file prior to that Puppet run.)
#
# It will write a file "in-progress" to signal to consumers that it is still
# generating documentation. It will also attempt to reuse
# strings (yard) caches, but only if versioned dirs is enabled.
#
require 'date'
require 'fileutils'
require 'json'
require 'stringio'
require 'time'
require 'yard'

# This is adapted from the PE Plan Runner
# Forks child processes to run blocks of work and keeps track of them in a
# process table so that no more than config[:max_concurrency] run at once.
#
# This is adapted from the PE Plan Runner.
class ProcessManager
  attr_accessor :process_table, :config

  # Stand-in for a Process::Status when the real exit status of a child
  # could not be obtained; responds to the two methods the table clearing uses.
  StubProcess = Struct.new(:pid, :exitstatus)

  # After sending KILL, poll this many times (sleeping KILL_REAP_INTERVAL
  # seconds in between) for the child to become reapable. Bounded so that a
  # child stuck in the kernel can never hang the manager.
  KILL_REAP_ATTEMPTS = 50
  KILL_REAP_INTERVAL = 0.01

  # @param config [Hash] must contain :max_concurrency; the whole hash is also
  #   handed to every block started through #run
  def initialize(config)
    @config = config
    @process_table = {}
  end

  #
  # These top methods can be considered this class' public api.
  #

  # Clear any finished processes from the process table, block while we are
  # at our process budget until there is space, fork, and then record the
  # new child in the process table.
  #
  # @param data [Object] passed as the second argument to the block
  # @yield [config, data] runs inside the forked child
  # @return [Integer] pid of the forked child
  def run(data, &block)
    clear_table_of_completed_processes

    while @process_table.length >= @config[:max_concurrency]
      puts("Reached maximum allowed concurrent docs processes: " \
                    "#{@process_table.length}/#{@config[:max_concurrency]}, " \
                    "waiting until one finishes.")

      _pid, process_status = wait_for_any_process
      clear_table_of_completed_process(process_status)
    end

    pid = managed_fork(block, data)

    puts("Existing entry in process table with pid: #{pid}, overwriting!") if @process_table[pid]

    @process_table[pid] = {}
    pid
  end

  # Empties the process table: children that already finished are reaped,
  # children still running are terminated (TERM, then KILL) and reaped.
  def clear_process_table
    puts("Clearing all processes")
    # Iterate over a snapshot of the pids: every pass deletes its entry from
    # the table, and the table must not be modified while it is iterated.
    @process_table.keys.each do |pid|
      status = nil
      begin
        _pid, status = fetch_given_process_result(pid)
        if status
          puts("Process #{pid} already completed")
        else
          # terminate_pid reaps the child itself; waiting on the pid again
          # here would raise ECHILD and lose the real exit status.
          status = terminate_pid(pid, @process_table[pid])
        end
      rescue Errno::ECHILD
        puts("Process #{pid} in process table could not be found by the kernel")
      ensure
        # Create a stubbed process so that we can still clear the process
        # from the table when no real status could be collected.
        status ||= StubProcess.new(pid, 'unknown')
        clear_table_of_completed_process(status)
      end
    end
  end

  # Logs the exit of a child and removes it from the process table.
  #
  # @param process [#pid, #exitstatus] a Process::Status or StubProcess
  def clear_table_of_completed_process(process)
    puts("Process #{process.pid} generating docs, exited with #{process.exitstatus}.")

    @process_table.delete(process.pid)
  end

  # Reaps, without blocking, every child that has already exited.
  def clear_table_of_completed_processes
    while (info = fetch_any_process_result)
      _pid, process_status = info
      clear_table_of_completed_process(process_status)
    end
  rescue Errno::ECHILD
    # There are no children at all, so there is nothing to clear.
  end

  # Waits for the children to finish, polling every two seconds, and
  # forcefully clears whatever is still in the table once the timeout passed.
  #
  # @param timeout [Numeric] approximate number of seconds to wait
  def wait_for_children(timeout)
    (timeout / 2).to_i.times do
      # Reap everything that is ready instead of one child per tick.
      while (info = fetch_any_process_result)
        _pid, process_status = info
        clear_table_of_completed_process(process_status)
      end
      # Nothing left to wait for; do not sleep another tick.
      return if @process_table.empty?

      sleep 2
    end
    clear_process_table
  rescue Errno::ECHILD
    # No children remain to wait for.
  end

  # Asks a child to stop with TERM and, if it has not exited right after,
  # stops it with KILL.
  #
  # @param pid [Integer] pid of the child
  # @param info [Hash, nil] the child's process table entry (currently unused)
  # @return [Process::Status, nil] the exit status when the child could be
  #   reaped, nil when it did not become reapable in time
  def terminate_pid(pid, info)
    puts("Sending process #{pid} generating docs the TERM signal")
    signal("TERM", pid)
    sleep 0.001

    _pid, status = fetch_given_process_result(pid)
    return status if status

    signal("KILL", pid)
    puts("Process #{pid} generating docs did not quit gracefully, forcefully stopped")
    reap_killed_process(pid)
  end

  # Forks a child that installs the signal handlers, calls the block with
  # (config, data) and exits; a StandardError in the block is printed and
  # makes the child exit with status 1.
  #
  # @return [Integer] pid of the child
  def managed_fork(block, data)
    puts("Forking new process to run docs generation")

    internal_fork do
      set_signal_handlers

      # Actually run the thing now
      block.call(config, data)

      exit
    rescue StandardError => e
      puts e.full_message
      exit(1)
    end
  end

  # Non-blocking wait on one specific child.
  #
  # @return [Array(Integer, Process::Status), nil] nil while still running
  def fetch_given_process_result(pid)
    Process.wait2(pid, Process::WNOHANG)
  end

  # Non-blocking wait on any child.
  #
  # @return [Array(Integer, Process::Status), nil] nil when none has exited
  def fetch_any_process_result
    Process.wait2(-1, Process::WNOHANG)
  end

  # Blocking wait on any child.
  def wait_for_any_process
    Process.wait2(-1)
  end

  def signal(sig, pid)
    Process.kill(sig, pid)
  end

  def internal_fork(&block)
    fork(&block)
  end

  # Makes TERM end the child through a regular exit.
  def set_signal_handlers
    Signal.trap("TERM") do
      exit
    end
  end

  private

  # Polls for a child that was just sent KILL so that it does not linger as
  # a zombie and its real exit status can be reported.
  #
  # @return [Process::Status, nil] nil when the child never became reapable
  def reap_killed_process(pid)
    KILL_REAP_ATTEMPTS.times do
      _pid, status = fetch_given_process_result(pid)
      return status if status

      sleep KILL_REAP_INTERVAL
    end
    nil
  end
end

module PuppetDocsGenerator
  # Names of modules for which no documentation is generated; a module whose
  # name is in this list is skipped when an environment's modules are processed.
  MODULES_TO_SKIP = [
    'pe_staging', 'orchestrator_internal', 'enterprise_tasks', 'pe_support_script',
    'pe_bootstrap', 'pe_status_check', 'pe_concat', 'pe_databases', 'pe_hocon',
    'pe_infrastructure', 'pe_inifile', 'pe_install', 'pe_nginx', 'pe_patch',
    'pe_postgresql', 'pe_puppet_authorization', 'pe_r10k', 'pe_repo', 'edgeops',
    'playbook_runner'
  ].freeze

  # Sentinel: when it is the only entry of the "environments" config value,
  # every deployed environment is allowed to have docs generated.
  ALL_ENVIRONMENTS_VALUE = 'all-puppet-environments'

  # The repo id is appended to this path to find a repo's versioned dirs.
  VERSIONED_DIRS_BASE_PATH = "/opt/puppetlabs/server/data/puppetserver/filesync/client/versioned-dirs/"
 
  # The environment name is appended to this path when an environment does
  # not report a resolved path of its own.
  DEFAULT_CODEDIR = "/etc/puppetlabs/puppetserver/code/environments/"
  # Directory created inside each environment to hold the generated docs.
  DOCS_DIR = ".perforce-generated-docs"
  # Subdirectory of DOCS_DIR holding the per-module yardoc caches.
  CACHES_DIR = "caches"

  # Both bounds are computed once, when this file is loaded, and delimit the
  # window in which a versioned dir counts as recently superseded.
  now = Time.now
  AN_HOUR_AGO = (now - 3600).to_datetime
  NOW = now.to_datetime

  # Helpers to buffer the process' output in memory, send it to log files
  # and finally put the real stdout/stderr back in place.
  module OutputRedirection
    # Starts buffering everything written to $stdout/$stderr in memory.
    #
    # @return [Array(IO, IO, StringIO, StringIO)] duplicates of the real
    #   stdout and stderr (needed later to restore them) followed by the
    #   in-memory buffers now receiving stdout and stderr
    def self.capture_streams
      original_stdout = STDOUT.dup
      original_stderr = STDERR.dup
      stdout_log = StringIO.new
      stderr_log = StringIO.new

      $stdout = stdout_log
      $stderr = stderr_log

      [original_stdout, original_stderr, stdout_log, stderr_log]
    end

    # Writes the output buffered so far to the log files and points the
    # process' stdout/stderr at those files.
    #
    # The saved originals are deliberately left untouched here: they are the
    # only handles to the real stdout/stderr and restore_streams needs them
    # intact. Redirecting STDOUT/STDERR themselves also means that output of
    # child processes inheriting those descriptors lands in the log files.
    #
    # @param original_stdout [IO] unused, kept for interface compatibility
    # @param original_stderr [IO] unused, kept for interface compatibility
    # @param initial_stdout [String] stdout content buffered so far
    # @param initial_stderr [String] stderr content buffered so far
    # @param out_file [File] opened log file for stdout
    # @param err_file [File] opened log file for stderr
    def self.redirect_to_files(original_stdout, original_stderr, initial_stdout, initial_stderr, out_file, err_file)
      err_file.puts(initial_stderr)
      out_file.puts(initial_stdout)
      # Make sure the buffered content precedes anything written afterwards.
      err_file.flush
      out_file.flush

      $stdout = STDOUT
      $stderr = STDERR
      $stdout.reopen(out_file)
      $stderr.reopen(err_file)
      $stdout.sync = true
      $stderr.sync = true
    end

    # Makes $stdout/$stderr write to the real stdout/stderr again and closes
    # the log files, if given.
    #
    # @param original_stdout [IO] duplicate returned by capture_streams
    # @param original_stderr [IO] duplicate returned by capture_streams
    # @param out_file [File, nil] log file to close
    # @param err_file [File, nil] log file to close
    def self.restore_streams(original_stdout, original_stderr, out_file = nil, err_file = nil)
      if $stdout.is_a?(StringIO)
        $stdout = original_stdout
      else
        $stdout.reopen(original_stdout)
      end

      if $stderr.is_a?(StringIO)
        $stderr = original_stderr
      else
        $stderr.reopen(original_stderr)
      end

      out_file.close if out_file
      err_file.close if err_file
    end
  end

  # Reads the script's JSON input and works out which environments need
  # documentation generated.
  module InputParser
    # Repo id used when the config does not name one.
    DEFAULT_REPO_ID = 'puppet-code'

    # @return [Object] the JSON document read from stdin
    # @raise [JSON::ParserError] when stdin does not hold valid JSON
    def self.parse_stdin
      JSON.parse($stdin.read)
    end

    # @param input [Hash] the parsed input
    # @return [Hash] the "config" section; empty when the input has none,
    #   since every config value is optional
    def self.extract_config(input)
      input["config"] || {}
    end

    # Lists the environments (submodules of the configured repo) whose
    # status is "new-commits".
    #
    # @param input [Hash] the parsed input
    # @param config [Hash] the config section of the input
    # @return [Array<String>] environment names; empty when the repo has no
    #   submodule statuses in the input
    def self.extract_deployed_environments(input, config)
      # Apply the default repo id here as well; looking up a nil repo id
      # would silently yield no environments at all.
      repo_id = config["repo-id"] || DEFAULT_REPO_ID
      environment_statuses = input.dig('repos-status', repo_id, 'submodules')
      return [] unless environment_statuses

      environment_statuses.select { |_, info| info['status'] == 'new-commits' }.keys
    end

    # @param config [Hash] the config section of the input
    # @return [Array<String>] the allowed environment names; an empty array
    #   stands for "all environments"
    def self.determine_allowed_environments(config)
      allowed = config["environments"]

      if allowed.nil?
        puts "Allowed environments unconfigured, assuming default of 'production'."
        ["production"]
      elsif (allowed.length == 1) && (allowed.first == ALL_ENVIRONMENTS_VALUE)
        puts "Allowed environments set to ['#{ALL_ENVIRONMENTS_VALUE}'], allowing all evironments."
        []
      else
        allowed
      end
    end

    # @param allowed_environments [Array<String>] empty means everything is allowed
    # @param deployed_environments [Array<String>] environments with new commits
    # @return [Array<String>] the environments to generate docs for
    def self.compute_environments_to_generate(allowed_environments, deployed_environments)
      if allowed_environments.empty?
        puts "Allowed environments explicitly set to '[]'; " +
          "generating docs for all deployed environments: #{deployed_environments}."
        deployed_environments
      else
        intersection = allowed_environments.intersection(deployed_environments)
        puts "Allowed environments set to '#{allowed_environments}'; " +
          "script called for deployed environments '#{deployed_environments}'; " +
          "generating docs for their intersection: #{intersection}."
        intersection
      end
    end
  end

  # Loads Puppet and puppet-strings and initializes Puppet's settings for
  # the server run mode.
  module PuppetInitializer
    # The libraries are required here, not at the top of the file, so they
    # are only loaded once this method is called.
    def self.initialize_puppet
      %w[puppet puppet/application_support puppet-strings].each { |library| require library }

      server_defaults = {
        name: Puppet::Util::RunMode[:server],
        run_mode: :server,
        confdir: "/etc/puppetlabs/puppet",
        codedir: "/etc/puppetlabs/code",
        vardir: "/opt/puppetlabs/puppet/cache",
        publicdir: "/opt/puppetlabs/puppet/public",
        rundir: "/var/run/puppetlabs",
        logdir: "/var/log/puppetlabs/puppet"
      }

      settings = Puppet.settings
      settings.preferred_run_mode = :server
      settings.initialize_app_defaults(server_defaults)
      Puppet.initialize_settings
    end
  end

  # Filesystem handling for a generation run: output directories, reuse of
  # the previous deployment's yardoc caches, the "in-progress" marker and
  # the log files.
  module DirectoryManager
    # @param environment [#resolved_path] the environment object
    # @param desired_environment [String] the environment's name
    # @return [String] the environment's resolved path, or the default code
    #   dir joined with the name when it has none
    def self.resolve_environment_path(environment, desired_environment)
      environment.resolved_path || (DEFAULT_CODEDIR + desired_environment)
    end

    # Creates the docs directory and its caches subdirectory.
    #
    # @return [Array(String, String)] docs dir and caches dir
    def self.setup_docs_directory(env_path)
      docs_dir = File.join(env_path, DOCS_DIR)
      FileUtils.mkdir_p(docs_dir)

      caches_dir = File.join(docs_dir, CACHES_DIR)
      FileUtils.mkdir_p(caches_dir)

      [docs_dir, caches_dir]
    end

    # Reads the "superseded-at" timestamp from a versioned dir's
    # ".metadata.json".
    #
    # Cache reuse is only an optimisation, so metadata that cannot be parsed
    # is reported and treated like missing metadata instead of failing the run.
    #
    # @return [DateTime, nil] nil when there is no usable timestamp
    def self.get_superseded_datetime(versioned_dir)
      metadata_file = File.join(versioned_dir, ".metadata.json")
      return unless File.exist?(metadata_file)

      metadata = JSON.parse(File.read(metadata_file))
      return unless metadata.is_a?(Hash)

      superseded_at = metadata['superseded-at']
      return if superseded_at.nil?

      DateTime.parse(superseded_at)
    rescue JSON::ParserError, ArgumentError, TypeError => e
      puts "Ignoring unusable metadata in #{metadata_file}: #{e.message}"
      nil
    end

    # Finds the versioned dir of the environment with the most recent
    # "superseded-at" timestamp.
    #
    # @return [Array(DateTime, String), Array(nil, Object)] the timestamp and
    #   directory; the timestamp is nil when no directory carries one
    def self.find_latest_versioned_dir(versioned_dirs, desired_environment)
      # NOTE(review): this matches every entry whose name merely starts with
      # the environment name, so "prod" would also pick up "production..."
      # directories - confirm against the versioned dir naming scheme.
      candidate_dirs = Dir[File.join(versioned_dirs, desired_environment + "**")]

      dirs_by_superseded_at = candidate_dirs.map do |dir|
        [get_superseded_datetime(dir), dir]
      end.to_h

      latest_time = dirs_by_superseded_at.keys.compact.max

      [latest_time, dirs_by_superseded_at[latest_time]]
    end

    def self.recently_superseded?(datetime)
      # If a versioned dir hasn't been superseded recently then versioned dirs
      # is likely disabled and we've found an orphaned environment
      datetime.between?(AN_HOUR_AGO, NOW)
    end

    # Copies the yardoc caches of the previous deployment into caches_dir.
    #
    # The copy is done in-process instead of through a shell so that paths
    # with spaces or shell metacharacters are handled correctly, and a failed
    # copy is reported; generation then simply runs without the cache.
    def self.copy_cache_if_exists(latest_versioned_dir, caches_dir, desired_environment)
      previous_caches_dir = File.join(latest_versioned_dir, DOCS_DIR, CACHES_DIR)

      unless File.exist?(previous_caches_dir)
        puts "Not using cache for environment #{desired_environment}, no cache dir found"
        return
      end

      copy_start = Time.now
      puts "Copying previous deployment's cache"

      begin
        # Hidden entries are left out, as a shell "*" would leave them out.
        cached_entries = Dir.children(previous_caches_dir)
                            .reject { |entry| entry.start_with?('.') }
                            .sort
                            .map { |entry| File.join(previous_caches_dir, entry) }
        FileUtils.cp_r(cached_entries, caches_dir, preserve: true, dereference_root: false)
      rescue SystemCallError => e
        puts "Failed to copy previous deployment's cache: #{e.message}"
      end

      copy_end = Time.now
      puts "Finished copying caches in #{copy_end - copy_start} seconds"
    end

    # Seeds caches_dir from the most recently superseded versioned dir of
    # the environment, when there is a recent one.
    def self.setup_versioned_caches(repo_id, desired_environment, caches_dir)
      versioned_dirs = VERSIONED_DIRS_BASE_PATH + repo_id

      unless File.exist?(versioned_dirs)
        puts "Not using cache for environment #{desired_environment}, versioned dirs not found"
        return
      end

      latest_time, latest_versioned_dir = find_latest_versioned_dir(versioned_dirs, desired_environment)

      if latest_time.nil?
        puts "Not using cache for environment #{desired_environment}, no previous versioned dir found"
        return
      end

      unless recently_superseded?(latest_time)
        puts "Not using cache for environment #{desired_environment}, no recent versioned dir found"
        return
      end

      copy_cache_if_exists(latest_versioned_dir, caches_dir, desired_environment)
    end

    # Creates the "in-progress" marker holding the current time.
    #
    # @return [String] path of the marker
    # @raise [Errno::EEXIST] when the marker already exists
    def self.create_processing_lock(docs_dir)
      processing_file = File.join(docs_dir, "in-progress")
      File.write(processing_file, Time.now.iso8601, flags: File::CREAT | File::EXCL)
      processing_file
    end

    # Removes the marker. This is called during cleanup, so a marker that is
    # already gone must not raise.
    def self.remove_processing_lock(processing_file)
      FileUtils.rm_f(processing_file) if processing_file
    end

    # Opens (truncating) the run's log files.
    #
    # @return [Array(File, File)] out.log and err.log, in that order
    def self.open_log_files(docs_dir)
      err_file = File.open(File.join(docs_dir, 'err.log'), 'w+')
      out_file = File.open(File.join(docs_dir, 'out.log'), 'w+')
      [out_file, err_file]
    end
  end

  # Turns the contents of a documentation registry into plain data.
  module DocumentationExtractor
    # Key used in the generated docs => type symbol queried from the registry.
    REGISTRY_TYPE_MAP = {
      puppet_classes: :puppet_class,
      data_types: :puppet_data_type,
      data_type_aliases: :puppet_data_type_alias,
      defined_types: :puppet_defined_type,
      resource_types: :puppet_type,
      providers: :puppet_provider,
      puppet_functions: :puppet_function,
      puppet_tasks: :puppet_task,
      puppet_plans: :puppet_plan
    }.freeze

    # @param registry [#all] the registry to read
    # @return [Hash{Symbol => Array<Hash>}] for every key of
    #   REGISTRY_TYPE_MAP, the entities of the mapped registry type
    def self.extract_all_entities(registry)
      REGISTRY_TYPE_MAP.each_with_object({}) do |(docs_key, registry_type), docs|
        docs[docs_key] = extract_entities_by_type(registry, registry_type)
      end
    end

    # @return [Array<Hash>] the registry's entities of the given type, sorted
    #   by name and converted to sanitized hashes; the array handed out by
    #   the registry is modified in place
    def self.extract_entities_by_type(registry, registry_type)
      found = registry.all(registry_type)
      found.sort_by!(&:name)
      found.map! do |entity|
        sanitize_entity(entity.to_hash)
      end
    end

    # Drops the :source entry from the hash.
    #
    # @return [Hash] the very hash that was passed in
    def self.sanitize_entity(entity_hash)
      entity_hash.tap { |hash| hash.delete(:source) }
    end

    # @param docs [Hash] entities grouped by entity type
    # @param module_name [String] name recorded with every entry
    # @return [Array<Hash>] one short description per entity
    def self.build_entity_list(docs, module_name)
      docs.each_with_object([]) do |(entity_type, entities), summaries|
        entities.each do |entity|
          summaries << {
            "name" => entity[:name],
            "entity_type" => entity_type,
            "description" => entity.dig(:docstring, :text),
            "module" => module_name
          }
        end
      end
    end
  end

  # Generates and writes the documentation of a single module.
  module ModuleDocGenerator
    # @return [Array<String>] puppet-strings' default search patterns, each
    #   prefixed with the module's path
    def self.generate_search_patterns(module_path)
      PuppetStrings::DEFAULT_SEARCH_PATTERNS.map { |pattern| File.join(module_path, pattern) }
    end

    # @param db_path [String] location of the yardoc database
    # @return [Array<String>] the yard arguments handed to puppet-strings
    def self.yard_arguments(db_path)
      fixed_flags = %w[
        --no-stats --quiet --no-progress --no-output
        --no-document --use-cache --protected --private
      ]
      fixed_flags.unshift('--db', db_path)
    end

    # Generates the docs of one module, writes its two JSON files into
    # docs_dir and clears the registry afterwards.
    #
    # @param mod [#name, #path] the module to document
    # @param caches_dir [String] directory holding the per-module yardoc db
    # @param docs_dir [String] directory receiving the JSON files
    # @return [Array<Hash>] the short descriptions of the module's entities
    def self.generate_for_module(mod, caches_dir, docs_dir)
      name = mod.name
      started_at = Time.now
      puts "Starting documentation generation for module #{name} at #{started_at}"

      cache_db = File.join(caches_dir, "#{name}_cache")
      search_patterns = generate_search_patterns(mod.path)
      yard_registry = YARD::Registry

      puts "Generating documentation for Puppet module '#{name}'."
      PuppetStrings.generate(search_patterns, { yard_args: yard_arguments(cache_db) })

      full_docs = DocumentationExtractor.extract_all_entities(yard_registry)
      entity_list = DocumentationExtractor.build_entity_list(full_docs, name)

      write_module_docs(docs_dir, name, full_docs, entity_list)
      # Start the next module from an empty registry.
      yard_registry.clear

      finished_at = Time.now
      puts "Finished documentation generation for module #{name} at #{finished_at} took #{finished_at - started_at} seconds"

      entity_list
    end

    # Writes <module name>.docs.json and <module name>.list.json.
    def self.write_module_docs(docs_dir, module_name, docs, module_entities)
      { "docs" => docs, "list" => module_entities }.each do |kind, payload|
        target = File.join(docs_dir, "#{module_name}.#{kind}.json")
        File.write(target, JSON.pretty_generate(payload))
      end
    end
  end

  # Generates the documentation of all modules of one environment.
  module EnvironmentProcessor
    # Documents every module of the environment whose name is not listed in
    # MODULES_TO_SKIP.
    #
    # @param environment [#name, #modules] the environment to process
    # @param caches_dir [String] passed through to the module generator
    # @param docs_dir [String] passed through to the module generator
    # @return [Array<Hash>] the entity descriptions of all documented
    #   modules, in module order
    def self.process_modules(environment, caches_dir, docs_dir)
      started_at = Time.now
      puts "Starting documentation generation for environment #{environment.name} at #{started_at}"

      documented_modules = environment.modules.reject { |mod| MODULES_TO_SKIP.include?(mod.name) }
      all_entities = documented_modules.flat_map do |mod|
        ModuleDocGenerator.generate_for_module(mod, caches_dir, docs_dir)
      end

      finished_at = Time.now
      puts "Finished documentation generation for environment #{environment.name} at #{finished_at} took #{finished_at - started_at} seconds"

      all_entities
    end
  end

  # Entry points of the script: the parent process (run) forks one child per
  # environment, and each child runs generate_environment_docs.
  module Runner
    # Generates the docs of one environment; meant to run in a forked child.
    # Output is buffered in memory until the environment's log files can be
    # opened and goes to those files from then on. Any failure is logged and
    # ends the process with exit status 1.
    #
    # @param data [Hash] :environment (name), :input (JSON text of the
    #   script's input), :stdout / :stderr (output buffered by the parent so
    #   far) and :repo_id
    def self.generate_environment_docs(data)
      processing_file = nil
      original_stdout, original_stderr, stdout_sio, stderr_sio = OutputRedirection.capture_streams

      begin
        environment = Puppet.lookup(:environments).get(data[:environment])
        # NOTE(review): presumably the lookup yields nil for an unknown
        # environment - confirm; the guard only replaces an opaque
        # NoMethodError with a readable message.
        raise "Environment '#{data[:environment]}' could not be found" if environment.nil?

        environment_path = DirectoryManager.resolve_environment_path(environment, data[:environment])

        docs_dir, caches_dir = DirectoryManager.setup_docs_directory(environment_path)
        File.write(File.join(docs_dir, "input.json"), data[:input])

        out_file, err_file = DirectoryManager.open_log_files(docs_dir)
        OutputRedirection.redirect_to_files(original_stdout,
                                            original_stderr,
                                            data[:stdout] + stdout_sio.string,
                                            data[:stderr] + stderr_sio.string,
                                            out_file,
                                            err_file)

        processing_file = DirectoryManager.create_processing_lock(docs_dir)
        DirectoryManager.setup_versioned_caches(data[:repo_id], data[:environment], caches_dir)

        environment_entities = EnvironmentProcessor.process_modules(environment, caches_dir, docs_dir)
        File.write(File.join(docs_dir, "list.json"), JSON.pretty_generate(environment_entities))
      rescue Exception => e # rubocop:disable Lint/RescueException
        # Deliberately broad: this is the last stop inside the child, and an
        # exit requested through a signal handler should be logged as well.
        failure = "Failed because of #{e.full_message(highlight: false)}"
        if $stderr.is_a?(StringIO)
          # The log files were never opened, so $stderr is still the
          # in-memory buffer that is about to be thrown away; report on the
          # real stderr instead of losing the failure.
          original_stderr.write(stderr_sio.string)
          original_stderr.puts failure
        else
          $stderr.puts failure
        end
        exit 1
      ensure
        DirectoryManager.remove_processing_lock(processing_file)
        OutputRedirection.restore_streams(original_stdout, original_stderr, out_file, err_file)
      end
    end

    # Reads the input from stdin, determines the environments to document,
    # generates each one in its own child process and waits for them.
    def self.run
      original_stdout, original_stderr, stdout_sio, stderr_sio = OutputRedirection.capture_streams
      start_time = Time.now
      setup_complete = false

      begin
        puts "Starting documentation generation at: #{start_time}"

        input = InputParser.parse_stdin
        config = InputParser.extract_config(input) || {}
        deployed_environments = InputParser.extract_deployed_environments(input, config)
        allowed_environments = InputParser.determine_allowed_environments(config)
        environments_to_generate = InputParser.compute_environments_to_generate(allowed_environments, deployed_environments)

        PuppetInitializer.initialize_puppet

        process_manager = ProcessManager.new({ max_concurrency: config['concurrency'] || 3 })

        # The output buffered up to here is handed to every child, which
        # writes it at the top of its environment's log files.
        data = {
          input: JSON.pretty_generate(input),
          stdout: stdout_sio.string,
          stderr: stderr_sio.string,
          repo_id: config['repo-id'] || 'puppet-code'
        }.freeze

        setup_complete = true
      ensure
        unless setup_complete
          # Setup failed while all output was only buffered in memory. Put
          # the buffered output on the real streams, otherwise it - and,
          # with $stderr still pointing at the buffer, the error report
          # itself - would vanish without a trace.
          original_stdout.write(stdout_sio.string)
          original_stderr.write(stderr_sio.string)
        end
        OutputRedirection.restore_streams(original_stdout, original_stderr)
      end

      environments_to_generate.each do |desired_environment|
        process_manager.run(data.merge({ environment: desired_environment })) do |_config, environment_data|
          generate_environment_docs(environment_data)
        end
      end

      process_manager.wait_for_children(config['timeout'] || 300)

      end_time = Time.now
      puts "Finished documentation generation at #{end_time} took #{end_time - start_time} seconds"
    end
  end
end

# Script entry point: runs the generator as soon as this file is executed.
PuppetDocsGenerator::Runner.run
