rspec/ci/asciidoc_validation/custom-asciidoctor

#!/usr/bin/env ruby
# Based on asciidoctor main ruby script.
# This is only meant to introspect and log which asciidoc files were used.

require 'asciidoctor'
require 'asciidoctor/cli'

# Matches the path of a rule's main file and captures its components:
#   .../rules/S1234/rule.adoc      -> id: "S1234", lang: nil
#   .../rules/S1234/java/rule.adoc -> id: "S1234", lang: "java"
# \A and \z anchor the whole string (^ and $ would only anchor a single line),
# and the dot of "rule.adoc" is escaped so that it only matches a literal dot.
MAIN_FILE_REGEX = %r{\A.*/rules/(?<id>S\d+)/(?:(?<lang>\w+)/)?rule\.adoc\z}

# Preprocessor that checks the main document path against MAIN_FILE_REGEX
# and logs it.
class MainFileLogger < Asciidoctor::Extensions::Preprocessor
  include Asciidoctor::Logging

  # Logs the normalized path of the main file and returns the reader unchanged.
  # Aborts the process when the path does not match MAIN_FILE_REGEX.
  def process(document, reader)
    # Turn on the sourcemap so that source locations are tracked; other
    # loggers rely on it to report more accurate errors.
    document.sourcemap = true

    main_file = document.normalize_system_path(reader.file, document.reader.dir)

    # The pattern assumes unix-style path separators.
    abort("Main file does not follow expected pattern: #{main_file}") unless main_file.match(MAIN_FILE_REGEX)

    logger.info("ASCIIDOC LOGGER MAIN FILE:#{main_file}")
    reader
  end
end

# Include processor that only observes include directives: it logs every
# include target, and additionally logs a cross-reference when the target lies
# outside both the rule's own directory and the shared content directory.
# It never claims an include (handles? always returns false).
class IncludeLogger < Asciidoctor::Extensions::IncludeProcessor
  include Asciidoctor::Logging

  def initialize(document)
    @config = {} # Defined in parent class; will be updated by the extension registry mechanism.
    @document = document

    # @document.reader.file is not defined yet at this stage.
    # Therefore we cannot compute the main file path and cache it.
    # This cannot be done once in handles? because the object is then frozen.
    # For these reasons, we end up recomputing the rule directory path each time.
  end

  # Returns the file of the outermost document being read: the reader's own
  # file when no include is in progress, otherwise the file recorded in the
  # first frame of the include stack.
  def get_main_file(reader)
    # See how include_stack is used:
    # https://github.com/asciidoctor/asciidoctor/blob/f3800cc9c92faf8370041b2b27a61124318ed289/lib/asciidoctor/reader.rb#L669
    if reader.include_stack.empty?
      reader.file
    else
      main_frame = reader.include_stack.fetch(0)
      main_frame.fetch(1) # Per the reader implementation linked above, index 1 holds the file.
    end
  end

  # Logs the include target (and a cross-reference notice when applicable).
  # Always returns false so that the include is processed by someone else.
  def handles?(target)
    base_dir = @document.reader.dir
    include_path = @document.normalize_system_path(target, base_dir)
    main_file = @document.normalize_system_path(get_main_file(@document.reader), base_dir)

    # Assume a language-specific description first: <rule_dir>/<lang>/rule.adoc.
    rule_dir = File.dirname(File.dirname(main_file))
    rule_id = File.basename(rule_dir)
    if rule_id == 'rules'
      # This is a language-agnostic rule description.
      rule_dir = File.dirname(main_file)
      rule_id = File.basename(rule_dir)
    end
    git_dir = File.dirname(File.dirname(rule_dir))
    shared_dir = File.join(git_dir, 'shared_content')

    # Compare against directory prefixes that end with a separator:
    # S100 must not be allowed to include things from S1000, and likewise a
    # sibling such as "shared_content_old" must not pass for shared content.
    allowed_prefixes = ["#{rule_dir}/", "#{shared_dir}/"]
    unless include_path.start_with?(*allowed_prefixes)
      logger.info("ASCIIDOC LOGGER CROSSREFERENCE:#{rule_id} cross-references #{include_path}")
    end

    logger.info("ASCIIDOC LOGGER INCLUDE:#{include_path}")

    false # Actually, this include processor does nothing.
  end

  # Intentionally no process function here.
end

# Tree processor that inspects the diff-id / diff-type attributes of source
# listing blocks and logs every inconsistency it finds.
class SourceLogger < Asciidoctor::Extensions::TreeProcessor
  include Asciidoctor::Logging

  # Formats the source location of a block as "file:line".
  def get_source_location(block)
    loc = block.source_location # Asciidoctor::Reader::Cursor.
    "#{loc.file}:#{loc.lineno}"
  end

  # Builds the "<rule id>/<language>" label of the document from its main file
  # path; the language is 'default' when the path has no language component.
  def get_rule(document)
    main_file = document.normalize_system_path(document.reader.file, document.reader.dir)
    res = main_file.match(MAIN_FILE_REGEX)
    lang = res[:lang]
    lang = 'default' unless lang
    "#{res[:id]}/#{lang}"
  end

  # Validates every source block on its own, then validates how the blocks
  # sharing a diff-id pair up.
  def process(document)
    rule = get_rule(document)
    source_blocks = document.find_by(context: :listing, style: 'source')
    report_pairing_issues(rule, collect_valid_blocks(rule, source_blocks))
  end

  private

  # Logs blocks that have a diff-id without a diff-type, a diff-type without
  # a diff-id, or an unknown diff-type. Returns the remaining, individually
  # valid blocks grouped by diff-id.
  def collect_valid_blocks(rule, source_blocks)
    blocks_per_id = Hash.new { |hash, key| hash[key] = [] }

    source_blocks.each do |block|
      id = block.attr('diff-id', nil)
      type = block.attr('diff-type', nil)

      # Neither attribute is set: nothing to validate.
      next unless id || type

      loc = get_source_location(block)

      if !id
        logger.info("ASCIIDOC LOGGER DIFF:[#{rule}] diff-id is missing in #{loc}")
      elsif !type
        logger.info("ASCIIDOC LOGGER DIFF:[#{rule}] diff-type is missing in #{loc}")
      elsif !['compliant', 'noncompliant'].include?(type)
        logger.info("ASCIIDOC LOGGER DIFF:[#{rule}] diff-type '#{type}' is not valid in #{loc}")
      else
        # The block is valid on its own.
        blocks_per_id[id].push(block)
      end
    end

    blocks_per_id
  end

  # Each diff-id is expected to have exactly one compliant and one
  # noncompliant block; logs every diff-id that breaks this expectation.
  def report_pairing_issues(rule, blocks_per_id)
    blocks_per_id.each do |id, blocks|
      # Sorting makes the output deterministic. Locations are compared as
      # strings, so "file:10" sorts before "file:9".
      blocks.sort_by! { |block| get_source_location(block) }

      if blocks.length == 1
        loc = get_source_location(blocks[0])
        logger.info("ASCIIDOC LOGGER DIFF:[#{rule}] Incomplete example for diff-id=#{id}, missing counterpart for #{loc}")
      elsif blocks.length == 2
        type1, type2 = blocks.map { |block| block.attr('diff-type') }
        next if type1 != type2

        loc1, loc2 = blocks.map { |block| get_source_location(block) }
        logger.info("ASCIIDOC LOGGER DIFF:[#{rule}] Two #{type1} examples for diff-id=#{id}: #{loc1} and #{loc2}")
      else
        locs = blocks.map { |block| get_source_location(block) }.join(', ')
        logger.info("ASCIIDOC LOGGER DIFF:[#{rule}] #{blocks.length} examples for diff-id=#{id}: #{locs}")
      end
    end
  end
end

# Register the three loggers as Asciidoctor extensions.
# NOTE(review): @document is resolved in whatever context Asciidoctor
# evaluates this block in (presumably the extension registry) -- confirm.
Asciidoctor::Extensions.register do
  preprocessor(MainFileLogger)
  include_processor(IncludeLogger.new(@document))
  treeprocessor(SourceLogger)
end

# Run the Asciidoctor command line on the script arguments and exit with the
# status code reported by the invoker.
invoker = Asciidoctor::Cli::Invoker.new(ARGV)
GC.start
invoker.invoke!
exit(invoker.code)