"""Ensure the AsciiDoc code for a rule description follows best practices.

Checks are:
* "ifdef"/"endif" blocks should be well-formed for RSPEC
* Inline code with backquotes is correctly escaped and balanced
* Include commands are not appended to other code
"""
|
|
import re
|
|
from pathlib import Path
|
|
|
|
# The only conditional-block delimiters accepted in a rule description.
VALID_IFDEF = "ifdef::env-github,rspecator-view[]"
VALID_ENDIF = "endif::env-github,rspecator-view[]"

# Start of an attribute declaration line such as ":name: value".
VARIABLE_DECL = re.compile(r":\w+: ")

# Start of an include directive.
INCLUDE = re.compile(r"include::")

# Regex fragments for the characters AsciiDoc interprets as inline formatting.
FORMATTING_CHARS = ["_", r"\*", r"\#"]
WORD_FORMATTING_CHARS = [r"\~", r"\^"]

# A doubled formatting char is interpreted wherever it appears.
UNCONSTRAINED_FORMATTING = "|".join([char * 2 for char in FORMATTING_CHARS])
# A single formatting char is dangerous at the beginning of a word...
FORMATTING_OPENING = "|".join([rf"(\W|^){char}\w" for char in FORMATTING_CHARS])
# ... and at the end of a word.
FORMATTING_CLOSING = "|".join([rf"\w{char}(\W|$)" for char in FORMATTING_CHARS])
# Word formatting is broken by spaces, so we look for things like `~word~`.
WORD_FORMATTING = "|".join([rf"{char}\S+{char}" for char in WORD_FORMATTING_CHARS])

# All the matchers combined into a single capturing alternation.
NEED_PROTECTION = re.compile(
    "("
    + "|".join(
        [
            UNCONSTRAINED_FORMATTING,
            FORMATTING_OPENING,
            FORMATTING_CLOSING,
            WORD_FORMATTING,
        ]
    )
    + ")"
)

# Regex trick: the search must stop at the first backquote. The pattern
# therefore matches EITHER a backquote OR the closing passthrough; only the
# latter is captured in group 1, so a match with an empty group 1 means
# "a backquote came first" and must be ignored by the caller.
CLOSE_CONSTRAINED_PASSTHROUGH = re.compile(r"`|((?<!\s)\+(?=`))")

CLOSE_CONSTRAINED_BACKQUOTE = re.compile(r"`(?!\w)")
CLOSE_UNCONSTRAINED_BACKQUOTE = re.compile("``")

PASSTHROUGH_MACRO_TEXT = r"pass:\w*\[(\\\]|[^\]])*\]"

PASSTHROUGH_MACRO = re.compile(PASSTHROUGH_MACRO_TEXT)

CPP = re.compile(r"\b[Cc]\+\+")

# Regex trick: passthrough macros must be skipped so that the backquotes in
# pass:[``whatever``] are not reported. The pattern matches
# * EITHER a whole passthrough macro, including the backquotes it contains,
# * OR an opening backquote sequence (named group "backquote").
# A match whose "backquote" group is empty is a passthrough macro.
BACKQUOTE = re.compile(
    PASSTHROUGH_MACRO_TEXT + r"|(?P<backquote>(``+)|(?<![\\\w])(`)(?!\s))"
)
|
|
|
|
|
|
def close_passthrough(count, pos, line):
    """Find the end of a passthrough block marked by *count* plus signs.

    *pos* is the index in *line* of the first plus sign of the opening marker.

    Returns the index just past the closing marker, or *pos* unchanged when
    the plus signs do not open a passthrough.
    """
    while count > 0:
        # `+++a++` will display '+a' in case of imbalance, so we try to find
        # the biggest closing block first and shrink the marker on failure.
        if count == 1:
            # Constrained '+'. It is a passthrough only if it is directly
            # around text and surrounded by backquotes: `+Some Content+`
            # Both neighbours are bounds-checked: a '+' ending the line has
            # no content after it, and a '+' starting the line has no
            # backquote before it (index -1 would wrap to the last char).
            follows_backquote = pos > 0 and line[pos - 1] == "`"
            next_index = pos + count
            starts_content = next_index < len(line) and not line[next_index].isspace()
            if not (follows_backquote and starts_content):
                return pos
            close_pattern = CLOSE_CONSTRAINED_PASSTHROUGH
        else:
            close_pattern = re.compile("(" + r"\+" * count + ")")
        end = close_pattern.search(line, pos + count)
        # Group 1 is empty when the constrained pattern stopped on a
        # backquote instead of a closing '+': that is not a passthrough.
        if end and end.group(1):
            return end.end()
        count -= 1
    return pos
|
|
|
|
|
|
def skip_passthrough_macro(line, pos):
    """Return the index just past a passthrough macro starting at *pos*.

    When no passthrough macro starts at *pos*, *pos* is returned unchanged.
    """
    # Cheap pre-check before running the regex: every macro starts with "pass:".
    if line[pos] != "p":
        return pos
    macro = PASSTHROUGH_MACRO.match(line, pos)
    return macro.end() if macro else pos
|
|
|
|
|
|
def skip_passthrough_plus(line, pos):
    """Return the index just past a '+'-delimited passthrough starting at *pos*.

    When *pos* is not on a plus sign, *pos* is returned unchanged. Otherwise
    the run of consecutive plus signs is measured and `close_passthrough`
    decides where (and whether) the passthrough ends.
    """
    if line[pos] != "+":
        return pos
    run_end = pos + 1
    while run_end < len(line) and line[run_end] == "+":
        run_end += 1
    return close_passthrough(run_end - pos, pos, line)
|
|
|
|
|
|
def close_inline_block(
    line: str, pos: int, closing_pattern: re.Pattern[str]
) -> tuple[int, str]:
    """Find the end of an inline block closed by *closing_pattern*.

    Scanning starts at *pos*. Passthrough macros and '+' passthroughs are
    skipped: their text is neither searched for the closing pattern nor
    added to the collected content.

    Returns a pair ``(index, content)``: *index* is the position where
    *closing_pattern* matches, or -1 when the block is never closed;
    *content* is the unprotected text collected up to that point.
    """
    collected = []
    length = len(line)
    while pos < length:
        pos = skip_passthrough_macro(line, pos)
        # A macro may run up to the end of the line; the '+' check indexes
        # line[pos], so it must not be attempted past the last character.
        if pos < length:
            pos = skip_passthrough_plus(line, pos)
        if closing_pattern.match(line, pos):
            return pos, "".join(collected)
        # A passthrough that ends the line leaves nothing to collect: the
        # block is unclosed, which is reported with -1 rather than a crash.
        if pos >= length:
            break
        collected.append(line[pos])
        pos += 1
    return -1, "".join(collected)
|
|
|
|
|
|
class Sanitizer:
    """Validate the AsciiDoc source of a single rule description file.

    Every problem found is printed as ``<file>:<line> <message>`` and counted;
    `process` returns the total number of problems.
    """

    def __init__(self, file: Path):
        assert file.exists()
        assert file.is_file()

        self._file = file
        # True while an "ifdef::" block has been opened and not closed yet.
        self._is_env_open = False
        # True once any "ifdef::" line has been seen in the file.
        self._has_env = False
        self._error_count = 0
        # True between two "----" delimiter lines.
        self._is_inside_code = False
        # True when the last line seen was blank (also the initial state).
        self._empty_line = True
        self._previous_line_was_include = False

    def process(self) -> int:
        """Check the whole file and return the number of errors reported."""
        content = self._file.read_text(encoding="utf-8")
        lines = content.splitlines(keepends=False)
        for line_number, line in enumerate(lines, start=1):
            if self._is_inside_code:
                # Nothing inside a "----" block is checked.
                if line == "----":
                    self._is_inside_code = False
                continue
            if line == "----":
                self._is_inside_code = True
                continue
            if line.startswith("ifdef::"):
                self._process_open_ifdef(line_number, line)
            elif line.startswith("endif::"):
                self._process_close_ifdef(line_number, line)
            elif not line.strip():
                self._empty_line = True
            else:
                self._process_description(line_number, line)
                self._empty_line = False

        if self._is_env_open:
            self._on_error(len(lines), "An ifdef command is opened but never closed.")

        return self._error_count

    def _process_open_ifdef(self, line_number: int, line: str) -> None:
        """Check an "ifdef::" line: single occurrence and expected environment."""
        if self._has_env:
            message = "Only one ifdef command is allowed per file."
            if self._is_env_open:
                message += "\nThe previous ifdef command was not closed."
            self._on_error(line_number, message)

        self._has_env = True
        self._is_env_open = True

        # IDEs should be configured to properly display the description,
        # not the other way around.
        # "env-vscode" was used in the past. Instead, users should be able to
        # toggle the rspecator view based on their needs. Help these users migrate.
        if "vscode" in line:
            self._on_error(
                line_number,
                "Configure VS Code to display rspecator-view by setting the asciidoctor attribute.",
            )
        elif line != VALID_IFDEF:
            self._on_error(
                line_number,
                f'Incorrect asciidoc environment. "{VALID_IFDEF}" should be used instead.',
            )

    def _process_close_ifdef(self, line_number: int, line: str) -> None:
        """Check an "endif::" line: it must close an open ifdef and be well-formed."""
        if not self._is_env_open:
            self._on_error(line_number, "Unexpected endif command.")

        self._is_env_open = False

        if line != VALID_ENDIF:
            self._on_error(
                line_number,
                f'Incorrect endif command. "{VALID_ENDIF}" should be used instead.',
            )

    def _advance_to_next_backquote(self, line: str, pos: int, line_number: int):
        """Find the next BACKQUOTE match at or after *pos* and check the text before it.

        The text between *pos* and the match (or the end of the line when
        there is no match) is searched for a literal "C++", which is reported.

        Returns the match object, or None when nothing more is found.
        """
        next_pos = BACKQUOTE.search(line, pos)
        if next_pos:
            # Match.pos is the index the search STARTED from, which would make
            # the range empty; the text before the match ends at start().
            cpp = CPP.search(line, pos, endpos=next_pos.start())
        else:
            cpp = CPP.search(line, pos)
        if cpp:
            self._on_error(
                line_number,
                'To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++',
            )
        return next_pos

    def _process_description(self, line_number: int, line: str) -> None:
        """Check one non-blank description line: includes and inline code."""
        if VARIABLE_DECL.match(line):
            return
        if self._previous_line_was_include and not self._empty_line:
            # The problem is on the include line itself, hence the "- 1".
            self._on_error(
                line_number - 1,
                "An empty line is missing after the include.\n"
                "This may result in broken tags and other display issues.\n"
                "Make sure there are always empty lines before and after each include",
            )
        if INCLUDE.match(line):
            self._previous_line_was_include = True
            if not self._empty_line:
                self._on_error(
                    line_number,
                    "An empty line is missing before the include.\n"
                    "This may result in broken tags and other display issues.\n"
                    "Make sure there are always empty lines before and after each include",
                )
            return
        self._previous_line_was_include = False

        pos = 0
        res = self._advance_to_next_backquote(line, pos, line_number)
        while res:
            opening = res.group("backquote")
            if opening:
                pos = self._check_inlined_code(line_number, res.end(), line, opening)
            else:
                # BACKQUOTE also matches passthrough macros so that the
                # backquotes they contain are ignored. Step over the macro and
                # keep scanning; stopping here would leave the rest of the
                # line unchecked.
                pos = res.end()
            res = self._advance_to_next_backquote(line, pos, line_number)

    def _check_inlined_code(
        self, line_number: int, pos: int, line: str, opening_pattern: str
    ) -> int:
        """Check the inline code opened by *opening_pattern* and ending after *pos*.

        *pos* is the index just past the opening backquotes.

        Returns the index from which the scan of the line should resume.
        """
        opening_length = len(opening_pattern)
        if opening_length > 2:
            # Part of the backquotes are displayed as backquotes.
            self._on_error(
                line_number,
                'Use "++" to isolate the backquotes you want to display from the ones that should be interpreted by AsciiDoc.',
            )
            return pos

        if opening_length == 2:
            closing_pattern = CLOSE_UNCONSTRAINED_BACKQUOTE
        else:
            closing_pattern = CLOSE_CONSTRAINED_BACKQUOTE

        content_end, content = close_inline_block(line, pos, closing_pattern)
        if content_end < 0:
            message = "Unbalanced code inlining tags."
            if opening_length == 1:
                message += (
                    "\n"
                    "If you are trying to write inline code that is glued to text without a space,\n"
                    "you need to use double backquotes:\n"
                    "> Replace all `reference`s.\n"
                    "Will not display correctly. You need to write:\n"
                    "> Replace all ``reference``s.\n"
                )
            self._on_error(line_number, message)
            # Nothing after an unclosed block can be checked reliably.
            return len(line)

        if NEED_PROTECTION.search(content):
            self._on_error(
                line_number,
                "\n"
                "Using backquotes does not protect against asciidoc interpretation. Starting or\n"
                "ending a word with '*', '#', '_' or having two of them consecutively will\n"
                "trigger unintended behavior with the rest of the text.\n"
                f"Use ``++{content}++`` to avoid that.\n"
                "If you really want to have formatting inside your code, you can write\n"
                f"``pass:n[{content}]``\n",
            )
        # Resume just past the closing backquotes (same length as the opening).
        return content_end + opening_length

    def _on_error(self, line_number: int, message: str) -> None:
        """Print *message* prefixed by the file location and count the error."""
        print(f"{self._file}:{line_number} {message}")
        self._error_count += 1
|
|
|
|
|
|
def sanitize_asciidoc(file_path: Path):
    """Entry point called by the CLI.

    Runs a `Sanitizer` over *file_path* and returns whatever its `process`
    method returns.
    """
    sanitizer = Sanitizer(file_path)
    return sanitizer.process()
|