255 lines
9.7 KiB
Python
255 lines
9.7 KiB
Python
![]() |
""" Ensure the asciidoc code for a rule description follows best practices
|
||
|
|
||
|
Checks are:
|
||
|
* "ifdef"/"endif" blocks should be well-formed for RSPEC
|
||
|
* Inline code with backquotes is correctly escaped and balanced
|
||
|
* Include commands are not appended to other code
|
||
|
"""
|
||
|
from pathlib import Path
|
||
|
import re
|
||
|
|
||
|
|
||
|
# The only ifdef/endif commands accepted in a rule description.
VALID_IFDEF = "ifdef::env-github,rspecator-view[]"
VALID_ENDIF = "endif::env-github,rspecator-view[]"

# Attribute declaration such as ":name: value" (matched at the start of a line)
VARIABLE_DECL = re.compile(r':\w+: ')

# Include command (matched at the start of a line)
INCLUDE = re.compile(r'include::')

# Regex fragments for the characters that trigger asciidoc formatting
FORMATTING_CHARS = ['_', r'\*', r'\#']
WORD_FORMATTING_CHARS = [r'\~', r'\^']

# A doubled formatting char is interpreted wherever it appears
UNCONSTRAINED_FORMATTING = '|'.join(f'{char}{char}' for char in FORMATTING_CHARS)
# A single formatting char is dangerous when it starts a word
FORMATTING_OPENING = '|'.join(rf'(\W|^){char}\w' for char in FORMATTING_CHARS)
# A single formatting char is dangerous when it ends a word
FORMATTING_CLOSING = '|'.join(rf'\w{char}(\W|$)' for char in FORMATTING_CHARS)
# Word formatting is broken by spaces, so only things like `~word~` or
# `^word^` are looked for
WORD_FORMATTING = '|'.join(rf'{char}\S+{char}' for char in WORD_FORMATTING_CHARS)

# All the matchers above, combined in a single alternation
NEED_PROTECTION = re.compile(
    '('
    + '|'.join((
        UNCONSTRAINED_FORMATTING,
        FORMATTING_OPENING,
        FORMATTING_CLOSING,
        WORD_FORMATTING,
    ))
    + ')'
)
|
||
|
|
||
|
# Closing of a constrained passthrough (`+text+`).
# Regex trick: the search has to stop at the first backquote, so the pattern
# matches EITHER a backquote OR the closing '+' (captured in group 1).
# Matches where group 1 is empty are backquotes and must be ignored.
CLOSE_CONSTRAINED_PASSTHROUGH = re.compile(r'`|((?<!\s)\+(?=`))')

# Closing markers for inline code opened with one or two backquotes
CLOSE_CONSTRAINED_BACKQUOTE = re.compile(r'`(?!\w)')
CLOSE_UNCONSTRAINED_BACKQUOTE = re.compile('``')

# Passthrough macro: pass:<flags>[<content>], where "\]" does not end the content
PASSTHROUGH_MACRO_TEXT = r'pass:\w*\[(\\\]|[^\]])*\]'
PASSTHROUGH_MACRO = re.compile(PASSTHROUGH_MACRO_TEXT)

# Opening of inline code.
# Regex trick: passthrough macros must be skipped, so that the backquotes in
# pass:[``whatever``] are not found. The pattern matches
#  * EITHER a whole passthrough macro, including the backquotes it contains
#  * OR opening backquotes (captured in the "backquote" group)
# Matches where the "backquote" group is empty are macros and must be ignored.
BACKQUOTE = re.compile(
    rf'{PASSTHROUGH_MACRO_TEXT}|(?P<backquote>(``+)|(?<![\\\w])(`)(?!\s))'
)
|
||
|
|
||
|
def close_passthrough(count, pos, line):
    """Find the end of a passthrough block marked by *count* plus signs.

    Args:
        count: number of consecutive '+' characters found at *pos*.
        pos: index in *line* of the first opening '+'.
        line: the line being scanned.

    Returns:
        The index just after the closing marker, or *pos* unchanged when the
        plus signs do not open a passthrough that is closed on this line.
    """
    while count > 0:
        # `+++a++` will display '+a' in case of imbalance, so we try to find
        # the biggest closing block first, then retry with one '+' less.
        content_start = pos + count
        if count == 1:
            # Constrained '+'. It is a passthrough only if it is directly
            # around text and surrounded by backquotes: `+Some Content+`
            # Both tests are bounds-checked: a '+' ending the line or starting
            # it cannot open a constrained passthrough.
            glued_to_text = content_start < len(line) and not line[content_start].isspace()
            after_backquote = pos > 0 and line[pos - 1] == '`'
            if not (glued_to_text and after_backquote):
                return pos
            end = CLOSE_CONSTRAINED_PASSTHROUGH.search(line, content_start)
            # Group 1 is only set for a closing '+'. A match without it is a
            # backquote: the inline code ended before the passthrough did.
            if end and end.group(1):
                return end.end()
        else:
            closing = line.find('+' * count, content_start)
            if closing >= 0:
                return closing + count
        count -= 1
    return pos
|
||
|
|
||
|
|
||
|
def skip_passthrough_macro(line, pos):
    """If a passthrough macro starts at *pos*, skip to its end.

    Args:
        line: the line being scanned.
        pos: index in *line* to look at; may be at or past the end of the line.

    Returns:
        The index just after the macro, or *pos* unchanged when no
        passthrough macro starts there.
    """
    # startswith() is safe at the end of the line, where indexing would raise.
    if line.startswith('pass:', pos):
        macro = PASSTHROUGH_MACRO.match(line, pos)
        if macro:
            return macro.end()
    return pos
|
||
|
|
||
|
|
||
|
def skip_passthrough_plus(line, pos):
    """If a '+' passthrough starts at *pos*, skip to its end.

    Args:
        line: the line being scanned.
        pos: index in *line* to look at; may be at or past the end of the line.

    Returns:
        Whatever `close_passthrough` returns for the run of '+' starting at
        *pos*, or *pos* unchanged when the character there is not a '+'.
    """
    # startswith() is safe at the end of the line, where indexing would raise.
    if not line.startswith('+', pos):
        return pos
    # Count the consecutive '+' characters that open the block.
    count = 1
    while line.startswith('+', pos + count):
        count += 1
    return close_passthrough(count, pos, line)
|
||
|
|
||
|
|
||
|
def close_inline_block(line: str, pos: int, closing_pattern: re.Pattern[str]):
    """Find the end of an inline block closed by *closing_pattern*.

    Passthrough macros and '+' passthroughs met on the way are skipped, so
    their text is neither searched for the closing marker nor returned.

    Args:
        line: the line being scanned.
        pos: index of the first character after the opening marker.
        closing_pattern: compiled pattern that matches the closing marker.

    Returns:
        A tuple ``(end, content)``: *end* is the index where the closing
        marker starts, or -1 when the block is not closed on this line;
        *content* is the text collected up to that point.
    """
    collected = []
    length = len(line)
    while pos < length:
        pos = skip_passthrough_macro(line, pos)
        # A skipped passthrough may end the line: check the bounds before
        # looking at line[pos] again.
        if pos < length:
            pos = skip_passthrough_plus(line, pos)
        if pos >= length:
            break
        if closing_pattern.match(line, pos):
            return pos, ''.join(collected)
        collected.append(line[pos])
        pos += 1
    return -1, ''.join(collected)
|
||
|
|
||
|
|
||
|
class Sanitizer:
    """Check the asciidoc source of one rule description file.

    Every problem is printed with the file name and line number, and counted.
    `process` returns the total number of problems.
    """

    def __init__(self, file: Path):
        """Prepare the check of *file*, which must be an existing regular file."""
        assert file.exists()
        assert file.is_file()

        self._file = file
        # An ifdef command is opened and not closed yet.
        self._is_env_open = False
        # An ifdef command was seen somewhere in the file.
        self._has_env = False
        self._error_count = 0
        # The current line is between two `----` delimiter lines.
        self._is_inside_code = False
        # True at the start of the file and after a blank line, False after a
        # description line. ifdef/endif lines leave it untouched.
        self._empty_line = True
        self._previous_line_was_include = False

    def process(self) -> int:
        """Check every line of the file and return the number of errors found."""
        content = self._file.read_text(encoding="utf-8")
        lines = content.splitlines(keepends=False)
        for line_number, line in enumerate(lines, start=1):
            if line == '----':
                # Code block delimiter: enter or leave the code block.
                self._is_inside_code = not self._is_inside_code
                continue
            if self._is_inside_code:
                # The content of code blocks is not checked.
                continue
            if line.startswith("ifdef::"):
                self._process_open_ifdef(line_number, line)
            elif line.startswith("endif::"):
                self._process_close_ifdef(line_number, line)
            elif not line.strip():
                self._empty_line = True
                # The blank line required after an include is present. Forget
                # the include, otherwise a variable declaration following the
                # blank line would trigger a false "missing empty line" error.
                self._previous_line_was_include = False
            else:
                self._process_description(line_number, line)
                self._empty_line = False

        if self._is_env_open:
            self._on_error(len(lines), "An ifdef command is opened but never closed.")

        return self._error_count

    def _process_open_ifdef(self, line_number: int, line: str):
        """Check an "ifdef::" line: only one per file, with the expected attributes."""
        if self._has_env:
            message = "Only one ifdef command is allowed per file."
            if self._is_env_open:
                message += "\nThe previous ifdef command was not closed."
            self._on_error(line_number, message)

        self._has_env = True
        self._is_env_open = True

        # IDEs should be configured to properly display the description,
        # not the other way around.
        # "env-vscode" was used in the past. Instead, users should be able to
        # toggle the rspecator view based on their needs. Help these users migrate.
        if "vscode" in line:
            self._on_error(
                line_number,
                "Configure VS Code to display rspecator-view by setting the asciidoctor attribute.",
            )
        elif line != VALID_IFDEF:
            self._on_error(
                line_number,
                f'Incorrect asciidoc environment. "{VALID_IFDEF}" should be used instead.',
            )

    def _process_close_ifdef(self, line_number: int, line: str):
        """Check an "endif::" line: it must close an open ifdef and be well-formed."""
        if not self._is_env_open:
            self._on_error(line_number, "Unexpected endif command.")

        self._is_env_open = False

        if line != VALID_ENDIF:
            self._on_error(
                line_number,
                f'Incorrect endif command. "{VALID_ENDIF}" should be used instead.',
            )

    def _process_description(self, line_number: int, line: str):
        """Check a regular line: blank lines around includes, then inline code."""
        if VARIABLE_DECL.match(line):
            return
        if self._previous_line_was_include and not self._empty_line:
            self._on_error(line_number - 1, '''An empty line is missing after the include.
This may result in broken tags and other display issues.
Make sure there are always empty lines before and after each include''')
        if INCLUDE.match(line):
            self._previous_line_was_include = True
            if not self._empty_line:
                self._on_error(line_number, '''An empty line is missing before the include.
This may result in broken tags and other display issues.
Make sure there are always empty lines before and after each include''')
            return
        self._previous_line_was_include = False

        pos = 0
        while (res := BACKQUOTE.search(line, pos)):
            opening = res.group('backquote')
            if opening:
                pos = self._check_inlined_code(line_number, res.end(), line, opening)
            else:
                # The match is a passthrough macro (see the comment near the
                # BACKQUOTE declaration): ignore it and keep scanning the rest
                # of the line instead of stopping there.
                pos = res.end()

    def _check_inlined_code(self, line_number: int, pos: int, line: str, opening_pattern: str):
        """Check the inline code opened by *opening_pattern* and ending before or at the end of *line*.

        *pos* is the index just after the opening backquotes.
        Returns the index from which the search for the next inline code
        should resume.
        """
        if len(opening_pattern) > 2:
            # Part of the backquotes are displayed as backquotes.
            self._on_error(line_number, 'Use "++" to isolate the backquotes you want to display from the ones that should be interpreted by AsciiDoc.')
            return pos
        elif len(opening_pattern) == 2:
            closing_pattern = CLOSE_UNCONSTRAINED_BACKQUOTE
        else:
            closing_pattern = CLOSE_CONSTRAINED_BACKQUOTE

        content_end, content = close_inline_block(line, pos, closing_pattern)
        if content_end < 0:
            message = 'Unbalanced code inlining tags.'
            if len(opening_pattern) == 1:
                message += '''
If you are trying to write inline code that is glued to text without a space,
you need to use double backquotes:
> Replace all `reference`s.
Will not display correctly. You need to write:
> Replace all ``reference``s.
'''
            self._on_error(line_number, message)
            # Nothing more can be checked on this line.
            return len(line)
        # Resume after the closing backquotes, which are as many as the opening ones.
        pos = content_end + len(opening_pattern)
        if NEED_PROTECTION.search(content):
            self._on_error(line_number, f'''
Using backquotes does not protect against asciidoc interpretation. Starting or
ending a word with '*', '#', '_' or having two of them consecutively will
trigger unintended behavior with the rest of the text.
Use ``++{content}++`` to avoid that.
If you really want to have formatting inside your code, you can write
``pass:n[{content}]``
''')
        return pos

    def _on_error(self, line_number: int, message: str):
        """Print *message* prefixed by the file and line, and count the error."""
        print(f"{self._file}:{line_number} {message}")
        self._error_count += 1
|
||
|
|
||
|
|
||
|
def sanitize_asciidoc(file_path: Path):
    """CLI entry point: check *file_path* and return the number of errors found."""
    sanitizer = Sanitizer(file_path)
    return sanitizer.process()
|