""" Ensure the asciidoc code for a rule description follows best practices Checks are: * "ifdef"/"endif" blocks should be well-formed for RSPEC * Inline code with backquotes is correctly escaped and balanced * Include commands are not appended to other code * "C++" is referred to using the {cpp} attribute * rules.sonarsource.com is not linked directly """ import re from pathlib import Path VALID_IFDEF = "ifdef::env-github,rspecator-view[]" VALID_ENDIF = "endif::env-github,rspecator-view[]" VARIABLE_DECL = re.compile(r":\w+: ") INCLUDE = re.compile(r"include::") FORMATTING_CHARS = ["_", r"\*", r"\#"] WORD_FORMATTING_CHARS = [r"\~", r"\^"] # If the formatting char is repeated twice, it can go anywhere UNCONSTRAINED_FORMATTING = "|".join(x + x for x in FORMATTING_CHARS) # Single formatting char are dangerous at the beginning of a word FORMATTING_OPENING = "|".join(r"(\W|^)" + x + r"\w" for x in FORMATTING_CHARS) # Single formatting char are dangerous at the end of a word FORMATTING_CLOSING = "|".join(r"\w" + x + r"(\W|$)" for x in FORMATTING_CHARS) # Word formatting is broken by spaces so we look for things like `#word#` WORD_FORMATTING = "|".join(x + r"\S+" + x for x in WORD_FORMATTING_CHARS) # We combine all the matchers NEED_PROTECTION = re.compile( "(" f"{UNCONSTRAINED_FORMATTING}|" f"{FORMATTING_OPENING}|" f"{FORMATTING_CLOSING}|" f"{WORD_FORMATTING}" ")" ) # There is a regex trick here: # We want to stop the search if there is a backquote # We do that by matching backquote OR the closing passthrough # Then we'll ignore any match of backquote CLOSE_CONSTRAINED_PASSTHROUGH = re.compile(r"`|((?(``+)|(? 0: # `+++a++` will display '+a' in case of inbalance, we try to find the biggest closing block if count == 1: if not line[pos + count].isspace() and line[pos - 1] == "`": # constrained '+'. It is a passthrough only if it is directly around text and surrounded by backquotes: `+Some Content+` close_pattern = CLOSE_CONSTRAINED_PASSTHROUGH else: return pos else: close_pattern = re.compile("(" + r"\+" * count + ")") end = close_pattern.search(line, pos + count) if end and end.group(1): return end.end() count -= 1 return pos def skip_passthrough_macro(line, pos): """If this is a passthrough macro, skip to the end""" if line[pos] == "p": pm = PASSTHROUGH_MACRO.match(line, pos) if pm: return pm.end() return pos def skip_passthrough_plus(line, pos): """If this is a passthrough +, skip to the end""" if line[pos] == "+": count = 1 while pos + count < len(line) and line[pos + count] == "+": count += 1 return close_passthrough(count, pos, line) return pos def close_inline_block(line: str, pos: int, closing_pattern: re.Pattern[str]): """Find the end of an inline block started with *pattern*""" content = "" while pos < len(line): pos = skip_passthrough_macro(line, pos) pos = skip_passthrough_plus(line, pos) if closing_pattern.match(line, pos): return pos, content content += line[pos] pos += 1 return -1, content class Sanitizer: def __init__(self, file: Path): assert file.exists() assert file.is_file() self._file = file self._is_env_open = False self._has_env = False self._error_count = 0 self._is_inside_code = False self._empty_line = True self._previous_line_was_include = False def process(self) -> bool: content = self._file.read_text(encoding="utf-8") lines = content.splitlines(keepends=False) for line_index, line in enumerate(lines): if self._is_inside_code: if line == "----": self._is_inside_code = False continue if line == "----": self._is_inside_code = True continue line_number = line_index + 1 if line.startswith("ifdef::"): self._process_open_ifdef(line_number, line) elif line.startswith("endif::"): self._process_close_ifdef(line_number, line) elif not line.strip(): self._empty_line = True else: self._process_description(line_number, line) self._empty_line = False if self._is_env_open: self._on_error(len(lines), "An ifdef command is opened but never closed.") return self._error_count def _process_open_ifdef(self, line_number: int, line: str): if self._has_env: message = "Only one ifdef command is allowed per file." if self._is_env_open: message += "\nThe previous ifdef command was not closed." self._on_error(line_number, message) self._has_env = True self._is_env_open = True # IDEs should be configured to properly display the description, # not the other way around. # "env-vscode" was used in the passed. Instead, user should be able to # toggle the rspecator view based on their needs. Help these users migrate. if "vscode" in line: self._on_error( line_number, "Configure VS Code to display rspecator-view by setting the asciidoctor attribute.", ) elif line != VALID_IFDEF: self._on_error( line_number, f'Incorrect asciidoc environment. "{VALID_IFDEF}" should be used instead.', ) def _process_close_ifdef(self, line_number: int, line: str): if not self._is_env_open: self._on_error(line_number, "Unexpected endif command.") self._is_env_open = False if line != VALID_ENDIF: self._on_error( line_number, f'Incorrect endif command. "{VALID_ENDIF}" should be used instead.', ) def _advance_to_next_backquote(self, line: str, pos: int, line_number: int): next_pos = BACKQUOTE.search(line, pos) if next_pos: cpp = CPP.search(line, pos, endpos=next_pos.pos) else: cpp = CPP.search(line, pos) if cpp: self._on_error( line_number, 'To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++.', ) return next_pos def _process_description(self, line_number: int, line: str): if VARIABLE_DECL.match(line): return if self._previous_line_was_include and not self._empty_line: self._on_error( line_number - 1, """An empty line is missing after the include. This may result in broken tags and other display issues. Make sure there are always empty lines before and after each include.""", ) if INCLUDE.match(line): self._previous_line_was_include = True if not self._empty_line: self._on_error( line_number, """An empty line is missing before the include. This may result in broken tags and other display issues. Make sure there are always empty lines before and after each include.""", ) return else: self._previous_line_was_include = False if RULES_SONARSOURCE.search(line) and not self._is_env_open: self._on_error( line_number, """Do not put direct links to https://rules.sonarsource.com/. Just use the rule ID and let cross-reference substitution do its job.""", ) pos = 0 res = self._advance_to_next_backquote(line, pos, line_number) # We filter out matches for passthrough. See comment near the BACKQUOTE declaration while res and res.group("backquote"): pos = self._check_inlined_code( line_number, res.end(), line, res.group("backquote") ) res = self._advance_to_next_backquote(line, pos, line_number) def _check_inlined_code( self, line_number: int, pos: int, line: str, opening_pattern: str ): if len(opening_pattern) > 2: # Part of the backquotes are displayed as backquotes. self._on_error( line_number, 'Use "++" to isolate the backquotes you want to display from the ones that should be interpreted by AsciiDoc.', ) return pos elif len(opening_pattern) == 2: closing_pattern = CLOSE_UNCONSTRAINED_BACKQUOTE else: closing_pattern = CLOSE_CONSTRAINED_BACKQUOTE content_end, content = close_inline_block(line, pos, closing_pattern) if content_end < 0: message = "Unbalanced code inlining tags." if len(opening_pattern) == 1: message += """ If you are trying to write inline code that is glued to text without a space, you need to use double backquotes: > Replace all `reference`s. Will not display correctly. You need to write: > Replace all ``reference``s. """ self._on_error(line_number, message) return len(line) pos = content_end + len(opening_pattern) if NEED_PROTECTION.search(content): self._on_error( line_number, f""" Using backquotes does not protect against asciidoc interpretation. Starting or ending a word with '*', '#', '_' or having two of them consecutively will trigger unintended behavior with the rest of the text. Use ``++{content}++`` to avoid that. If you really want to have formatting inside your code, you can write ``pass:n[{content}]`` """, ) return pos return pos def _on_error(self, line_number: int, message: str): print(f"{self._file}:{line_number} {message}") self._error_count += 1 def sanitize_asciidoc(file_path: Path): """Called by the CLI""" return Sanitizer(file_path).process()