RULEAPI-800 Detect usages of C++ instead of {cpp} in asciidoc

This commit is contained in:
Fred Tingaud 2023-12-22 13:58:58 +01:00 committed by GitHub
parent c292108e16
commit 1012001409
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 137 additions and 84 deletions

View File

@ -5,70 +5,76 @@ Checks are:
* Inline code with backquotes is correctly escaped and balanced * Inline code with backquotes is correctly escaped and balanced
* Include commands are not appended to other code * Include commands are not appended to other code
""" """
from pathlib import Path
import re import re
from pathlib import Path
# The only spellings of the conditional directives that the sanitizer accepts.
VALID_IFDEF = "ifdef::env-github,rspecator-view[]"
VALID_ENDIF = "endif::env-github,rspecator-view[]"

# Line starts with an attribute declaration such as ":name: value".
VARIABLE_DECL = re.compile(r":\w+: ")
# Line starts with an include directive.
INCLUDE = re.compile(r"include::")

# Regex-escaped characters that trigger AsciiDoc text formatting.
FORMATTING_CHARS = ["_", r"\*", r"\#"]
WORD_FORMATTING_CHARS = [r"\~", r"\^"]

# A formatting char repeated twice is interpreted anywhere in the text.
UNCONSTRAINED_FORMATTING = "|".join(char * 2 for char in FORMATTING_CHARS)
# A single formatting char is dangerous at the beginning of a word...
FORMATTING_OPENING = "|".join(rf"(\W|^){char}\w" for char in FORMATTING_CHARS)
# ...and at the end of a word.
FORMATTING_CLOSING = "|".join(rf"\w{char}(\W|$)" for char in FORMATTING_CHARS)
# Word formatting is broken by spaces, so we look for things like `~word~`.
WORD_FORMATTING = "|".join(rf"{char}\S+{char}" for char in WORD_FORMATTING_CHARS)

# All the matchers above combined into a single capturing alternation.
NEED_PROTECTION = re.compile(
    "("
    + "|".join(
        (
            UNCONSTRAINED_FORMATTING,
            FORMATTING_OPENING,
            FORMATTING_CLOSING,
            WORD_FORMATTING,
        )
    )
    + ")"
)

# Regex trick: the search must stop as soon as a backquote is met.
# We therefore match EITHER a backquote OR the closing passthrough; only the
# closing passthrough is captured in group 1, so a bare backquote match can be
# recognized (group 1 is empty) and ignored by the caller.
CLOSE_CONSTRAINED_PASSTHROUGH = re.compile(r"`|((?<!\s)\+(?=`))")
CLOSE_CONSTRAINED_BACKQUOTE = re.compile(r"`(?!\w)")
CLOSE_UNCONSTRAINED_BACKQUOTE = re.compile("``")

PASSTHROUGH_MACRO_TEXT = r"pass:\w*\[(\\\]|[^\]])*\]"
PASSTHROUGH_MACRO = re.compile(PASSTHROUGH_MACRO_TEXT)

# The language name written literally ("C++" or "c++") at a word boundary.
CPP = re.compile(r"\b[Cc]\+\+")

# Regex trick: passthrough macros must be skipped so that the backquotes of
# pass:[``whatever``] are not reported. We match
#  * EITHER a whole passthrough macro, including the backquotes it contains
#  * OR an opening backquote sequence, captured in the "backquote" group
# A match whose "backquote" group is empty is a passthrough macro.
BACKQUOTE = re.compile(
    rf"{PASSTHROUGH_MACRO_TEXT}|(?P<backquote>(``+)|(?<![\\\w])(`)(?!\s))"
)
def close_passthrough(count, pos, line): def close_passthrough(count, pos, line):
"""Find the end of a passthrough block marked by *count* plus signs""" """Find the end of a passthrough block marked by *count* plus signs"""
while count > 0: while count > 0:
# `+++a++` will display '+a' in case of inbalance, we try to find the biggest closing block # `+++a++` will display '+a' in case of inbalance, we try to find the biggest closing block
if count == 1: if count == 1:
if not line[pos + count].isspace() and line[pos - 1] == '`': if not line[pos + count].isspace() and line[pos - 1] == "`":
#constrained '+'. It is a passthrough only if it is directly around text and surrounded by backquotes: `+Some Content+` # constrained '+'. It is a passthrough only if it is directly around text and surrounded by backquotes: `+Some Content+`
close_pattern = CLOSE_CONSTRAINED_PASSTHROUGH close_pattern = CLOSE_CONSTRAINED_PASSTHROUGH
else: else:
return pos return pos
else: else:
close_pattern = re.compile('(' + r'\+' * count + ')') close_pattern = re.compile("(" + r"\+" * count + ")")
end = close_pattern.search(line, pos + count) end = close_pattern.search(line, pos + count)
if end and end.group(1): if end and end.group(1):
return end.end() return end.end()
@ -77,8 +83,8 @@ def close_passthrough(count, pos, line):
def skip_passthrough_macro(line, pos): def skip_passthrough_macro(line, pos):
'''If this is a passthrough macro, skip to the end''' """If this is a passthrough macro, skip to the end"""
if line[pos] == 'p': if line[pos] == "p":
pm = PASSTHROUGH_MACRO.match(line, pos) pm = PASSTHROUGH_MACRO.match(line, pos)
if pm: if pm:
return pm.end() return pm.end()
@ -86,10 +92,10 @@ def skip_passthrough_macro(line, pos):
def skip_passthrough_plus(line, pos):
    """Skip a passthrough opened by one or more '+' at *pos*.

    When line[pos] is not a '+', *pos* is returned unchanged. Otherwise the
    run of consecutive '+' starting at *pos* is measured and the result of
    close_passthrough() for that run is returned.
    """
    if line[pos] != "+":
        return pos
    run_end = pos + 1
    while run_end < len(line) and line[run_end] == "+":
        run_end += 1
    return close_passthrough(run_end - pos, pos, line)
@ -126,10 +132,10 @@ class Sanitizer:
lines = content.splitlines(keepends=False) lines = content.splitlines(keepends=False)
for line_index, line in enumerate(lines): for line_index, line in enumerate(lines):
if self._is_inside_code: if self._is_inside_code:
if line == '----': if line == "----":
self._is_inside_code = False self._is_inside_code = False
continue continue
if line == '----': if line == "----":
self._is_inside_code = True self._is_inside_code = True
continue continue
line_number = line_index + 1 line_number = line_index + 1
@ -185,33 +191,58 @@ class Sanitizer:
f'Incorrect endif command. "{VALID_ENDIF}" should be used instead.', f'Incorrect endif command. "{VALID_ENDIF}" should be used instead.',
) )
def _advance_to_next_backquote(self, line: str, pos: int, line_number: int):
    """Find the next BACKQUOTE match in *line*, starting the search at *pos*.

    The text lying between *pos* and that match (or the end of the line when
    nothing matches) is scanned for a literal "C++"/"c++"; an error is
    reported through self._on_error for *line_number* if one is found.

    Returns the match object produced by BACKQUOTE, or None when the rest of
    the line contains neither an opening backquote nor a passthrough macro.
    """
    next_match = BACKQUOTE.search(line, pos)
    # Match.pos is the offset the search *started* from (i.e. `pos`), not the
    # offset where the match begins. Using it as the end of the scanned region
    # made that region empty, so "C++" written before an inline code block was
    # never reported. Match.start() is the actual beginning of the match.
    text_end = next_match.start() if next_match else len(line)
    if CPP.search(line, pos, text_end):
        self._on_error(
            line_number,
            'To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++',
        )
    return next_match
def _process_description(self, line_number: int, line: str):
    """Check one line of description text.

    Attribute declarations (":name: value") are ignored. Include directives
    are checked for placement: an error is reported when self._empty_line is
    false on an include line or on the line following one (presumably that
    flag records whether the preceding line was blank -- it is maintained
    outside this method). Any other line is scanned left to right for inline
    code, each occurrence being validated by _check_inlined_code.
    """
    if VARIABLE_DECL.match(line):
        return
    if self._previous_line_was_include and not self._empty_line:
        self._on_error(
            line_number - 1,
            """An empty line is missing after the include.
This may result in broken tags and other display issues.
Make sure there are always empty lines before and after each include""",
        )
    if INCLUDE.match(line):
        self._previous_line_was_include = True
        if not self._empty_line:
            self._on_error(
                line_number,
                """An empty line is missing before the include.
This may result in broken tags and other display issues.
Make sure there are always empty lines before and after each include""",
            )
        return
    self._previous_line_was_include = False
    pos = 0
    res = self._advance_to_next_backquote(line, pos, line_number)
    while res:
        if res.group("backquote"):
            pos = self._check_inlined_code(
                line_number, res.end(), line, res.group("backquote")
            )
        else:
            # The match is a passthrough macro (see the comment near the
            # BACKQUOTE declaration): step over it and keep scanning. Stopping
            # here would leave everything after the macro unchecked.
            pos = res.end()
        res = self._advance_to_next_backquote(line, pos, line_number)
def _check_inlined_code(self, line_number: int, pos: int, line: str, opening_pattern: str): def _check_inlined_code(
self, line_number: int, pos: int, line: str, opening_pattern: str
):
if len(opening_pattern) > 2: if len(opening_pattern) > 2:
# Part of the backquotes are displayed as backquotes. # Part of the backquotes are displayed as backquotes.
self._on_error(line_number, 'Use "++" to isolate the backquotes you want to display from the ones that should be interpreted by AsciiDoc.') self._on_error(
line_number,
'Use "++" to isolate the backquotes you want to display from the ones that should be interpreted by AsciiDoc.',
)
return pos return pos
elif len(opening_pattern) == 2: elif len(opening_pattern) == 2:
closing_pattern = CLOSE_UNCONSTRAINED_BACKQUOTE closing_pattern = CLOSE_UNCONSTRAINED_BACKQUOTE
@ -220,27 +251,30 @@ Make sure there are always empty lines before and after each include''')
content_end, content = close_inline_block(line, pos, closing_pattern) content_end, content = close_inline_block(line, pos, closing_pattern)
if content_end < 0: if content_end < 0:
message='Unbalanced code inlining tags.' message = "Unbalanced code inlining tags."
if len(opening_pattern) == 1: if len(opening_pattern) == 1:
message += ''' message += """
If you are trying to write inline code that is glued to text without a space, If you are trying to write inline code that is glued to text without a space,
you need to use double backquotes: you need to use double backquotes:
> Replace all `reference`s. > Replace all `reference`s.
Will not display correctly. You need to write: Will not display correctly. You need to write:
> Replace all ``reference``s. > Replace all ``reference``s.
''' """
self._on_error(line_number, message) self._on_error(line_number, message)
return len(line) return len(line)
pos = content_end + len(opening_pattern) pos = content_end + len(opening_pattern)
if NEED_PROTECTION.search(content): if NEED_PROTECTION.search(content):
self._on_error (line_number, f''' self._on_error(
line_number,
f"""
Using backquotes does not protect against asciidoc interpretation. Starting or Using backquotes does not protect against asciidoc interpretation. Starting or
ending a word with '*', '#', '_' or having two of them consecutively will ending a word with '*', '#', '_' or having two of them consecutively will
trigger unintended behavior with the rest of the text. trigger unintended behavior with the rest of the text.
Use ``++{content}++`` to avoid that. Use ``++{content}++`` to avoid that.
If you really want to have formatting inside your code, you can write If you really want to have formatting inside your code, you can write
``pass:n[{content}]`` ``pass:n[{content}]``
''') """,
)
return pos return pos
return pos return pos

View File

@ -24,8 +24,13 @@ The pass:[``++Can have __ [escaped brackets\] __ ++``]
[source,python] [source,python]
---- ----
# We don't care about `in the code # We don't care about `in the code
We also don't care about writing C++ or c++
---- ----
Inside descriptions, we only use {cpp} to refer to the language
We can have a sole ` surrounded by spaces We can have a sole ` surrounded by spaces
This file does not exist but we only check that the include is well placed: This file does not exist but we only check that the include is well placed:

View File

@ -0,0 +1,2 @@
$PATH/unnamed_language.adoc:1 To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++
$PATH/unnamed_language.adoc:3 To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++

View File

@ -0,0 +1,5 @@
We shouldn't mention the language C++ by its name.
Nor by c++ for what it's worth.
We should use the built-in attribute {cpp} instead.

View File

@ -6,38 +6,45 @@ from rspec_tools.validation.sanitize_asciidoc import sanitize_asciidoc
def relative_output(capsys, path: Path):
    """Return the captured stdout with every occurrence of *path* replaced by '$PATH'."""
    captured = capsys.readouterr()
    absolute_prefix = str(path)
    return captured.out.replace(absolute_prefix, "$PATH")
@pytest.mark.parametrize(
    "invalid_file,expected_count",
    [
        ("unbalanced_single_backquotes", 1),
        ("unbalanced_double_backquotes", 1),
        ("triple_backquotes", 1),
        ("unprotected_formatting", 4),
        ("unprotected_formatting_with_plusses", 1),
        ("wrong_constrained_passthrough", 1),
        ("unclosed_ifdef", 1),
        ("close_unopened_ifdef", 1),
        ("two_ifdef", 1),
        ("two_ifdef_unclosed", 1),
        ("vscode_ifdef", 2),
        ("wrong_ifdef", 1),
        ("wrong_endif", 1),
        ("include_stuck_before", 1),
        ("include_stuck_after", 1),
        ("two_stuck_includes", 2),
        ("unnamed_language", 2),
    ],
)
def test_need_sanitation(
    mockinvalidasciidoc: Path, invalid_file, expected_count, capsys, snapshot
):
    """Each invalid fixture yields the expected issue count and the expected output."""
    snapshots_dir = mockinvalidasciidoc / "snapshots"
    stem = Path(invalid_file)
    issue_count = sanitize_asciidoc(mockinvalidasciidoc / stem.with_suffix(".adoc"))
    assert issue_count == expected_count
    # Printed messages are compared with the stored snapshot once the fixture
    # directory has been replaced by "$PATH".
    snapshot.snapshot_dir = snapshots_dir
    snapshot.assert_match(
        relative_output(capsys, mockinvalidasciidoc),
        snapshots_dir / stem.with_suffix(".txt"),
    )
def test_correctly_sanitized(mockasciidoc: Path):
    """A correctly sanitized asciidoc file must not raise any issue."""
    valid_adoc = mockasciidoc / Path("valid").with_suffix(".adoc")
    assert sanitize_asciidoc(valid_adoc) == 0

View File

@ -24,7 +24,7 @@ class MyClass {
=== Documentation === Documentation
* Geeksforgeeks - https://www.geeksforgeeks.org/naming-convention-in-c/[Naming convention in C++] * Geeksforgeeks - https://www.geeksforgeeks.org/naming-convention-in-c/[Naming convention in {cpp}]
* Wikipedia - https://en.wikipedia.org/wiki/Naming_convention_(programming)[Naming Convention (programming)] * Wikipedia - https://en.wikipedia.org/wiki/Naming_convention_(programming)[Naming Convention (programming)]
ifdef::env-github,rspecator-view[] ifdef::env-github,rspecator-view[]

View File

@ -25,8 +25,8 @@ void precept(int finalValue); // Compliant
* {cpp} reference - https://en.cppreference.com/w/cpp/language/final[final specifier] * {cpp} reference - https://en.cppreference.com/w/cpp/language/final[final specifier]
* {cpp} reference - https://en.cppreference.com/w/cpp/language/override[override specifier] * {cpp} reference - https://en.cppreference.com/w/cpp/language/override[override specifier]
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/module[C++ keyword: module] * {cpp} reference - https://en.cppreference.com/w/cpp/keyword/module[{cpp} keyword: module]
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/import[C++ keyword: import] * {cpp} reference - https://en.cppreference.com/w/cpp/keyword/import[{cpp} keyword: import]
ifdef::env-github,rspecator-view[] ifdef::env-github,rspecator-view[]

View File

@ -375,8 +375,8 @@ void tar(std::string const &s) {
=== Conference presentations === Conference presentations
* CppCon 2014 - https://youtu.be/V2_80g0eOMc?si=U_qv9iBKI5B3a_EL[Sanitize your C++ code] * CppCon 2014 - https://youtu.be/V2_80g0eOMc?si=U_qv9iBKI5B3a_EL[Sanitize your {cpp} code]
* CppCon 2018 - https://youtu.be/0S0QgQd75Sw?si=AW9mA09L5PEbkqXc[Software Vulnerabilities in C and C++] * CppCon 2018 - https://youtu.be/0S0QgQd75Sw?si=AW9mA09L5PEbkqXc[Software Vulnerabilities in C and {cpp}]
* CppCon 2020 - https://youtu.be/xEzfnbTabyE?si=9yJQkrcRKn6tuPaV[2020: The Year of Sanitizers?] * CppCon 2020 - https://youtu.be/xEzfnbTabyE?si=9yJQkrcRKn6tuPaV[2020: The Year of Sanitizers?]
=== Standards === Standards

View File

@ -8,7 +8,7 @@ This rule raises an issue whenever the file specified in a ``++#include++`` dire
== Resources == Resources
* Microsoft Learn - https://learn.microsoft.com/en-us/cpp/preprocessor/hash-include-directive-c-cpp[``++#include++`` directive (C/C++)] * Microsoft Learn - https://learn.microsoft.com/en-us/cpp/preprocessor/hash-include-directive-c-cpp[``++#include++`` directive (C/{cpp})]
ifdef::env-github,rspecator-view[] ifdef::env-github,rspecator-view[]

View File

@ -133,7 +133,7 @@ void bar(const char *src) {
=== Conference presentations === Conference presentations
* CppCon 2018 - https://www.youtube.com/watch?v=0S0QgQd75Sw&ab_channel=CppCon[Software Vulnerabilities in C and C++] * CppCon 2018 - https://www.youtube.com/watch?v=0S0QgQd75Sw&ab_channel=CppCon[Software Vulnerabilities in C and {cpp}]
=== Standards === Standards

View File

@ -211,7 +211,7 @@ void use_and_destroy_initialized()
=== Conference presentations === Conference presentations
* CppCon 2020 - https://youtu.be/A7sVFJLJM-A?si=v76jhmv3XnHExZYU[An Introduction to Multithreading in C++20] * CppCon 2020 - https://youtu.be/A7sVFJLJM-A?si=v76jhmv3XnHExZYU[An Introduction to Multithreading in {cpp}20]
=== Related rules === Related rules

View File

@ -43,5 +43,5 @@ if (nullptr == ptr) [[unlikely]] {
== Resources == Resources
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/likely[C++ attribute: likely, unlikely] * {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/likely[{cpp} attribute: likely, unlikely]

View File

@ -58,7 +58,7 @@ This rule does not apply to fields whose class has a non-default alignment.
== Resources == Resources
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/no_unique_address[C++ attribute: no_unique_address] * {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/no_unique_address[{cpp} attribute: no_unique_address]
ifdef::env-github,rspecator-view[] ifdef::env-github,rspecator-view[]
''' '''

View File

@ -103,7 +103,7 @@ float fastInvSqrt(float number) {
=== Conference presentations === Conference presentations
* CppCon 2019 - https://www.youtube.com/watch?v=_qzMpk-22cc[Type punning in modern C++, Timur Doumler] * CppCon 2019 - https://www.youtube.com/watch?v=_qzMpk-22cc[Type punning in modern {cpp}, Timur Doumler]
=== Related rules === Related rules

View File

@ -67,7 +67,7 @@ void increment (int & value,
=== Articles & blog posts === Articles & blog posts
* https://isocpp.org/wiki/faq/const-correctness[ISO C++ FAQ about const correctness]. * https://isocpp.org/wiki/faq/const-correctness[ISO {cpp} FAQ about const correctness].
=== External coding guidelines === External coding guidelines