RULEAPI-800 Detect usages of C++ instead of {cpp} in asciidoc

This commit is contained in:
Fred Tingaud 2023-12-22 13:58:58 +01:00 committed by GitHub
parent c292108e16
commit 1012001409
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 137 additions and 84 deletions

View File

@ -5,70 +5,76 @@ Checks are:
* Inline code with backquotes is correctly escaped and balanced
* Include commands are not appended to other code
"""
from pathlib import Path
import re
from pathlib import Path
VALID_IFDEF = "ifdef::env-github,rspecator-view[]"
VALID_ENDIF = "endif::env-github,rspecator-view[]"
VARIABLE_DECL = re.compile(r':\w+: ')
VARIABLE_DECL = re.compile(r":\w+: ")
INCLUDE = re.compile(r'include::')
INCLUDE = re.compile(r"include::")
FORMATTING_CHARS = ['_', r'\*', r'\#']
WORD_FORMATTING_CHARS = [r'\~', r'\^']
FORMATTING_CHARS = ["_", r"\*", r"\#"]
WORD_FORMATTING_CHARS = [r"\~", r"\^"]
# If the formatting char is repeated twice, it can go anywhere
UNCONSTRAINED_FORMATTING = '|'.join(x + x for x in FORMATTING_CHARS)
UNCONSTRAINED_FORMATTING = "|".join(x + x for x in FORMATTING_CHARS)
# A single formatting char is dangerous at the beginning of a word
FORMATTING_OPENING = '|'.join(r'(\W|^)' + x + r'\w' for x in FORMATTING_CHARS)
FORMATTING_OPENING = "|".join(r"(\W|^)" + x + r"\w" for x in FORMATTING_CHARS)
# A single formatting char is dangerous at the end of a word
FORMATTING_CLOSING = '|'.join(r'\w' + x + r'(\W|$)' for x in FORMATTING_CHARS)
FORMATTING_CLOSING = "|".join(r"\w" + x + r"(\W|$)" for x in FORMATTING_CHARS)
# Word formatting is broken by spaces so we look for things like `#word#`
WORD_FORMATTING = "|".join(x + r'\S+' + x for x in WORD_FORMATTING_CHARS)
WORD_FORMATTING = "|".join(x + r"\S+" + x for x in WORD_FORMATTING_CHARS)
# We combine all the matchers
NEED_PROTECTION = re.compile('('
f'{UNCONSTRAINED_FORMATTING}|'
f'{FORMATTING_OPENING}|'
f'{FORMATTING_CLOSING}|'
f'{WORD_FORMATTING}'
')')
NEED_PROTECTION = re.compile(
"("
f"{UNCONSTRAINED_FORMATTING}|"
f"{FORMATTING_OPENING}|"
f"{FORMATTING_CLOSING}|"
f"{WORD_FORMATTING}"
")"
)
# There is a regex trick here:
# We want to stop the search if there is a backquote
# We do that by matching backquote OR the closing passthrough
# Then we'll ignore any match of backquote
CLOSE_CONSTRAINED_PASSTHROUGH = re.compile(r'`|((?<!\s)\+(?=`))')
CLOSE_CONSTRAINED_PASSTHROUGH = re.compile(r"`|((?<!\s)\+(?=`))")
CLOSE_CONSTRAINED_BACKQUOTE = re.compile(r'`(?!\w)')
CLOSE_UNCONSTRAINED_BACKQUOTE = re.compile('``')
CLOSE_CONSTRAINED_BACKQUOTE = re.compile(r"`(?!\w)")
CLOSE_UNCONSTRAINED_BACKQUOTE = re.compile("``")
PASSTHROUGH_MACRO_TEXT = r'pass:\w*\[(\\\]|[^\]])*\]'
PASSTHROUGH_MACRO_TEXT = r"pass:\w*\[(\\\]|[^\]])*\]"
PASSTHROUGH_MACRO = re.compile(PASSTHROUGH_MACRO_TEXT)
CPP = re.compile(r"\b[Cc]\+\+")
# There is a regex trick here:
# We want to skip passthrough macros, to not find pass:[``whatever``]
# We do that by matching
# * EITHER passthrough macros including their ignored backquotes
# * OR backquotes
# Then we'll ignore any match of PASSTHROUGH_MACRO
BACKQUOTE = re.compile(PASSTHROUGH_MACRO_TEXT + r'|(?P<backquote>(``+)|(?<![\\\w])(`)(?!\s))')
BACKQUOTE = re.compile(
PASSTHROUGH_MACRO_TEXT + r"|(?P<backquote>(``+)|(?<![\\\w])(`)(?!\s))"
)
def close_passthrough(count, pos, line):
"""Find the end of a passthrough block marked by *count* plus signs"""
while count > 0:
# `+++a++` will display '+a' in case of imbalance, we try to find the biggest closing block
if count == 1:
if not line[pos + count].isspace() and line[pos - 1] == '`':
#constrained '+'. It is a passthrough only if it is directly around text and surrounded by backquotes: `+Some Content+`
if not line[pos + count].isspace() and line[pos - 1] == "`":
# constrained '+'. It is a passthrough only if it is directly around text and surrounded by backquotes: `+Some Content+`
close_pattern = CLOSE_CONSTRAINED_PASSTHROUGH
else:
return pos
else:
close_pattern = re.compile('(' + r'\+' * count + ')')
close_pattern = re.compile("(" + r"\+" * count + ")")
end = close_pattern.search(line, pos + count)
if end and end.group(1):
return end.end()
@ -77,8 +83,8 @@ def close_passthrough(count, pos, line):
def skip_passthrough_macro(line, pos):
'''If this is a passthrough macro, skip to the end'''
if line[pos] == 'p':
"""If this is a passthrough macro, skip to the end"""
if line[pos] == "p":
pm = PASSTHROUGH_MACRO.match(line, pos)
if pm:
return pm.end()
@ -86,10 +92,10 @@ def skip_passthrough_macro(line, pos):
def skip_passthrough_plus(line, pos):
'''If this is a passthrough +, skip to the end'''
if line[pos] == '+':
"""If this is a passthrough +, skip to the end"""
if line[pos] == "+":
count = 1
while pos + count < len(line) and line[pos + count] == '+':
while pos + count < len(line) and line[pos + count] == "+":
count += 1
return close_passthrough(count, pos, line)
return pos
@ -126,10 +132,10 @@ class Sanitizer:
lines = content.splitlines(keepends=False)
for line_index, line in enumerate(lines):
if self._is_inside_code:
if line == '----':
if line == "----":
self._is_inside_code = False
continue
if line == '----':
if line == "----":
self._is_inside_code = True
continue
line_number = line_index + 1
@ -185,33 +191,58 @@ class Sanitizer:
f'Incorrect endif command. "{VALID_ENDIF}" should be used instead.',
)
def _advance_to_next_backquote(self, line: str, pos: int, line_number: int):
    """Find the next backquote in *line* at or after *pos*.

    While advancing, scan the plain text being skipped over for a literal
    "C++"/"c++" and report an error if found: asciidoc renders "C++"
    incorrectly, so the "{cpp}" attribute must be used instead. Text inside
    the upcoming code span is excluded from that scan.

    Returns the re.Match for the next backquote (or passthrough macro per
    the BACKQUOTE trick), or None if the rest of the line has none.
    """
    next_pos = BACKQUOTE.search(line, pos)
    if next_pos:
        # Limit the C++ scan to the text before the upcoming backquote.
        # Use start() — the position where the match begins. Match.pos is
        # merely the pos argument passed to search(), so using it as endpos
        # would make the scanned range [pos, pos), i.e. empty, and any C++
        # preceding a backquote on the same line would go undetected.
        cpp = CPP.search(line, pos, endpos=next_pos.start())
    else:
        cpp = CPP.search(line, pos)
    if cpp:
        self._on_error(
            line_number, 'To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++'
        )
    return next_pos
def _process_description(self, line_number: int, line: str):
if VARIABLE_DECL.match(line):
return
if self._previous_line_was_include and not self._empty_line:
self._on_error(line_number - 1, '''An empty line is missing after the include.
self._on_error(
line_number - 1,
"""An empty line is missing after the include.
This may result in broken tags and other display issues.
Make sure there are always empty lines before and after each include''')
Make sure there are always empty lines before and after each include""",
)
if INCLUDE.match(line):
self._previous_line_was_include = True
if not self._empty_line:
self._on_error(line_number, '''An empty line is missing before the include.
self._on_error(
line_number,
"""An empty line is missing before the include.
This may result in broken tags and other display issues.
Make sure there are always empty lines before and after each include''')
Make sure there are always empty lines before and after each include""",
)
return
else:
self._previous_line_was_include = False
pos = 0
res = BACKQUOTE.search(line, pos)
res = self._advance_to_next_backquote(line, pos, line_number)
# We filter out matches for passthrough. See comment near the BACKQUOTE declaration
while res and res.group('backquote'):
pos = self._check_inlined_code(line_number, res.end(), line, res.group('backquote'))
res = BACKQUOTE.search(line, pos)
while res and res.group("backquote"):
pos = self._check_inlined_code(
line_number, res.end(), line, res.group("backquote")
)
res = self._advance_to_next_backquote(line, pos, line_number)
def _check_inlined_code(self, line_number: int, pos: int, line: str, opening_pattern: str):
def _check_inlined_code(
self, line_number: int, pos: int, line: str, opening_pattern: str
):
if len(opening_pattern) > 2:
# Part of the backquotes are displayed as backquotes.
self._on_error(line_number, 'Use "++" to isolate the backquotes you want to display from the ones that should be interpreted by AsciiDoc.')
self._on_error(
line_number,
'Use "++" to isolate the backquotes you want to display from the ones that should be interpreted by AsciiDoc.',
)
return pos
elif len(opening_pattern) == 2:
closing_pattern = CLOSE_UNCONSTRAINED_BACKQUOTE
@ -220,27 +251,30 @@ Make sure there are always empty lines before and after each include''')
content_end, content = close_inline_block(line, pos, closing_pattern)
if content_end < 0:
message='Unbalanced code inlining tags.'
message = "Unbalanced code inlining tags."
if len(opening_pattern) == 1:
message += '''
message += """
If you are trying to write inline code that is glued to text without a space,
you need to use double backquotes:
> Replace all `reference`s.
Will not display correctly. You need to write:
> Replace all ``reference``s.
'''
"""
self._on_error(line_number, message)
return len(line)
pos = content_end + len(opening_pattern)
if NEED_PROTECTION.search(content):
self._on_error (line_number, f'''
self._on_error(
line_number,
f"""
Using backquotes does not protect against asciidoc interpretation. Starting or
ending a word with '*', '#', '_' or having two of them consecutively will
trigger unintended behavior with the rest of the text.
Use ``++{content}++`` to avoid that.
If you really want to have formatting inside your code, you can write
``pass:n[{content}]``
''')
""",
)
return pos
return pos

View File

@ -24,8 +24,13 @@ The pass:[``++Can have __ [escaped brackets\] __ ++``]
[source,python]
----
# We don't care about `in the code
We also don't care about writing C++ or c++
----
Inside descriptions, we only use {cpp} to refer to the language
We can have a sole ` surrounded by spaces
This file does not exist but we only check that the include is well placed:

View File

@ -0,0 +1,2 @@
$PATH/unnamed_language.adoc:1 To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++
$PATH/unnamed_language.adoc:3 To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++

View File

@ -0,0 +1,5 @@
We shouldn't mention the language C++ by its name.
Nor by c++ for what it's worth.
We should use the built-in attribute {cpp} instead.

View File

@ -6,38 +6,45 @@ from rspec_tools.validation.sanitize_asciidoc import sanitize_asciidoc
def relative_output(capsys, path: Path):
return capsys.readouterr().out.replace(str(path), '$PATH')
return capsys.readouterr().out.replace(str(path), "$PATH")
@pytest.mark.parametrize('invalid_file,expected_count', [('unbalanced_single_backquotes', 1),
('unbalanced_double_backquotes', 1),
('triple_backquotes', 1),
('unprotected_formatting', 4),
('unprotected_formatting_with_plusses', 1),
('wrong_constrained_passthrough', 1),
('unclosed_ifdef', 1),
('close_unopened_ifdef', 1),
('two_ifdef', 1),
('two_ifdef_unclosed', 1),
('vscode_ifdef', 2),
('wrong_ifdef', 1),
('wrong_endif', 1),
('include_stuck_before', 1),
('include_stuck_after', 1),
('two_stuck_includes', 2)
])
def test_need_sanitation(mockinvalidasciidoc: Path, invalid_file, expected_count, capsys, snapshot):
'''Check that we detect needs for sanitation.'''
@pytest.mark.parametrize(
"invalid_file,expected_count",
[
("unbalanced_single_backquotes", 1),
("unbalanced_double_backquotes", 1),
("triple_backquotes", 1),
("unprotected_formatting", 4),
("unprotected_formatting_with_plusses", 1),
("wrong_constrained_passthrough", 1),
("unclosed_ifdef", 1),
("close_unopened_ifdef", 1),
("two_ifdef", 1),
("two_ifdef_unclosed", 1),
("vscode_ifdef", 2),
("wrong_ifdef", 1),
("wrong_endif", 1),
("include_stuck_before", 1),
("include_stuck_after", 1),
("two_stuck_includes", 2),
("unnamed_language", 2),
],
)
def test_need_sanitation(
mockinvalidasciidoc: Path, invalid_file, expected_count, capsys, snapshot
):
"""Check that we detect needs for sanitation."""
name_path = Path(invalid_file)
adoc = mockinvalidasciidoc / name_path.with_suffix('.adoc')
expected = mockinvalidasciidoc / 'snapshots' / name_path.with_suffix('.txt')
adoc = mockinvalidasciidoc / name_path.with_suffix(".adoc")
expected = mockinvalidasciidoc / "snapshots" / name_path.with_suffix(".txt")
assert sanitize_asciidoc(adoc) == expected_count
snapshot.snapshot_dir = mockinvalidasciidoc / 'snapshots'
snapshot.snapshot_dir = mockinvalidasciidoc / "snapshots"
snapshot.assert_match(relative_output(capsys, mockinvalidasciidoc), expected)
def test_correctly_sanitized(mockasciidoc: Path):
'''Check that we raise no issue on correctly sanitized asciidoc'''
name_path = Path('valid')
adoc = mockasciidoc / name_path.with_suffix('.adoc')
"""Check that we raise no issue on correctly sanitized asciidoc"""
name_path = Path("valid")
adoc = mockasciidoc / name_path.with_suffix(".adoc")
assert sanitize_asciidoc(adoc) == 0

View File

@ -24,7 +24,7 @@ class MyClass {
=== Documentation
* Geeksforgeeks - https://www.geeksforgeeks.org/naming-convention-in-c/[Naming convention in C++]
* Geeksforgeeks - https://www.geeksforgeeks.org/naming-convention-in-c/[Naming convention in {cpp}]
* Wikipedia - https://en.wikipedia.org/wiki/Naming_convention_(programming)[Naming Convention (programming)]
ifdef::env-github,rspecator-view[]

View File

@ -25,8 +25,8 @@ void precept(int finalValue); // Compliant
* {cpp} reference - https://en.cppreference.com/w/cpp/language/final[final specifier]
* {cpp} reference - https://en.cppreference.com/w/cpp/language/override[override specifier]
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/module[C++ keyword: module]
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/import[C++ keyword: import]
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/module[{cpp} keyword: module]
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/import[{cpp} keyword: import]
ifdef::env-github,rspecator-view[]

View File

@ -375,8 +375,8 @@ void tar(std::string const &s) {
=== Conference presentations
* CppCon 2014 - https://youtu.be/V2_80g0eOMc?si=U_qv9iBKI5B3a_EL[Sanitize your C++ code]
* CppCon 2018 - https://youtu.be/0S0QgQd75Sw?si=AW9mA09L5PEbkqXc[Software Vulnerabilities in C and C++]
* CppCon 2014 - https://youtu.be/V2_80g0eOMc?si=U_qv9iBKI5B3a_EL[Sanitize your {cpp} code]
* CppCon 2018 - https://youtu.be/0S0QgQd75Sw?si=AW9mA09L5PEbkqXc[Software Vulnerabilities in C and {cpp}]
* CppCon 2020 - https://youtu.be/xEzfnbTabyE?si=9yJQkrcRKn6tuPaV[2020: The Year of Sanitizers?]
=== Standards

View File

@ -8,7 +8,7 @@ This rule raises an issue whenever the file specified in a ``++#include++`` dire
== Resources
* Microsoft Learn - https://learn.microsoft.com/en-us/cpp/preprocessor/hash-include-directive-c-cpp[``++#include++`` directive (C/C++)]
* Microsoft Learn - https://learn.microsoft.com/en-us/cpp/preprocessor/hash-include-directive-c-cpp[``++#include++`` directive (C/{cpp})]
ifdef::env-github,rspecator-view[]

View File

@ -133,7 +133,7 @@ void bar(const char *src) {
=== Conference presentations
* CppCon 2018 - https://www.youtube.com/watch?v=0S0QgQd75Sw&ab_channel=CppCon[Software Vulnerabilities in C and C++]
* CppCon 2018 - https://www.youtube.com/watch?v=0S0QgQd75Sw&ab_channel=CppCon[Software Vulnerabilities in C and {cpp}]
=== Standards

View File

@ -211,7 +211,7 @@ void use_and_destroy_initialized()
=== Conference presentations
* CppCon 2020 - https://youtu.be/A7sVFJLJM-A?si=v76jhmv3XnHExZYU[An Introduction to Multithreading in C++20]
* CppCon 2020 - https://youtu.be/A7sVFJLJM-A?si=v76jhmv3XnHExZYU[An Introduction to Multithreading in {cpp}20]
=== Related rules

View File

@ -43,5 +43,5 @@ if (nullptr == ptr) [[unlikely]] {
== Resources
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/likely[C++ attribute: likely, unlikely]
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/likely[{cpp} attribute: likely, unlikely]

View File

@ -58,7 +58,7 @@ This rule does not apply to fields whose class has a non-default alignment.
== Resources
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/no_unique_address[C++ attribute: no_unique_address]
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/no_unique_address[{cpp} attribute: no_unique_address]
ifdef::env-github,rspecator-view[]
'''

View File

@ -103,7 +103,7 @@ float fastInvSqrt(float number) {
=== Conference presentations
* CppCon 2019 - https://www.youtube.com/watch?v=_qzMpk-22cc[Type punning in modern C++, Timur Doumler]
* CppCon 2019 - https://www.youtube.com/watch?v=_qzMpk-22cc[Type punning in modern {cpp}, Timur Doumler]
=== Related rules

View File

@ -67,7 +67,7 @@ void increment (int & value,
=== Articles & blog posts
* https://isocpp.org/wiki/faq/const-correctness[ISO C++ FAQ about const correctness].
* https://isocpp.org/wiki/faq/const-correctness[ISO {cpp} FAQ about const correctness].
=== External coding guidelines