RULEAPI-800 Detect usages of C++ instead of {cpp} in asciidoc
This commit is contained in:
parent
c292108e16
commit
1012001409
@ -5,70 +5,76 @@ Checks are:
|
||||
* Inline code with backquotes is correctly escaped and balanced
|
||||
* Include commands are not appended to other code
|
||||
"""
|
||||
from pathlib import Path
|
||||
import re
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
VALID_IFDEF = "ifdef::env-github,rspecator-view[]"
|
||||
VALID_ENDIF = "endif::env-github,rspecator-view[]"
|
||||
|
||||
VARIABLE_DECL = re.compile(r':\w+: ')
|
||||
VARIABLE_DECL = re.compile(r":\w+: ")
|
||||
|
||||
INCLUDE = re.compile(r'include::')
|
||||
INCLUDE = re.compile(r"include::")
|
||||
|
||||
FORMATTING_CHARS = ['_', r'\*', r'\#']
|
||||
WORD_FORMATTING_CHARS = [r'\~', r'\^']
|
||||
FORMATTING_CHARS = ["_", r"\*", r"\#"]
|
||||
WORD_FORMATTING_CHARS = [r"\~", r"\^"]
|
||||
|
||||
# If the formatting char is repeated twice, it can go anywhere
|
||||
UNCONSTRAINED_FORMATTING = '|'.join(x + x for x in FORMATTING_CHARS)
|
||||
UNCONSTRAINED_FORMATTING = "|".join(x + x for x in FORMATTING_CHARS)
|
||||
# A single formatting char is dangerous at the beginning of a word
|
||||
FORMATTING_OPENING = '|'.join(r'(\W|^)' + x + r'\w' for x in FORMATTING_CHARS)
|
||||
FORMATTING_OPENING = "|".join(r"(\W|^)" + x + r"\w" for x in FORMATTING_CHARS)
|
||||
# A single formatting char is dangerous at the end of a word
|
||||
FORMATTING_CLOSING = '|'.join(r'\w' + x + r'(\W|$)' for x in FORMATTING_CHARS)
|
||||
FORMATTING_CLOSING = "|".join(r"\w" + x + r"(\W|$)" for x in FORMATTING_CHARS)
|
||||
# Word formatting is broken by spaces so we look for things like `#word#`
|
||||
WORD_FORMATTING = "|".join(x + r'\S+' + x for x in WORD_FORMATTING_CHARS)
|
||||
WORD_FORMATTING = "|".join(x + r"\S+" + x for x in WORD_FORMATTING_CHARS)
|
||||
|
||||
# We combine all the matchers
|
||||
NEED_PROTECTION = re.compile('('
|
||||
f'{UNCONSTRAINED_FORMATTING}|'
|
||||
f'{FORMATTING_OPENING}|'
|
||||
f'{FORMATTING_CLOSING}|'
|
||||
f'{WORD_FORMATTING}'
|
||||
')')
|
||||
NEED_PROTECTION = re.compile(
|
||||
"("
|
||||
f"{UNCONSTRAINED_FORMATTING}|"
|
||||
f"{FORMATTING_OPENING}|"
|
||||
f"{FORMATTING_CLOSING}|"
|
||||
f"{WORD_FORMATTING}"
|
||||
")"
|
||||
)
|
||||
|
||||
# There is a regex trick here:
|
||||
# We want to stop the search if there is a backquote
|
||||
# We do that by matching backquote OR the closing passthrough
|
||||
# Then we'll ignore any match of backquote
|
||||
CLOSE_CONSTRAINED_PASSTHROUGH = re.compile(r'`|((?<!\s)\+(?=`))')
|
||||
CLOSE_CONSTRAINED_PASSTHROUGH = re.compile(r"`|((?<!\s)\+(?=`))")
|
||||
|
||||
CLOSE_CONSTRAINED_BACKQUOTE = re.compile(r'`(?!\w)')
|
||||
CLOSE_UNCONSTRAINED_BACKQUOTE = re.compile('``')
|
||||
CLOSE_CONSTRAINED_BACKQUOTE = re.compile(r"`(?!\w)")
|
||||
CLOSE_UNCONSTRAINED_BACKQUOTE = re.compile("``")
|
||||
|
||||
PASSTHROUGH_MACRO_TEXT = r'pass:\w*\[(\\\]|[^\]])*\]'
|
||||
PASSTHROUGH_MACRO_TEXT = r"pass:\w*\[(\\\]|[^\]])*\]"
|
||||
|
||||
PASSTHROUGH_MACRO = re.compile(PASSTHROUGH_MACRO_TEXT)
|
||||
|
||||
CPP = re.compile(r"\b[Cc]\+\+")
|
||||
|
||||
# There is a regex trick here:
|
||||
# We want to skip passthrough macros, to not find pass:[``whatever``]
|
||||
# We do that by matching
|
||||
# * EITHER passthrough macros including their ignored backquotes
|
||||
# * OR backquotes
|
||||
# Then we'll ignore any match of PASSTHROUGH_MACRO
|
||||
BACKQUOTE = re.compile(PASSTHROUGH_MACRO_TEXT + r'|(?P<backquote>(``+)|(?<![\\\w])(`)(?!\s))')
|
||||
BACKQUOTE = re.compile(
|
||||
PASSTHROUGH_MACRO_TEXT + r"|(?P<backquote>(``+)|(?<![\\\w])(`)(?!\s))"
|
||||
)
|
||||
|
||||
|
||||
def close_passthrough(count, pos, line):
|
||||
"""Find the end of a passthrough block marked by *count* plus signs"""
|
||||
while count > 0:
|
||||
# `+++a++` will display '+a' in case of inbalance, we try to find the biggest closing block
|
||||
if count == 1:
|
||||
if not line[pos + count].isspace() and line[pos - 1] == '`':
|
||||
#constrained '+'. It is a passthrough only if it is directly around text and surrounded by backquotes: `+Some Content+`
|
||||
if not line[pos + count].isspace() and line[pos - 1] == "`":
|
||||
# constrained '+'. It is a passthrough only if it is directly around text and surrounded by backquotes: `+Some Content+`
|
||||
close_pattern = CLOSE_CONSTRAINED_PASSTHROUGH
|
||||
else:
|
||||
return pos
|
||||
else:
|
||||
close_pattern = re.compile('(' + r'\+' * count + ')')
|
||||
close_pattern = re.compile("(" + r"\+" * count + ")")
|
||||
end = close_pattern.search(line, pos + count)
|
||||
if end and end.group(1):
|
||||
return end.end()
|
||||
@ -77,8 +83,8 @@ def close_passthrough(count, pos, line):
|
||||
|
||||
|
||||
def skip_passthrough_macro(line, pos):
|
||||
'''If this is a passthrough macro, skip to the end'''
|
||||
if line[pos] == 'p':
|
||||
"""If this is a passthrough macro, skip to the end"""
|
||||
if line[pos] == "p":
|
||||
pm = PASSTHROUGH_MACRO.match(line, pos)
|
||||
if pm:
|
||||
return pm.end()
|
||||
@ -86,10 +92,10 @@ def skip_passthrough_macro(line, pos):
|
||||
|
||||
|
||||
def skip_passthrough_plus(line, pos):
|
||||
'''If this is a passthrough +, skip to the end'''
|
||||
if line[pos] == '+':
|
||||
"""If this is a passthrough +, skip to the end"""
|
||||
if line[pos] == "+":
|
||||
count = 1
|
||||
while pos + count < len(line) and line[pos + count] == '+':
|
||||
while pos + count < len(line) and line[pos + count] == "+":
|
||||
count += 1
|
||||
return close_passthrough(count, pos, line)
|
||||
return pos
|
||||
@ -126,10 +132,10 @@ class Sanitizer:
|
||||
lines = content.splitlines(keepends=False)
|
||||
for line_index, line in enumerate(lines):
|
||||
if self._is_inside_code:
|
||||
if line == '----':
|
||||
if line == "----":
|
||||
self._is_inside_code = False
|
||||
continue
|
||||
if line == '----':
|
||||
if line == "----":
|
||||
self._is_inside_code = True
|
||||
continue
|
||||
line_number = line_index + 1
|
||||
@ -185,33 +191,58 @@ class Sanitizer:
|
||||
f'Incorrect endif command. "{VALID_ENDIF}" should be used instead.',
|
||||
)
|
||||
|
||||
def _advance_to_next_backquote(self, line: str, pos: int, line_number: int):
    """Find the next BACKQUOTE match in *line*, starting the search at *pos*.

    The text skipped on the way -- from *pos* up to the start of that match,
    or up to the end of the line when nothing matches -- is checked against
    the CPP pattern. A hit is reported through ``self._on_error`` for
    *line_number*.

    Returns the BACKQUOTE match object, or ``None`` when there is no match.
    """
    next_pos = BACKQUOTE.search(line, pos)
    # Match.pos is the index the search was *started* from (i.e. `pos`), not
    # where the match was found. Using it as the end bound made the searched
    # window empty, so C++ was never detected on a line that also contained a
    # backquote. The skipped text actually ends at Match.start().
    skipped_end = next_pos.start() if next_pos else len(line)
    if CPP.search(line, pos, skipped_end):
        self._on_error(
            line_number,
            'To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++',
        )
    return next_pos
|
||||
|
||||
def _process_description(self, line_number: int, line: str):
|
||||
if VARIABLE_DECL.match(line):
|
||||
return
|
||||
if self._previous_line_was_include and not self._empty_line:
|
||||
self._on_error(line_number - 1, '''An empty line is missing after the include.
|
||||
self._on_error(
|
||||
line_number - 1,
|
||||
"""An empty line is missing after the include.
|
||||
This may result in broken tags and other display issues.
|
||||
Make sure there are always empty lines before and after each include''')
|
||||
Make sure there are always empty lines before and after each include""",
|
||||
)
|
||||
if INCLUDE.match(line):
|
||||
self._previous_line_was_include = True
|
||||
if not self._empty_line:
|
||||
self._on_error(line_number, '''An empty line is missing before the include.
|
||||
self._on_error(
|
||||
line_number,
|
||||
"""An empty line is missing before the include.
|
||||
This may result in broken tags and other display issues.
|
||||
Make sure there are always empty lines before and after each include''')
|
||||
Make sure there are always empty lines before and after each include""",
|
||||
)
|
||||
return
|
||||
else:
|
||||
self._previous_line_was_include = False
|
||||
pos = 0
|
||||
res = BACKQUOTE.search(line, pos)
|
||||
res = self._advance_to_next_backquote(line, pos, line_number)
|
||||
# We filter out matches for passthrough. See comment near the BACKQUOTE declaration
|
||||
while res and res.group('backquote'):
|
||||
pos = self._check_inlined_code(line_number, res.end(), line, res.group('backquote'))
|
||||
res = BACKQUOTE.search(line, pos)
|
||||
while res and res.group("backquote"):
|
||||
pos = self._check_inlined_code(
|
||||
line_number, res.end(), line, res.group("backquote")
|
||||
)
|
||||
res = self._advance_to_next_backquote(line, pos, line_number)
|
||||
|
||||
def _check_inlined_code(self, line_number: int, pos: int, line: str, opening_pattern: str):
|
||||
def _check_inlined_code(
|
||||
self, line_number: int, pos: int, line: str, opening_pattern: str
|
||||
):
|
||||
if len(opening_pattern) > 2:
|
||||
# Part of the backquotes are displayed as backquotes.
|
||||
self._on_error(line_number, 'Use "++" to isolate the backquotes you want to display from the ones that should be interpreted by AsciiDoc.')
|
||||
self._on_error(
|
||||
line_number,
|
||||
'Use "++" to isolate the backquotes you want to display from the ones that should be interpreted by AsciiDoc.',
|
||||
)
|
||||
return pos
|
||||
elif len(opening_pattern) == 2:
|
||||
closing_pattern = CLOSE_UNCONSTRAINED_BACKQUOTE
|
||||
@ -220,27 +251,30 @@ Make sure there are always empty lines before and after each include''')
|
||||
|
||||
content_end, content = close_inline_block(line, pos, closing_pattern)
|
||||
if content_end < 0:
|
||||
message='Unbalanced code inlining tags.'
|
||||
message = "Unbalanced code inlining tags."
|
||||
if len(opening_pattern) == 1:
|
||||
message += '''
|
||||
message += """
|
||||
If you are trying to write inline code that is glued to text without a space,
|
||||
you need to use double backquotes:
|
||||
> Replace all `reference`s.
|
||||
Will not display correctly. You need to write:
|
||||
> Replace all ``reference``s.
|
||||
'''
|
||||
"""
|
||||
self._on_error(line_number, message)
|
||||
return len(line)
|
||||
pos = content_end + len(opening_pattern)
|
||||
if NEED_PROTECTION.search(content):
|
||||
self._on_error (line_number, f'''
|
||||
self._on_error(
|
||||
line_number,
|
||||
f"""
|
||||
Using backquotes does not protect against asciidoc interpretation. Starting or
|
||||
ending a word with '*', '#', '_' or having two of them consecutively will
|
||||
trigger unintended behavior with the rest of the text.
|
||||
Use ``++{content}++`` to avoid that.
|
||||
If you really want to have formatting inside your code, you can write
|
||||
``pass:n[{content}]``
|
||||
''')
|
||||
""",
|
||||
)
|
||||
return pos
|
||||
return pos
|
||||
|
||||
|
@ -24,8 +24,13 @@ The pass:[``++Can have __ [escaped brackets\] __ ++``]
|
||||
[source,python]
|
||||
----
|
||||
# We don't care about `in the code
|
||||
|
||||
We also don't care about writing C++ or c++
|
||||
|
||||
----
|
||||
|
||||
Inside descriptions, we only use {cpp} to refer to the language
|
||||
|
||||
We can have a sole ` surrounded by spaces
|
||||
|
||||
This file does not exist but we only check that the include is well placed:
|
||||
|
@ -0,0 +1,2 @@
|
||||
$PATH/unnamed_language.adoc:1 To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++
|
||||
$PATH/unnamed_language.adoc:3 To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++
|
@ -0,0 +1,5 @@
|
||||
We shouldn't mention the language C++ by its name.
|
||||
|
||||
Nor by c++ for what it's worth.
|
||||
|
||||
We should use the built-in attribute {cpp} instead.
|
@ -6,38 +6,45 @@ from rspec_tools.validation.sanitize_asciidoc import sanitize_asciidoc
|
||||
|
||||
|
||||
def relative_output(capsys, path: Path):
|
||||
return capsys.readouterr().out.replace(str(path), '$PATH')
|
||||
return capsys.readouterr().out.replace(str(path), "$PATH")
|
||||
|
||||
|
||||
@pytest.mark.parametrize('invalid_file,expected_count', [('unbalanced_single_backquotes', 1),
|
||||
('unbalanced_double_backquotes', 1),
|
||||
('triple_backquotes', 1),
|
||||
('unprotected_formatting', 4),
|
||||
('unprotected_formatting_with_plusses', 1),
|
||||
('wrong_constrained_passthrough', 1),
|
||||
('unclosed_ifdef', 1),
|
||||
('close_unopened_ifdef', 1),
|
||||
('two_ifdef', 1),
|
||||
('two_ifdef_unclosed', 1),
|
||||
('vscode_ifdef', 2),
|
||||
('wrong_ifdef', 1),
|
||||
('wrong_endif', 1),
|
||||
('include_stuck_before', 1),
|
||||
('include_stuck_after', 1),
|
||||
('two_stuck_includes', 2)
|
||||
])
|
||||
def test_need_sanitation(mockinvalidasciidoc: Path, invalid_file, expected_count, capsys, snapshot):
|
||||
'''Check that we detect needs for sanitation.'''
|
||||
@pytest.mark.parametrize(
|
||||
"invalid_file,expected_count",
|
||||
[
|
||||
("unbalanced_single_backquotes", 1),
|
||||
("unbalanced_double_backquotes", 1),
|
||||
("triple_backquotes", 1),
|
||||
("unprotected_formatting", 4),
|
||||
("unprotected_formatting_with_plusses", 1),
|
||||
("wrong_constrained_passthrough", 1),
|
||||
("unclosed_ifdef", 1),
|
||||
("close_unopened_ifdef", 1),
|
||||
("two_ifdef", 1),
|
||||
("two_ifdef_unclosed", 1),
|
||||
("vscode_ifdef", 2),
|
||||
("wrong_ifdef", 1),
|
||||
("wrong_endif", 1),
|
||||
("include_stuck_before", 1),
|
||||
("include_stuck_after", 1),
|
||||
("two_stuck_includes", 2),
|
||||
("unnamed_language", 2),
|
||||
],
|
||||
)
|
||||
def test_need_sanitation(
|
||||
mockinvalidasciidoc: Path, invalid_file, expected_count, capsys, snapshot
|
||||
):
|
||||
"""Check that we detect needs for sanitation."""
|
||||
name_path = Path(invalid_file)
|
||||
adoc = mockinvalidasciidoc / name_path.with_suffix('.adoc')
|
||||
expected = mockinvalidasciidoc / 'snapshots' / name_path.with_suffix('.txt')
|
||||
adoc = mockinvalidasciidoc / name_path.with_suffix(".adoc")
|
||||
expected = mockinvalidasciidoc / "snapshots" / name_path.with_suffix(".txt")
|
||||
assert sanitize_asciidoc(adoc) == expected_count
|
||||
snapshot.snapshot_dir = mockinvalidasciidoc / 'snapshots'
|
||||
snapshot.snapshot_dir = mockinvalidasciidoc / "snapshots"
|
||||
snapshot.assert_match(relative_output(capsys, mockinvalidasciidoc), expected)
|
||||
|
||||
|
||||
def test_correctly_sanitized(mockasciidoc: Path):
|
||||
'''Check that we raise no issue on correctly sanitized asciidoc'''
|
||||
name_path = Path('valid')
|
||||
adoc = mockasciidoc / name_path.with_suffix('.adoc')
|
||||
"""Check that we raise no issue on correctly sanitized asciidoc"""
|
||||
name_path = Path("valid")
|
||||
adoc = mockasciidoc / name_path.with_suffix(".adoc")
|
||||
assert sanitize_asciidoc(adoc) == 0
|
||||
|
@ -24,7 +24,7 @@ class MyClass {
|
||||
|
||||
=== Documentation
|
||||
|
||||
* Geeksforgeeks - https://www.geeksforgeeks.org/naming-convention-in-c/[Naming convention in C++]
|
||||
* Geeksforgeeks - https://www.geeksforgeeks.org/naming-convention-in-c/[Naming convention in {cpp}]
|
||||
* Wikipedia - https://en.wikipedia.org/wiki/Naming_convention_(programming)[Naming Convention (programming)]
|
||||
|
||||
ifdef::env-github,rspecator-view[]
|
||||
|
@ -25,8 +25,8 @@ void precept(int finalValue); // Compliant
|
||||
|
||||
* {cpp} reference - https://en.cppreference.com/w/cpp/language/final[final specifier]
|
||||
* {cpp} reference - https://en.cppreference.com/w/cpp/language/override[override specifier]
|
||||
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/module[C++ keyword: module]
|
||||
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/import[C++ keyword: import]
|
||||
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/module[{cpp} keyword: module]
|
||||
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/import[{cpp} keyword: import]
|
||||
|
||||
ifdef::env-github,rspecator-view[]
|
||||
|
||||
|
@ -375,8 +375,8 @@ void tar(std::string const &s) {
|
||||
|
||||
=== Conference presentations
|
||||
|
||||
* CppCon 2014 - https://youtu.be/V2_80g0eOMc?si=U_qv9iBKI5B3a_EL[Sanitize your C++ code]
|
||||
* CppCon 2018 - https://youtu.be/0S0QgQd75Sw?si=AW9mA09L5PEbkqXc[Software Vulnerabilities in C and C++]
|
||||
* CppCon 2014 - https://youtu.be/V2_80g0eOMc?si=U_qv9iBKI5B3a_EL[Sanitize your {cpp} code]
|
||||
* CppCon 2018 - https://youtu.be/0S0QgQd75Sw?si=AW9mA09L5PEbkqXc[Software Vulnerabilities in C and {cpp}]
|
||||
* CppCon 2020 - https://youtu.be/xEzfnbTabyE?si=9yJQkrcRKn6tuPaV[2020: The Year of Sanitizers?]
|
||||
|
||||
=== Standards
|
||||
|
@ -8,7 +8,7 @@ This rule raises an issue whenever the file specified in a ``++#include++`` dire
|
||||
|
||||
== Resources
|
||||
|
||||
* Microsoft Learn - https://learn.microsoft.com/en-us/cpp/preprocessor/hash-include-directive-c-cpp[``++#include++`` directive (C/C++)]
|
||||
* Microsoft Learn - https://learn.microsoft.com/en-us/cpp/preprocessor/hash-include-directive-c-cpp[``++#include++`` directive (C/{cpp})]
|
||||
|
||||
ifdef::env-github,rspecator-view[]
|
||||
|
||||
|
@ -133,7 +133,7 @@ void bar(const char *src) {
|
||||
|
||||
=== Conference presentations
|
||||
|
||||
* CppCon 2018 - https://www.youtube.com/watch?v=0S0QgQd75Sw&ab_channel=CppCon[Software Vulnerabilities in C and C++]
|
||||
* CppCon 2018 - https://www.youtube.com/watch?v=0S0QgQd75Sw&ab_channel=CppCon[Software Vulnerabilities in C and {cpp}]
|
||||
|
||||
=== Standards
|
||||
|
||||
|
@ -211,7 +211,7 @@ void use_and_destroy_initialized()
|
||||
|
||||
=== Conference presentations
|
||||
|
||||
* CppCon 2020 - https://youtu.be/A7sVFJLJM-A?si=v76jhmv3XnHExZYU[An Introduction to Multithreading in C++20]
|
||||
* CppCon 2020 - https://youtu.be/A7sVFJLJM-A?si=v76jhmv3XnHExZYU[An Introduction to Multithreading in {cpp}20]
|
||||
|
||||
=== Related rules
|
||||
|
||||
|
@ -43,5 +43,5 @@ if (nullptr == ptr) [[unlikely]] {
|
||||
|
||||
== Resources
|
||||
|
||||
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/likely[C++ attribute: likely, unlikely]
|
||||
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/likely[{cpp} attribute: likely, unlikely]
|
||||
|
||||
|
@ -58,7 +58,7 @@ This rule does not apply to fields whose class has a non-default alignment.
|
||||
|
||||
== Resources
|
||||
|
||||
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/no_unique_address[C++ attribute: no_unique_address]
|
||||
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/no_unique_address[{cpp} attribute: no_unique_address]
|
||||
|
||||
ifdef::env-github,rspecator-view[]
|
||||
'''
|
||||
|
@ -103,7 +103,7 @@ float fastInvSqrt(float number) {
|
||||
|
||||
=== Conference presentations
|
||||
|
||||
* CppCon 2019 - https://www.youtube.com/watch?v=_qzMpk-22cc[Type punning in modern C++, Timur Doumler]
|
||||
* CppCon 2019 - https://www.youtube.com/watch?v=_qzMpk-22cc[Type punning in modern {cpp}, Timur Doumler]
|
||||
|
||||
=== Related rules
|
||||
|
||||
|
@ -67,7 +67,7 @@ void increment (int & value,
|
||||
|
||||
=== Articles & blog posts
|
||||
|
||||
* https://isocpp.org/wiki/faq/const-correctness[ISO C++ FAQ about const correctness].
|
||||
* https://isocpp.org/wiki/faq/const-correctness[ISO {cpp} FAQ about const correctness].
|
||||
|
||||
=== External coding guidelines
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user