RULEAPI-800 Detect usages of C++ instead of {cpp} in asciidoc

This commit is contained in:
Fred Tingaud 2023-12-22 13:58:58 +01:00 committed by GitHub
parent c292108e16
commit 1012001409
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 137 additions and 84 deletions

View File

@ -5,70 +5,76 @@ Checks are:
* Inline code with backquotes is correctly escaped and balanced
* Include commands are not appended to other code
"""
from pathlib import Path
import re
from pathlib import Path
VALID_IFDEF = "ifdef::env-github,rspecator-view[]"
VALID_ENDIF = "endif::env-github,rspecator-view[]"
VARIABLE_DECL = re.compile(r':\w+: ')
VARIABLE_DECL = re.compile(r":\w+: ")
INCLUDE = re.compile(r'include::')
INCLUDE = re.compile(r"include::")
FORMATTING_CHARS = ['_', r'\*', r'\#']
WORD_FORMATTING_CHARS = [r'\~', r'\^']
FORMATTING_CHARS = ["_", r"\*", r"\#"]
WORD_FORMATTING_CHARS = [r"\~", r"\^"]
# If the formatting char is repeated twice, it can go anywhere
UNCONSTRAINED_FORMATTING = '|'.join(x + x for x in FORMATTING_CHARS)
UNCONSTRAINED_FORMATTING = "|".join(x + x for x in FORMATTING_CHARS)
# A single formatting char is dangerous at the beginning of a word
FORMATTING_OPENING = '|'.join(r'(\W|^)' + x + r'\w' for x in FORMATTING_CHARS)
FORMATTING_OPENING = "|".join(r"(\W|^)" + x + r"\w" for x in FORMATTING_CHARS)
# A single formatting char is dangerous at the end of a word
FORMATTING_CLOSING = '|'.join(r'\w' + x + r'(\W|$)' for x in FORMATTING_CHARS)
FORMATTING_CLOSING = "|".join(r"\w" + x + r"(\W|$)" for x in FORMATTING_CHARS)
# Word formatting is broken by spaces so we look for things like `#word#`
WORD_FORMATTING = "|".join(x + r'\S+' + x for x in WORD_FORMATTING_CHARS)
WORD_FORMATTING = "|".join(x + r"\S+" + x for x in WORD_FORMATTING_CHARS)
# We combine all the matchers
NEED_PROTECTION = re.compile('('
f'{UNCONSTRAINED_FORMATTING}|'
f'{FORMATTING_OPENING}|'
f'{FORMATTING_CLOSING}|'
f'{WORD_FORMATTING}'
')')
NEED_PROTECTION = re.compile(
"("
f"{UNCONSTRAINED_FORMATTING}|"
f"{FORMATTING_OPENING}|"
f"{FORMATTING_CLOSING}|"
f"{WORD_FORMATTING}"
")"
)
# There is a regex trick here:
# We want to stop the search if there is a backquote
# We do that by matching backquote OR the closing passthrough
# Then we'll ignore any match of backquote
CLOSE_CONSTRAINED_PASSTHROUGH = re.compile(r'`|((?<!\s)\+(?=`))')
CLOSE_CONSTRAINED_PASSTHROUGH = re.compile(r"`|((?<!\s)\+(?=`))")
CLOSE_CONSTRAINED_BACKQUOTE = re.compile(r'`(?!\w)')
CLOSE_UNCONSTRAINED_BACKQUOTE = re.compile('``')
CLOSE_CONSTRAINED_BACKQUOTE = re.compile(r"`(?!\w)")
CLOSE_UNCONSTRAINED_BACKQUOTE = re.compile("``")
PASSTHROUGH_MACRO_TEXT = r'pass:\w*\[(\\\]|[^\]])*\]'
PASSTHROUGH_MACRO_TEXT = r"pass:\w*\[(\\\]|[^\]])*\]"
PASSTHROUGH_MACRO = re.compile(PASSTHROUGH_MACRO_TEXT)
CPP = re.compile(r"\b[Cc]\+\+")
# There is a regex trick here:
# We want to skip passthrough macros, to not find pass:[``whatever``]
# We do that by matching
# * EITHER passthrough macros including their ignored backquotes
# * OR backquotes
# Then we'll ignore any match of PASSTHROUGH_MACRO
BACKQUOTE = re.compile(PASSTHROUGH_MACRO_TEXT + r'|(?P<backquote>(``+)|(?<![\\\w])(`)(?!\s))')
BACKQUOTE = re.compile(
PASSTHROUGH_MACRO_TEXT + r"|(?P<backquote>(``+)|(?<![\\\w])(`)(?!\s))"
)
def close_passthrough(count, pos, line):
"""Find the end of a passthrough block marked by *count* plus signs"""
while count > 0:
# `+++a++` will display '+a' in case of imbalance, we try to find the biggest closing block
if count == 1:
if not line[pos + count].isspace() and line[pos - 1] == '`':
#constrained '+'. It is a passthrough only if it is directly around text and surrounded by backquotes: `+Some Content+`
if not line[pos + count].isspace() and line[pos - 1] == "`":
# constrained '+'. It is a passthrough only if it is directly around text and surrounded by backquotes: `+Some Content+`
close_pattern = CLOSE_CONSTRAINED_PASSTHROUGH
else:
return pos
else:
close_pattern = re.compile('(' + r'\+' * count + ')')
close_pattern = re.compile("(" + r"\+" * count + ")")
end = close_pattern.search(line, pos + count)
if end and end.group(1):
return end.end()
@ -77,8 +83,8 @@ def close_passthrough(count, pos, line):
def skip_passthrough_macro(line, pos):
'''If this is a passthrough macro, skip to the end'''
if line[pos] == 'p':
"""If this is a passthrough macro, skip to the end"""
if line[pos] == "p":
pm = PASSTHROUGH_MACRO.match(line, pos)
if pm:
return pm.end()
@ -86,10 +92,10 @@ def skip_passthrough_macro(line, pos):
def skip_passthrough_plus(line, pos):
'''If this is a passthrough +, skip to the end'''
if line[pos] == '+':
"""If this is a passthrough +, skip to the end"""
if line[pos] == "+":
count = 1
while pos + count < len(line) and line[pos + count] == '+':
while pos + count < len(line) and line[pos + count] == "+":
count += 1
return close_passthrough(count, pos, line)
return pos
@ -126,10 +132,10 @@ class Sanitizer:
lines = content.splitlines(keepends=False)
for line_index, line in enumerate(lines):
if self._is_inside_code:
if line == '----':
if line == "----":
self._is_inside_code = False
continue
if line == '----':
if line == "----":
self._is_inside_code = True
continue
line_number = line_index + 1
@ -185,33 +191,58 @@ class Sanitizer:
f'Incorrect endif command. "{VALID_ENDIF}" should be used instead.',
)
def _advance_to_next_backquote(self, line: str, pos: int, line_number: int):
    """Find the next backquote in *line* at or after *pos*.

    While advancing, scan the plain text being skipped over for a literal
    "C++"/"c++" and report an error if found: asciidoc renders "C++"
    incorrectly, so the "{cpp}" attribute must be used instead. Text inside
    the upcoming code span is excluded from that scan.

    Returns the re.Match for the next backquote (or passthrough macro per
    the BACKQUOTE trick), or None if the rest of the line has none.
    """
    next_pos = BACKQUOTE.search(line, pos)
    if next_pos:
        # Limit the C++ scan to the text before the upcoming backquote.
        # Use start() — the position where the match begins. Match.pos is
        # merely the pos argument passed to search(), so using it as endpos
        # would make the scanned range [pos, pos), i.e. empty, and any C++
        # preceding a backquote on the same line would go undetected.
        cpp = CPP.search(line, pos, endpos=next_pos.start())
    else:
        cpp = CPP.search(line, pos)
    if cpp:
        self._on_error(
            line_number, 'To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++'
        )
    return next_pos
def _process_description(self, line_number: int, line: str):
if VARIABLE_DECL.match(line):
return
if self._previous_line_was_include and not self._empty_line:
self._on_error(line_number - 1, '''An empty line is missing after the include.
self._on_error(
line_number - 1,
"""An empty line is missing after the include.
This may result in broken tags and other display issues.
Make sure there are always empty lines before and after each include''')
Make sure there are always empty lines before and after each include""",
)
if INCLUDE.match(line):
self._previous_line_was_include = True
if not self._empty_line:
self._on_error(line_number, '''An empty line is missing before the include.
self._on_error(
line_number,
"""An empty line is missing before the include.
This may result in broken tags and other display issues.
Make sure there are always empty lines before and after each include''')
Make sure there are always empty lines before and after each include""",
)
return
else:
self._previous_line_was_include = False
pos = 0
res = BACKQUOTE.search(line, pos)
res = self._advance_to_next_backquote(line, pos, line_number)
# We filter out matches for passthrough. See comment near the BACKQUOTE declaration
while res and res.group('backquote'):
pos = self._check_inlined_code(line_number, res.end(), line, res.group('backquote'))
res = BACKQUOTE.search(line, pos)
while res and res.group("backquote"):
pos = self._check_inlined_code(
line_number, res.end(), line, res.group("backquote")
)
res = self._advance_to_next_backquote(line, pos, line_number)
def _check_inlined_code(self, line_number: int, pos: int, line: str, opening_pattern: str):
def _check_inlined_code(
self, line_number: int, pos: int, line: str, opening_pattern: str
):
if len(opening_pattern) > 2:
# Part of the backquotes are displayed as backquotes.
self._on_error(line_number, 'Use "++" to isolate the backquotes you want to display from the ones that should be interpreted by AsciiDoc.')
self._on_error(
line_number,
'Use "++" to isolate the backquotes you want to display from the ones that should be interpreted by AsciiDoc.',
)
return pos
elif len(opening_pattern) == 2:
closing_pattern = CLOSE_UNCONSTRAINED_BACKQUOTE
@ -220,27 +251,30 @@ Make sure there are always empty lines before and after each include''')
content_end, content = close_inline_block(line, pos, closing_pattern)
if content_end < 0:
message='Unbalanced code inlining tags.'
message = "Unbalanced code inlining tags."
if len(opening_pattern) == 1:
message += '''
message += """
If you are trying to write inline code that is glued to text without a space,
you need to use double backquotes:
> Replace all `reference`s.
Will not display correctly. You need to write:
> Replace all ``reference``s.
'''
"""
self._on_error(line_number, message)
return len(line)
pos = content_end + len(opening_pattern)
if NEED_PROTECTION.search(content):
self._on_error (line_number, f'''
self._on_error(
line_number,
f"""
Using backquotes does not protect against asciidoc interpretation. Starting or
ending a word with '*', '#', '_' or having two of them consecutively will
trigger unintended behavior with the rest of the text.
Use ``++{content}++`` to avoid that.
If you really want to have formatting inside your code, you can write
``pass:n[{content}]``
''')
""",
)
return pos
return pos

View File

@ -24,8 +24,13 @@ The pass:[``++Can have __ [escaped brackets\] __ ++``]
[source,python]
----
# We don't care about `in the code
We also don't care about writing C++ or c++
----
Inside descriptions, we only use {cpp} to refer to the language
We can have a sole ` surrounded by spaces
This file does not exist but we only check that the include is well placed:

View File

@ -0,0 +1,2 @@
$PATH/unnamed_language.adoc:1 To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++
$PATH/unnamed_language.adoc:3 To avoid rendering issues, always use the "{cpp}" attribute to refer to the language C++

View File

@ -0,0 +1,5 @@
We shouldn't mention the language C++ by its name.
Nor by c++ for what it's worth.
We should use the built-in attribute {cpp} instead.

View File

@ -6,38 +6,45 @@ from rspec_tools.validation.sanitize_asciidoc import sanitize_asciidoc
def relative_output(capsys, path: Path):
return capsys.readouterr().out.replace(str(path), '$PATH')
return capsys.readouterr().out.replace(str(path), "$PATH")
@pytest.mark.parametrize('invalid_file,expected_count', [('unbalanced_single_backquotes', 1),
('unbalanced_double_backquotes', 1),
('triple_backquotes', 1),
('unprotected_formatting', 4),
('unprotected_formatting_with_plusses', 1),
('wrong_constrained_passthrough', 1),
('unclosed_ifdef', 1),
('close_unopened_ifdef', 1),
('two_ifdef', 1),
('two_ifdef_unclosed', 1),
('vscode_ifdef', 2),
('wrong_ifdef', 1),
('wrong_endif', 1),
('include_stuck_before', 1),
('include_stuck_after', 1),
('two_stuck_includes', 2)
])
def test_need_sanitation(mockinvalidasciidoc: Path, invalid_file, expected_count, capsys, snapshot):
'''Check that we detect needs for sanitation.'''
@pytest.mark.parametrize(
"invalid_file,expected_count",
[
("unbalanced_single_backquotes", 1),
("unbalanced_double_backquotes", 1),
("triple_backquotes", 1),
("unprotected_formatting", 4),
("unprotected_formatting_with_plusses", 1),
("wrong_constrained_passthrough", 1),
("unclosed_ifdef", 1),
("close_unopened_ifdef", 1),
("two_ifdef", 1),
("two_ifdef_unclosed", 1),
("vscode_ifdef", 2),
("wrong_ifdef", 1),
("wrong_endif", 1),
("include_stuck_before", 1),
("include_stuck_after", 1),
("two_stuck_includes", 2),
("unnamed_language", 2),
],
)
def test_need_sanitation(
mockinvalidasciidoc: Path, invalid_file, expected_count, capsys, snapshot
):
"""Check that we detect needs for sanitation."""
name_path = Path(invalid_file)
adoc = mockinvalidasciidoc / name_path.with_suffix('.adoc')
expected = mockinvalidasciidoc / 'snapshots' / name_path.with_suffix('.txt')
adoc = mockinvalidasciidoc / name_path.with_suffix(".adoc")
expected = mockinvalidasciidoc / "snapshots" / name_path.with_suffix(".txt")
assert sanitize_asciidoc(adoc) == expected_count
snapshot.snapshot_dir = mockinvalidasciidoc / 'snapshots'
snapshot.snapshot_dir = mockinvalidasciidoc / "snapshots"
snapshot.assert_match(relative_output(capsys, mockinvalidasciidoc), expected)
def test_correctly_sanitized(mockasciidoc: Path):
'''Check that we raise no issue on correctly sanitized asciidoc'''
name_path = Path('valid')
adoc = mockasciidoc / name_path.with_suffix('.adoc')
"""Check that we raise no issue on correctly sanitized asciidoc"""
name_path = Path("valid")
adoc = mockasciidoc / name_path.with_suffix(".adoc")
assert sanitize_asciidoc(adoc) == 0

View File

@ -24,7 +24,7 @@ class MyClass {
=== Documentation
* Geeksforgeeks - https://www.geeksforgeeks.org/naming-convention-in-c/[Naming convention in C++]
* Geeksforgeeks - https://www.geeksforgeeks.org/naming-convention-in-c/[Naming convention in {cpp}]
* Wikipedia - https://en.wikipedia.org/wiki/Naming_convention_(programming)[Naming Convention (programming)]
ifdef::env-github,rspecator-view[]

View File

@ -25,8 +25,8 @@ void precept(int finalValue); // Compliant
* {cpp} reference - https://en.cppreference.com/w/cpp/language/final[final specifier]
* {cpp} reference - https://en.cppreference.com/w/cpp/language/override[override specifier]
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/module[C++ keyword: module]
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/import[C++ keyword: import]
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/module[{cpp} keyword: module]
* {cpp} reference - https://en.cppreference.com/w/cpp/keyword/import[{cpp} keyword: import]
ifdef::env-github,rspecator-view[]

View File

@ -375,8 +375,8 @@ void tar(std::string const &s) {
=== Conference presentations
* CppCon 2014 - https://youtu.be/V2_80g0eOMc?si=U_qv9iBKI5B3a_EL[Sanitize your C++ code]
* CppCon 2018 - https://youtu.be/0S0QgQd75Sw?si=AW9mA09L5PEbkqXc[Software Vulnerabilities in C and C++]
* CppCon 2014 - https://youtu.be/V2_80g0eOMc?si=U_qv9iBKI5B3a_EL[Sanitize your {cpp} code]
* CppCon 2018 - https://youtu.be/0S0QgQd75Sw?si=AW9mA09L5PEbkqXc[Software Vulnerabilities in C and {cpp}]
* CppCon 2020 - https://youtu.be/xEzfnbTabyE?si=9yJQkrcRKn6tuPaV[2020: The Year of Sanitizers?]
=== Standards

View File

@ -8,7 +8,7 @@ This rule raises an issue whenever the file specified in a ``++#include++`` dire
== Resources
* Microsoft Learn - https://learn.microsoft.com/en-us/cpp/preprocessor/hash-include-directive-c-cpp[``++#include++`` directive (C/C++)]
* Microsoft Learn - https://learn.microsoft.com/en-us/cpp/preprocessor/hash-include-directive-c-cpp[``++#include++`` directive (C/{cpp})]
ifdef::env-github,rspecator-view[]

View File

@ -133,7 +133,7 @@ void bar(const char *src) {
=== Conference presentations
* CppCon 2018 - https://www.youtube.com/watch?v=0S0QgQd75Sw&ab_channel=CppCon[Software Vulnerabilities in C and C++]
* CppCon 2018 - https://www.youtube.com/watch?v=0S0QgQd75Sw&ab_channel=CppCon[Software Vulnerabilities in C and {cpp}]
=== Standards

View File

@ -211,7 +211,7 @@ void use_and_destroy_initialized()
=== Conference presentations
* CppCon 2020 - https://youtu.be/A7sVFJLJM-A?si=v76jhmv3XnHExZYU[An Introduction to Multithreading in C++20]
* CppCon 2020 - https://youtu.be/A7sVFJLJM-A?si=v76jhmv3XnHExZYU[An Introduction to Multithreading in {cpp}20]
=== Related rules

View File

@ -43,5 +43,5 @@ if (nullptr == ptr) [[unlikely]] {
== Resources
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/likely[C++ attribute: likely, unlikely]
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/likely[{cpp} attribute: likely, unlikely]

View File

@ -58,7 +58,7 @@ This rule does not apply to fields whose class has a non-default alignment.
== Resources
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/no_unique_address[C++ attribute: no_unique_address]
* {cpp} reference - https://en.cppreference.com/w/cpp/language/attributes/no_unique_address[{cpp} attribute: no_unique_address]
ifdef::env-github,rspecator-view[]
'''

View File

@ -103,7 +103,7 @@ float fastInvSqrt(float number) {
=== Conference presentations
* CppCon 2019 - https://www.youtube.com/watch?v=_qzMpk-22cc[Type punning in modern C++, Timur Doumler]
* CppCon 2019 - https://www.youtube.com/watch?v=_qzMpk-22cc[Type punning in modern {cpp}, Timur Doumler]
=== Related rules

View File

@ -67,7 +67,7 @@ void increment (int & value,
=== Articles & blog posts
* https://isocpp.org/wiki/faq/const-correctness[ISO C++ FAQ about const correctness].
* https://isocpp.org/wiki/faq/const-correctness[ISO {cpp} FAQ about const correctness].
=== External coding guidelines