Prohibit non-ASCII characters in rule metadata.json files (#1119)

Triggered by the deployment failure that was caused by an invisible Unicode character in a rule's metadata.json. This PR implements three conceptual changes: - make the deployment parse error more informative - prohibit the use of non-ASCII characters in the metadata.json files - remove the existing non-ASCII characters from the existing rules
2022-07-25 17:19:53 +02:00 · 2022-07-25 17:19:53 +02:00 · 84967d6c25
commit 84967d6c25
parent fedead313e
18 changed files with 46 additions and 14 deletions
--- a/frontend/src/deployment/metadata.ts
+++ b/frontend/src/deployment/metadata.ts
@ -117,7 +117,12 @@ function getRuleMetadata(srcDir: string, language?: string) {
    }
    const languageFile = path.join(srcDir, language, 'metadata.json');
    if (fs.existsSync(languageFile)) {
-      return JSON.parse(fs.readFileSync(languageFile, 'utf8'));
+      try {
+        return JSON.parse(fs.readFileSync(languageFile, 'utf8'));
+      } catch (error) {
+        console.error('When parsing the JSON file ' + languageFile);
+        throw error;
+      }
    }
    return {};
  })();
--- a/rspec-tools/rspec_tools/rules.py
+++ b/rspec-tools/rspec_tools/rules.py
@ -9,6 +9,18 @@ from rspec_tools.errors import RuleNotFoundError
 METADATA_FILE_NAME: Final[str] = 'metadata.json'
 DESCRIPTION_FILE_NAME: Final[str] = 'rule.html'

+def load_metadata_contents(metadata_path):
+  try:
+    # Make sure the metadata file contains only ASCII.
+    # Even though python is fine with Unicode, it might
+    # break other tools such as the TypeScript deployment script.
+    return metadata_path.read_text(encoding='ascii')
+  except:
+    print('ERROR: Non-ASCII characters in ', metadata_path)
+    print('The metadata files must contain only ASCII characters.\n\n')
+    raise
+
+
 class LanguageSpecificRule:
  language_path: Final[Path]
  rule: 'GenericRule'
@ -32,10 +44,11 @@ class LanguageSpecificRule:
    if self.__metadata is not None:
      return self.__metadata
    metadata_path = self.language_path.joinpath(METADATA_FILE_NAME)
+    metadata_contents = load_metadata_contents(metadata_path)
    try:
-      lang_metadata = json.loads(metadata_path.read_bytes())
+      lang_metadata = json.loads(metadata_contents)
    except:
-      print('Failed to parse ', metadata_path)
+      print('ERROR: Failed to parse ', metadata_path)
      raise

    self.__metadata = self.rule.generic_metadata | lang_metadata
@ -73,7 +86,8 @@ class GenericRule:
    if self.__generic_metadata is not None:
      return self.__generic_metadata
    metadata_path = self.rule_path.joinpath(METADATA_FILE_NAME)
-    self.__generic_metadata = json.loads(metadata_path.read_bytes())
+    metadata_contents = load_metadata_contents(metadata_path)
+    self.__generic_metadata = json.loads(metadata_contents)
    return self.__generic_metadata


--- a/rspec-tools/tests/resources/invalid-rules/S4225/csharp/metadata.json
+++ b/rspec-tools/tests/resources/invalid-rules/S4225/csharp/metadata.json
@ -0,0 +1,2 @@
+{
+}
--- a/rspec-tools/tests/resources/invalid-rules/S4225/csharp/rule.adoc
+++ b/rspec-tools/tests/resources/invalid-rules/S4225/csharp/rule.adoc
@ -0,0 +1 @@
+A rule with an invisible Unicode character preceding the open '{' in the metadata.json
--- a/rspec-tools/tests/resources/invalid-rules/S4225/metadata.json
+++ b/rspec-tools/tests/resources/invalid-rules/S4225/metadata.json
@ -0,0 +1,2 @@
+{
+}
--- a/rspec-tools/tests/resources/invalid-rules/S4225/vbnet/metadata.json
+++ b/rspec-tools/tests/resources/invalid-rules/S4225/vbnet/metadata.json
@ -0,0 +1,3 @@
+{
+  "title": "Extension methods should not extend \"Object\""
+}
--- a/rspec-tools/tests/resources/invalid-rules/S4225/vbnet/rule.adoc
+++ b/rspec-tools/tests/resources/invalid-rules/S4225/vbnet/rule.adoc
@ -0,0 +1 @@
+dummy
--- a/rspec-tools/tests/validation/test_metadata_validation.py
+++ b/rspec-tools/tests/validation/test_metadata_validation.py
@ -61,6 +61,10 @@ def test_rule_with_invalid_language(invalid_rules: RulesRepository):
  with pytest.raises(RuleValidationError, match=fr'^Rule S502 failed validation for these reasons:\n - Rule scala:S502 has invalid metadata'):
    validate_rule_metadata(s502)

+def test_rule_with_unicode_in_metadata(invalid_rules: RulesRepository):
+  s4225 = invalid_rules.get_rule('S4225')
+  with pytest.raises(UnicodeDecodeError, match=fr'ascii'):
+    validate_rule_metadata(s4225)

 def test_rule_that_is_fully_valid(mockrules: Path):
  valid_rule = RulesRepository(rules_path=mockrules).get_rule('S120')
--- a/rules/S1019/cfamily/metadata.json
+++ b/rules/S1019/cfamily/metadata.json
@ -1,5 +1,5 @@
 {
-  "title": "An object’s dynamic type should not be used from its constructors or destructor",
+  "title": "An object's dynamic type should not be used from its constructors or destructor",
  "type": "BUG",
  "status": "ready",
  "remediation": {
--- a/rules/S1249/cfamily/metadata.json
+++ b/rules/S1249/cfamily/metadata.json
@ -1,5 +1,5 @@
 {
-  "title": "Do not use “using namespace” directives in header files",
+  "title": "Do not use \"using namespace\" directives in header files",
  "type": "CODE_SMELL",
  "status": "closed",
  "remediation": {
--- a/rules/S4225/csharp/metadata.json
+++ b/rules/S4225/csharp/metadata.json
@ -1,2 +1,2 @@
-{
+{
 }
--- a/rules/S4225/vbnet/metadata.json
+++ b/rules/S4225/vbnet/metadata.json
@ -1,3 +1,3 @@
-{
+{
  "title": "Extension methods should not extend \"Object\""
 }
--- a/rules/S5305/cfamily/metadata.json
+++ b/rules/S5305/cfamily/metadata.json
@ -1,5 +1,5 @@
 {
-  "title": "The statement forming the body of a switch, while, do … while or for statement shall be a compound statement",
+  "title": "The statement forming the body of a \"switch\", \"while\", \"do {...} while\" or \"for\" statement shall be a compound statement",
  "type": "CODE_SMELL",
  "status": "ready",
  "remediation": {
--- a/rules/S5307/cfamily/metadata.json
+++ b/rules/S5307/cfamily/metadata.json
@ -1,5 +1,5 @@
 {
-  "title": "All if … else if constructs shall be terminated with an else clause",
+  "title": "All \"if ... else if\" constructs shall be terminated with an \"else \"clause",
  "type": "CODE_SMELL",
  "status": "ready",
  "remediation": {
--- a/rules/S5866/java/metadata.json
+++ b/rules/S5866/java/metadata.json
@ -1,5 +1,5 @@
 {
-  "title": "Case insensitive Unicode regular expressions should enable the “UNICODE_CASE” flag",
+  "title": "Case insensitive Unicode regular expressions should enable the \"UNICODE_CASE\" flag",
  "type": "BUG",
  "status": "ready",
  "remediation": {
--- a/rules/S6165/cfamily/metadata.json
+++ b/rules/S6165/cfamily/metadata.json
@ -1,5 +1,5 @@
 {
-  "title": "Elements in a container should be erased with “std::erase” or “std::erase_if”",
+  "title": "Elements in a container should be erased with \"std::erase\" or \"std::erase_if\"",
  "type": "CODE_SMELL",
  "status": "ready",
  "remediation": {
--- a/rules/S6171/cfamily/metadata.json
+++ b/rules/S6171/cfamily/metadata.json
@ -1,5 +1,5 @@
 {
-  "title": "“contains” should be used to check if a key exists in a container",
+  "title": "\"contains\" should be used to check if a key exists in a container",
  "type": "CODE_SMELL",
  "status": "ready",
  "remediation": {
--- a/rules/S816/cfamily/metadata.json
+++ b/rules/S816/cfamily/metadata.json
@ -1,5 +1,5 @@
 {
-  "title": "A “U ” suffix shall be applied to all octal or hexadecimal integer literals of unsigned type",
+  "title": "A \"U\" suffix shall be applied to all octal or hexadecimal integer literals of unsigned type",
  "type": "CODE_SMELL",
  "status": "closed",
  "remediation": {
				`@ -0,0 +1 @@`
				`A rule with an invisible Unicode character preceding the open '{' in the metadata.json`