Prohibit non-ASCII characters in rule metadata.json files (#1119)

Triggered by the deployment failure that was caused by an invisible Unicode character in a rule's metadata.json.
This PR implements three conceptual changes:
- make the deployment parse error more informative
- prohibit the use of non-ASCII characters in the metadata.json files
- remove the existing non-ASCII characters from the existing rules
This commit is contained in:
Arseniy Zaostrovnykh 2022-07-25 17:19:53 +02:00 committed by GitHub
parent fedead313e
commit 84967d6c25
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 46 additions and 14 deletions

View File

@ -117,7 +117,12 @@ function getRuleMetadata(srcDir: string, language?: string) {
}
const languageFile = path.join(srcDir, language, 'metadata.json');
if (fs.existsSync(languageFile)) {
return JSON.parse(fs.readFileSync(languageFile, 'utf8'));
try {
return JSON.parse(fs.readFileSync(languageFile, 'utf8'));
} catch (error) {
console.error('When parsing the JSON file ' + languageFile);
throw error;
}
}
return {};
})();

View File

@ -9,6 +9,18 @@ from rspec_tools.errors import RuleNotFoundError
METADATA_FILE_NAME: Final[str] = 'metadata.json'
DESCRIPTION_FILE_NAME: Final[str] = 'rule.html'
def load_metadata_contents(metadata_path):
try:
# Make sure the metadata file contains only ASCII.
# Even though python is fine with Unicode, it might
# break other tools such as the TypeScript deployment script.
return metadata_path.read_text(encoding='ascii')
except:
print('ERROR: Non-ASCII characters in ', metadata_path)
print('The metadata files must contain only ASCII characters.\n\n')
raise
class LanguageSpecificRule:
language_path: Final[Path]
rule: 'GenericRule'
@ -32,10 +44,11 @@ class LanguageSpecificRule:
if self.__metadata is not None:
return self.__metadata
metadata_path = self.language_path.joinpath(METADATA_FILE_NAME)
metadata_contents = load_metadata_contents(metadata_path)
try:
lang_metadata = json.loads(metadata_path.read_bytes())
lang_metadata = json.loads(metadata_contents)
except:
print('Failed to parse ', metadata_path)
print('ERROR: Failed to parse ', metadata_path)
raise
self.__metadata = self.rule.generic_metadata | lang_metadata
@ -73,7 +86,8 @@ class GenericRule:
if self.__generic_metadata is not None:
return self.__generic_metadata
metadata_path = self.rule_path.joinpath(METADATA_FILE_NAME)
self.__generic_metadata = json.loads(metadata_path.read_bytes())
metadata_contents = load_metadata_contents(metadata_path)
self.__generic_metadata = json.loads(metadata_contents)
return self.__generic_metadata

View File

@ -0,0 +1,2 @@
{
}

View File

@ -0,0 +1 @@
A rule with an invisible Unicode character preceding the open '{' in the metadata.json

View File

@ -0,0 +1,2 @@
{
}

View File

@ -0,0 +1,3 @@
{
"title": "Extension methods should not extend \"Object\""
}

View File

@ -0,0 +1 @@
dummy

View File

@ -61,6 +61,10 @@ def test_rule_with_invalid_language(invalid_rules: RulesRepository):
with pytest.raises(RuleValidationError, match=fr'^Rule S502 failed validation for these reasons:\n - Rule scala:S502 has invalid metadata'):
validate_rule_metadata(s502)
def test_rule_with_unicode_in_metadata(invalid_rules: RulesRepository):
s4225 = invalid_rules.get_rule('S4225')
with pytest.raises(UnicodeDecodeError, match=fr'ascii'):
validate_rule_metadata(s4225)
def test_rule_that_is_fully_valid(mockrules: Path):
valid_rule = RulesRepository(rules_path=mockrules).get_rule('S120')

View File

@ -1,5 +1,5 @@
{
"title": "An objects dynamic type should not be used from its constructors or destructor",
"title": "An object's dynamic type should not be used from its constructors or destructor",
"type": "BUG",
"status": "ready",
"remediation": {

View File

@ -1,5 +1,5 @@
{
"title": "Do not use “using namespace” directives in header files",
"title": "Do not use \"using namespace\" directives in header files",
"type": "CODE_SMELL",
"status": "closed",
"remediation": {

View File

@ -1,2 +1,2 @@
{
{
}

View File

@ -1,3 +1,3 @@
{
{
"title": "Extension methods should not extend \"Object\""
}

View File

@ -1,5 +1,5 @@
{
"title": "The statement forming the body of a switch, while, do … while or for statement shall be a compound statement",
"title": "The statement forming the body of a \"switch\", \"while\", \"do {...} while\" or \"for\" statement shall be a compound statement",
"type": "CODE_SMELL",
"status": "ready",
"remediation": {

View File

@ -1,5 +1,5 @@
{
"title": "All if … else if constructs shall be terminated with an else clause",
"title": "All \"if ... else if\" constructs shall be terminated with an \"else \"clause",
"type": "CODE_SMELL",
"status": "ready",
"remediation": {

View File

@ -1,5 +1,5 @@
{
"title": "Case insensitive Unicode regular expressions should enable the “UNICODE_CASE” flag",
"title": "Case insensitive Unicode regular expressions should enable the \"UNICODE_CASE\" flag",
"type": "BUG",
"status": "ready",
"remediation": {

View File

@ -1,5 +1,5 @@
{
"title": "Elements in a container should be erased with “std::erase” or “std::erase_if”",
"title": "Elements in a container should be erased with \"std::erase\" or \"std::erase_if\"",
"type": "CODE_SMELL",
"status": "ready",
"remediation": {

View File

@ -1,5 +1,5 @@
{
"title": "“contains” should be used to check if a key exists in a container",
"title": "\"contains\" should be used to check if a key exists in a container",
"type": "CODE_SMELL",
"status": "ready",
"remediation": {

View File

@ -1,5 +1,5 @@
{
"title": "A “U ” suffix shall be applied to all octal or hexadecimal integer literals of unsigned type",
"title": "A \"U\" suffix shall be applied to all octal or hexadecimal integer literals of unsigned type",
"type": "CODE_SMELL",
"status": "closed",
"remediation": {