Prohibit non-ASCII characters in rule metadata.json files (#1119)
Triggered by the deployment failure that was caused by an invisible Unicode character in a rule's metadata.json. This PR implements three conceptual changes: - make the deployment parse error more informative - prohibit the use of non-ASCII characters in the metadata.json files - remove the existing non-ASCII characters from the existing rules
This commit is contained in:
parent
fedead313e
commit
84967d6c25
@ -117,7 +117,12 @@ function getRuleMetadata(srcDir: string, language?: string) {
|
||||
}
|
||||
const languageFile = path.join(srcDir, language, 'metadata.json');
|
||||
if (fs.existsSync(languageFile)) {
|
||||
return JSON.parse(fs.readFileSync(languageFile, 'utf8'));
|
||||
try {
|
||||
return JSON.parse(fs.readFileSync(languageFile, 'utf8'));
|
||||
} catch (error) {
|
||||
console.error('When parsing the JSON file ' + languageFile);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
return {};
|
||||
})();
|
||||
|
@ -9,6 +9,18 @@ from rspec_tools.errors import RuleNotFoundError
|
||||
METADATA_FILE_NAME: Final[str] = 'metadata.json'
|
||||
DESCRIPTION_FILE_NAME: Final[str] = 'rule.html'
|
||||
|
||||
def load_metadata_contents(metadata_path):
|
||||
try:
|
||||
# Make sure the metadata file contains only ASCII.
|
||||
# Even though python is fine with Unicode, it might
|
||||
# break other tools such as the TypeScript deployment script.
|
||||
return metadata_path.read_text(encoding='ascii')
|
||||
except:
|
||||
print('ERROR: Non-ASCII characters in ', metadata_path)
|
||||
print('The metadata files must contain only ASCII characters.\n\n')
|
||||
raise
|
||||
|
||||
|
||||
class LanguageSpecificRule:
|
||||
language_path: Final[Path]
|
||||
rule: 'GenericRule'
|
||||
@ -32,10 +44,11 @@ class LanguageSpecificRule:
|
||||
if self.__metadata is not None:
|
||||
return self.__metadata
|
||||
metadata_path = self.language_path.joinpath(METADATA_FILE_NAME)
|
||||
metadata_contents = load_metadata_contents(metadata_path)
|
||||
try:
|
||||
lang_metadata = json.loads(metadata_path.read_bytes())
|
||||
lang_metadata = json.loads(metadata_contents)
|
||||
except:
|
||||
print('Failed to parse ', metadata_path)
|
||||
print('ERROR: Failed to parse ', metadata_path)
|
||||
raise
|
||||
|
||||
self.__metadata = self.rule.generic_metadata | lang_metadata
|
||||
@ -73,7 +86,8 @@ class GenericRule:
|
||||
if self.__generic_metadata is not None:
|
||||
return self.__generic_metadata
|
||||
metadata_path = self.rule_path.joinpath(METADATA_FILE_NAME)
|
||||
self.__generic_metadata = json.loads(metadata_path.read_bytes())
|
||||
metadata_contents = load_metadata_contents(metadata_path)
|
||||
self.__generic_metadata = json.loads(metadata_contents)
|
||||
return self.__generic_metadata
|
||||
|
||||
|
||||
|
@ -0,0 +1,2 @@
|
||||
{
|
||||
}
|
@ -0,0 +1 @@
|
||||
A rule with an invisible Unicode character preceding the open '{' in the metadata.json
|
@ -0,0 +1,2 @@
|
||||
{
|
||||
}
|
@ -0,0 +1,3 @@
|
||||
{
|
||||
"title": "Extension methods should not extend \"Object\""
|
||||
}
|
@ -0,0 +1 @@
|
||||
dummy
|
@ -61,6 +61,10 @@ def test_rule_with_invalid_language(invalid_rules: RulesRepository):
|
||||
with pytest.raises(RuleValidationError, match=fr'^Rule S502 failed validation for these reasons:\n - Rule scala:S502 has invalid metadata'):
|
||||
validate_rule_metadata(s502)
|
||||
|
||||
def test_rule_with_unicode_in_metadata(invalid_rules: RulesRepository):
|
||||
s4225 = invalid_rules.get_rule('S4225')
|
||||
with pytest.raises(UnicodeDecodeError, match=fr'ascii'):
|
||||
validate_rule_metadata(s4225)
|
||||
|
||||
def test_rule_that_is_fully_valid(mockrules: Path):
|
||||
valid_rule = RulesRepository(rules_path=mockrules).get_rule('S120')
|
||||
|
@ -1,5 +1,5 @@
|
||||
{
|
||||
"title": "An object’s dynamic type should not be used from its constructors or destructor",
|
||||
"title": "An object's dynamic type should not be used from its constructors or destructor",
|
||||
"type": "BUG",
|
||||
"status": "ready",
|
||||
"remediation": {
|
||||
|
@ -1,5 +1,5 @@
|
||||
{
|
||||
"title": "Do not use “using namespace” directives in header files",
|
||||
"title": "Do not use \"using namespace\" directives in header files",
|
||||
"type": "CODE_SMELL",
|
||||
"status": "closed",
|
||||
"remediation": {
|
||||
|
@ -1,2 +1,2 @@
|
||||
{
|
||||
{
|
||||
}
|
||||
|
@ -1,3 +1,3 @@
|
||||
{
|
||||
{
|
||||
"title": "Extension methods should not extend \"Object\""
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
{
|
||||
"title": "The statement forming the body of a switch, while, do … while or for statement shall be a compound statement",
|
||||
"title": "The statement forming the body of a \"switch\", \"while\", \"do {...} while\" or \"for\" statement shall be a compound statement",
|
||||
"type": "CODE_SMELL",
|
||||
"status": "ready",
|
||||
"remediation": {
|
||||
|
@ -1,5 +1,5 @@
|
||||
{
|
||||
"title": "All if … else if constructs shall be terminated with an else clause",
|
||||
"title": "All \"if ... else if\" constructs shall be terminated with an \"else \"clause",
|
||||
"type": "CODE_SMELL",
|
||||
"status": "ready",
|
||||
"remediation": {
|
||||
|
@ -1,5 +1,5 @@
|
||||
{
|
||||
"title": "Case insensitive Unicode regular expressions should enable the “UNICODE_CASE” flag",
|
||||
"title": "Case insensitive Unicode regular expressions should enable the \"UNICODE_CASE\" flag",
|
||||
"type": "BUG",
|
||||
"status": "ready",
|
||||
"remediation": {
|
||||
|
@ -1,5 +1,5 @@
|
||||
{
|
||||
"title": "Elements in a container should be erased with “std::erase” or “std::erase_if”",
|
||||
"title": "Elements in a container should be erased with \"std::erase\" or \"std::erase_if\"",
|
||||
"type": "CODE_SMELL",
|
||||
"status": "ready",
|
||||
"remediation": {
|
||||
|
@ -1,5 +1,5 @@
|
||||
{
|
||||
"title": "“contains” should be used to check if a key exists in a container",
|
||||
"title": "\"contains\" should be used to check if a key exists in a container",
|
||||
"type": "CODE_SMELL",
|
||||
"status": "ready",
|
||||
"remediation": {
|
||||
|
@ -1,5 +1,5 @@
|
||||
{
|
||||
"title": "A “U ” suffix shall be applied to all octal or hexadecimal integer literals of unsigned type",
|
||||
"title": "A \"U\" suffix shall be applied to all octal or hexadecimal integer literals of unsigned type",
|
||||
"type": "CODE_SMELL",
|
||||
"status": "closed",
|
||||
"remediation": {
|
||||
|
Loading…
x
Reference in New Issue
Block a user