RULEAPI-682: Index multiple types and rules with no languages

* Generate description and metadata for rules with no language, so that they get indexed.
* Index rules with different types in language specializations.
* Improve validation to reject new rules with no language specialization (i.e. only a predefined set of such rules is allowed because they were imported from Jira and kept for historical purposes).
* Write smaller JSON files, reduce their size by 30%.
* Improve test coverage of CLI application.
This commit is contained in:
Marco Antognini 2022-01-28 09:51:13 +01:00 committed by GitHub
parent f0a6ea5537
commit b2b116a8e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 825 additions and 244 deletions

View File

@ -78,6 +78,8 @@ validate_metadata_task:
dockerfile: ci/Dockerfile dockerfile: ci/Dockerfile
cpu: 1 cpu: 1
memory: 1G memory: 1G
env:
CIRRUS_CLONE_DEPTH: 0
metadata_tests_script: metadata_tests_script:
- ./ci/validate_metadata.sh - ./ci/validate_metadata.sh

View File

@ -1,7 +1,25 @@
# Validate metadata #!/bin/bash
cd rspec-tools set -ueo pipefail
pipenv install
pipenv run rspec-tools validate-rules-metadata git fetch --quiet "${CIRRUS_DEFAULT_ORIGIN:-origin}" "${CIRRUS_DEFAULT_BRANCH:-master}"
if [[ $? -ne 0 ]]; then base="$(git merge-base FETCH_HEAD HEAD)"
exit 1 echo "Comparing against the merge-base: ${base}"
if ! git diff --name-only --exit-code "${base}" -- rspec-tools/
then
# Revalidate all rules
affected_rules="$(basename --multiple rules/*)"
else
affected_rules="$(git diff --name-only "${base}" -- rules/ | sed -Ee 's#rules/(S[0-9]+)/.*#\1#' | sort -u)"
fi
# Turn affected_rules into an array, for proper handling of spaces:
# one line is one element in the array
readarray -t affected_rules < <(echo "${affected_rules}")
# Validate metadata
if [[ "${#affected_rules[@]}" -gt 0 ]]
then
cd rspec-tools
pipenv install
pipenv run rspec-tools validate-rules-metadata "${affected_rules[@]}"
fi fi

View File

@ -5,31 +5,49 @@ import { generateRulesDescription } from '../description';
import { withTestDir, createFiles } from '../testutils'; import { withTestDir, createFiles } from '../testutils';
describe('description generation', () => { describe('description generation', () => {
test('generates html from asciidoc', () => { test('generates html from asciidoc', () => {
return withTestDir((srcPath) => { return withTestDir((srcPath) => {
createFiles(srcPath, { createFiles(srcPath, {
'S100/description.adoc': 'Generic content', 'S100/rule.adoc': 'Generic content',
'S100/java/rule.adoc': 'S100/java/rule.adoc': [
['include::../description.adoc[]', 'include::../rule.adoc[]',
'Specific content'].join('\n') 'Specific content',
].join('\n'),
'S501/rule.adoc': 'Generic content, no active language',
}); });
return withTestDir(async (dstPath) => { return withTestDir(async (dstPath) => {
generateRulesDescription(srcPath, dstPath); generateRulesDescription(srcPath, dstPath);
const ruleHtml = fs.readFileSync(path.join(dstPath, 'S100', 'java-description.html')); const s100Java = path.join(dstPath, 'S100', 'java-description.html');
expect(ruleHtml.toString()).toEqual( expect(fs.existsSync(s100Java)).toBeTruthy();
[ const htmlS100Java = fs.readFileSync(s100Java);
'<div class="sect1">', expect(htmlS100Java.toString()).toEqual([
'<h2 id="_description">Description</h2>', '<div class="sect1">',
'<div class="sectionbody">', '<h2 id="_description">Description</h2>',
'<div class="paragraph">', '<div class="sectionbody">',
'<p>Generic content', '<div class="paragraph">',
'Specific content</p>', '<p>Generic content',
'</div>', 'Specific content</p>',
'</div>', '</div>',
'</div>' '</div>',
].join('\n') '</div>',
); ].join('\n'));
const s501Default = path.join(dstPath, 'S501', 'default-description.html');
expect(fs.existsSync(s501Default)).toBeTruthy();
const htmlS501Default = fs.readFileSync(s501Default);
expect(htmlS501Default.toString()).toEqual([
'<div class="sect1">',
'<h2 id="_description">Description</h2>',
'<div class="sectionbody">',
'<div class="paragraph">',
'<p>Generic content, no active language</p>',
'</div>',
'</div>',
'</div>',
].join('\n'));
}); });
}); });
}); });

View File

@ -72,6 +72,52 @@ describe('metadata generation', () => {
}); });
}); });
test('computes rule types correctly', () => {
  // A language specialization inherits any metadata field it does not
  // override from the generic rule metadata (title and type here).
  return withTestDir((srcPath) => {
    createFiles(srcPath, {
      'S100/metadata.json': JSON.stringify({
        title: 'Rule S100',
        type: 'CODE_SMELL',
      }),
      'S100/java/metadata.json': JSON.stringify({
        title: 'Java Rule S100',
      }),
      'S100/python/metadata.json': JSON.stringify({
        type: 'CODE_SMELL',
      }),
      'S100/cfamily/metadata.json': JSON.stringify({
        type: 'BUG',
      }),
    });
    return withTestDir(async (dstPath) => {
      generateRulesMetadata(srcPath, dstPath);
      // Read and parse one generated per-language metadata file.
      const readMetadata = (language: string) =>
        JSON.parse(fs.readFileSync(`${dstPath}/S100/${language}-metadata.json`).toString());
      // Java overrides the title; the type comes from the generic metadata.
      expect(readMetadata('java')).toMatchObject({
        title: 'Java Rule S100',
        type: 'CODE_SMELL',
      });
      // Python overrides nothing visible: title falls back to the generic one.
      expect(readMetadata('python')).toMatchObject({
        title: 'Rule S100',
        type: 'CODE_SMELL',
      });
      // Cfamily overrides the type only.
      expect(readMetadata('cfamily')).toMatchObject({
        title: 'Rule S100',
        type: 'BUG',
      });
    });
  });
});
test('generates only requested rules if a list of rule is provided', () => { test('generates only requested rules if a list of rule is provided', () => {
return withTestDir((srcPath) => { return withTestDir((srcPath) => {
createFiles(srcPath, { createFiles(srcPath, {
@ -125,7 +171,7 @@ describe('metadata generation', () => {
}); });
}); });
test('generate test metadata', () => { test('generates metadata for active rules', () => {
return withTestDir(async (dstPath) => { return withTestDir(async (dstPath) => {
generateRulesMetadata(path.join(__dirname, 'resources', 'rules'), dstPath); generateRulesMetadata(path.join(__dirname, 'resources', 'rules'), dstPath);
const rules = fs.readdirSync(dstPath); const rules = fs.readdirSync(dstPath);
@ -145,4 +191,91 @@ describe('metadata generation', () => {
expect(treated).toBe(9); expect(treated).toBe(9);
}); });
}); });
// Closed rules are kept for historical purposes: they must still get a
// 'default' metadata file even when they have no (or only closed)
// language specializations.
test('generates metadata for closed rules', () => {
  return withTestDir(srcPath => {
    createFiles(srcPath, {
      // S01: closed rule with no language directory at all.
      'S01/metadata.json': JSON.stringify({
        title: 'Rule is closed and has no language-specific specification',
        type: 'CODE_SMELL',
        status: 'closed',
        sqKey: 'S01',
        extra: {
          legacyKeys: ['OldS01'],
        },
      }),
      // S02: closed rule with a single, also closed, cfamily specialization.
      'S02/metadata.json': JSON.stringify({
        title: 'Rule is closed and has one closed language-specific specification',
        type: 'CODE_SMELL',
        status: 'closed',
        sqKey: 'S02',
      }),
      'S02/cfamily/metadata.json': JSON.stringify({
        title: 'Language specification is closed',
        status: 'closed',
        extra: {
          legacyKeys: ['OldS02'],
        },
      }),
    });
    return withTestDir(async dstPath => {
      generateRulesMetadata(srcPath, dstPath);
      const rules = fs.readdirSync(dstPath).sort();
      expect(rules).toEqual(['S01', 'S02'].sort());
      {
        const rule = 'S01';
        const rulePath = path.join(dstPath, rule);
        // Verify that the expected files are generated and no others.
        const entries = fs.readdirSync(rulePath).sort();
        expect(entries).toEqual(['default-metadata.json'].sort());
        // Check the top-level metadata: no supported languages, and both
        // the current sqKey and the legacy key are collected in allKeys.
        const defaultFile = path.join(rulePath, 'default-metadata.json');
        const defaultData = JSON.parse(fs.readFileSync(defaultFile, 'utf8'));
        expect(defaultData).toMatchObject({
          title: 'Rule is closed and has no language-specific specification',
          type: 'CODE_SMELL',
          status: 'closed',
          languagesSupport: [],
          allKeys: ['S01', 'OldS01'],
        });
      }
      {
        const rule = 'S02';
        const rulePath = path.join(dstPath, rule);
        // Verify that the expected files are generated and no others.
        const entries = fs.readdirSync(rulePath).sort();
        expect(entries).toEqual(['default-metadata.json', 'cfamily-metadata.json'].sort());
        // Check the top-level metadata.
        const defaultFile = path.join(rulePath, 'default-metadata.json');
        const defaultData = JSON.parse(fs.readFileSync(defaultFile, 'utf8'));
        // Generic data is overridden by the first language-specific specification.
        expect(defaultData).toMatchObject({
          title: 'Language specification is closed',
          type: 'CODE_SMELL',
          status: 'closed',
          languagesSupport: [{ name: 'cfamily', status: 'closed', }],
          allKeys: ['S02', 'OldS02'],
        });
        // Check the language-specific metadata; it matches the default one
        // here since cfamily is the only specialization.
        const cfamilyFile = path.join(rulePath, 'cfamily-metadata.json');
        const cfamilyData = JSON.parse(fs.readFileSync(cfamilyFile, 'utf8'));
        expect(cfamilyData).toMatchObject({
          title: 'Language specification is closed',
          type: 'CODE_SMELL',
          status: 'closed',
          languagesSupport: [{ name: 'cfamily', status: 'closed', }],
          allKeys: ['S02', 'OldS02'],
        });
      }
    });
  });
});
}); });

View File

@ -2,6 +2,12 @@ import path from 'path';
import lunr from 'lunr'; import lunr from 'lunr';
import { buildSearchIndex, buildIndexStore, DESCRIPTION_SPLIT_REGEX } from '../searchIndex'; import { buildSearchIndex, buildIndexStore, DESCRIPTION_SPLIT_REGEX } from '../searchIndex';
import { withTestDir, createFiles } from '../testutils';
import { IndexStore } from '../../types/IndexStore';
import {
addFilterForKeysTitlesDescriptions, addFilterForLanguages,
addFilterForQualityProfiles, addFilterForTags, addFilterForTypes
} from '../../utils/useSearch';
describe('index store generation', () => { describe('index store generation', () => {
@ -12,7 +18,7 @@ describe('index store generation', () => {
expect(ruleS3457).toMatchObject({ expect(ruleS3457).toMatchObject({
id: 'S3457', id: 'S3457',
type: 'CODE_SMELL', types: ['CODE_SMELL'],
supportedLanguages: [ supportedLanguages: [
{ "name": "cfamily", "status": "ready", }, { "name": "cfamily", "status": "ready", },
{ "name": "csharp", "status": "ready", }, { "name": "csharp", "status": "ready", },
@ -39,6 +45,84 @@ describe('index store generation', () => {
expect(ruleS3457.descriptions).toEqual(expect.arrayContaining(expectedWords)); expect(ruleS3457.descriptions).toEqual(expect.arrayContaining(expectedWords));
}); });
// A rule can have a different type in each language specialization; the
// index must aggregate all of them into the rule's 'types' array and make
// them searchable. S501 has only a 'default' entry and must be indexed too.
test('computes types correctly', () => {
  return withTestDir(async rulesPath => {
    createFiles(rulesPath, {
      'S100/default-metadata.json': JSON.stringify({
        title: 'Rule S100',
        type: 'CODE_SMELL',
      }),
      'S100/default-description.html': 'Description',
      'S100/java-metadata.json': JSON.stringify({
        title: 'Java Rule S100',
        type: 'CODE_SMELL',
      }),
      'S100/java-description.html': 'Description',
      'S101/default-metadata.json': JSON.stringify({
        title: 'Rule S101',
        type: 'CODE_SMELL',
      }),
      'S101/default-description.html': 'Description',
      // S101 has three specializations, each with a different type.
      'S101/java-metadata.json': JSON.stringify({
        title: 'Java Rule S101',
        type: 'CODE_SMELL',
      }),
      'S101/java-description.html': 'Description',
      'S101/python-metadata.json': JSON.stringify({
        title: 'Rule S101',
        type: 'VULNERABILITY',
      }),
      'S101/python-description.html': 'Description',
      'S101/cfamily-metadata.json': JSON.stringify({
        title: 'Rule S101',
        type: 'BUG',
      }),
      'S101/cfamily-description.html': 'Description',
      // S501 has no language specialization, only the default entry.
      'S501/default-metadata.json': JSON.stringify({
        title: 'Rule S501',
        type: 'CODE_SMELL',
      }),
      'S501/default-description.html': 'Not implemented by any language',
    });
    const [indexStore, aggregates] = buildIndexStore(rulesPath);
    // Per-language rule counts, including the 'default' pseudo-language.
    expect(aggregates.langs).toEqual({ 'cfamily': 1, 'default': 3, 'java': 2, 'python': 1 });
    const ruleS100 = indexStore['S100'];
    expect(ruleS100.types.sort()).toEqual(['CODE_SMELL']);
    const ruleS101 = indexStore['S101'];
    expect(ruleS101.types.sort()).toEqual(['BUG', 'CODE_SMELL', 'VULNERABILITY']);
    const ruleS501 = indexStore['S501'];
    expect(ruleS501.types.sort()).toEqual(['CODE_SMELL']);
    const searchIndex = createIndex(indexStore);
    // S501 must be searchable even without any language specialization.
    expect(searchIndex.search('S501')).toHaveLength(1);
    expect(searchIndex.search('titles:S501')).toHaveLength(1);
    expect(searchIndex.search('titles:*')).toHaveLength(3);
    expect(searchIndex.search('types:*')).toHaveLength(3);
    // For types, the wildcard in the search query is required to succeed!
    // This may be related to how tokenization is handled (or not done for arrays),
    // but the actual reason doesn't matter for this test.
    expect(searchIndex.search('BUG')).toHaveLength(1);
    expect(searchIndex.search('types:BUG')).toHaveLength(1);
    expect(searchIndex.search('*SMELL')).toHaveLength(3);
    expect(searchIndex.search('types:*SMELL')).toHaveLength(3);
    expect(searchIndex.search('*VULNERABILITY')).toHaveLength(1);
    expect(searchIndex.search('types:*VULNERABILITY')).toHaveLength(1);
    // The same results must be reachable through the UI's type filter.
    expect(findRulesByType(searchIndex, '*').sort()).toEqual(['S100', 'S101', 'S501']);
    expect(findRulesByType(searchIndex, 'BUG').sort()).toEqual(['S101']);
    expect(findRulesByType(searchIndex, 'CODE_SMELL').sort()).toEqual(['S100', 'S101', 'S501']);
    expect(findRulesByType(searchIndex, 'VULNERABILITY').sort()).toEqual(['S101']);
  });
});
test('collects all tags', () => { test('collects all tags', () => {
const rulesPath = path.join(__dirname, 'resources', 'metadata'); const rulesPath = path.join(__dirname, 'resources', 'metadata');
const [_, aggregates] = buildIndexStore(rulesPath); const [_, aggregates] = buildIndexStore(rulesPath);
@ -79,116 +163,128 @@ describe('index store generation', () => {
}); });
}); });
function createIndex() {
const rulesPath = path.join(__dirname, 'resources', 'metadata');
const [indexStore, _] = buildIndexStore(rulesPath);
// Hack to avoid warnings when 'selectivePipeline' is already registered
if ('selectivePipeline' in (lunr.Pipeline as any).registeredFunctions) {
delete (lunr.Pipeline as any).registeredFunctions['selectivePipeline']
}
return buildSearchIndex(indexStore);
}
describe('search index enables search by title and description words', () => { describe('search index enables search by title and description words', () => {
test('searches in rule keys', () => { test('searches in rule keys', () => {
const searchIndex = createIndex(); const searchIndex = createIndex();
const searchesS3457 = search(searchIndex, 'S3457', 'all_keys'); const searchesS3457 = findRuleByQuery(searchIndex, 'S3457');
expect(searchesS3457).toEqual(['S3457']); expect(searchesS3457).toEqual(['S3457']);
const searchesS987 = search(searchIndex, 'ppincludesignal', 'all_keys'); const searchesS987 = findRuleByQuery(searchIndex, 'ppincludesignal');
expect(searchesS987).toEqual(['S987']); expect(searchesS987).toEqual(['S987']);
const searchesS1000 = search(searchIndex, 'UnnamedNamespaceInHeader', 'all_keys'); const searchesS1000 = findRuleByQuery(searchIndex, 'UnnamedNamespaceInHeader');
expect(searchesS1000).toEqual(['S1000']); expect(searchesS1000).toEqual(['S1000']);
}); });
test('searches in rule description', () => { test('searches in rule description', () => {
const searchIndex = createIndex(); const searchIndex = createIndex();
const searchesS3457 = search(searchIndex, 'Because printf-style format', 'descriptions'); const searchesS3457 = findRuleByQuery(searchIndex, 'Because printf-style format');
expect(searchesS3457).toEqual(['S3457']); expect(searchesS3457).toEqual(['S3457']);
const searchesS987 = search(searchIndex, 'Signal handling contains', 'descriptions'); const searchesS987 = findRuleByQuery(searchIndex, 'Signal handling contains');
expect(searchesS987).toEqual(['S987']); expect(searchesS987).toEqual(['S987']);
const searchesUnknown = search(searchIndex, 'Unknown description', 'descriptions'); const searchesUnknown = findRuleByQuery(searchIndex, 'Unknown description');
expect(searchesUnknown).toHaveLength(0); expect(searchesUnknown).toHaveLength(0);
const searchesBothRules = search(searchIndex, 'Noncompliant Code Example', 'descriptions'); const searchesBothRules = findRuleByQuery(searchIndex, 'Noncompliant Code Example');
expect(searchesBothRules.sort()).toEqual(['S1000', 'S3457', 'S987'].sort()); expect(searchesBothRules).toEqual(['S1000', 'S3457', 'S987'].sort());
const searchesRuleMentions = search(searchIndex, 'S1000', 'descriptions'); const searchesRuleMentions = findRuleByQuery(searchIndex, 'S1000');
expect(searchesRuleMentions).toEqual(['S987'].sort()); expect(searchesRuleMentions).toEqual(['S987', 'S1000'].sort());
}); });
test('searches in rule title', () => { test('searches in rule title', () => {
const searchIndex = createIndex(); const searchIndex = createIndex();
const searchesS3457 = search(searchIndex, 'Composite format strings', 'titles'); const searchesS3457 = findRuleByQuery(searchIndex, 'Composite format strings');
expect(searchesS3457).toEqual(['S3457']); expect(searchesS3457).toEqual(['S3457']);
const searchesS987 = search(searchIndex, 'signal.h used', 'titles'); const searchesS987 = findRuleByQuery(searchIndex, 'signal.h used');
expect(searchesS987).toEqual(['S987']); expect(searchesS987).toEqual(['S987']);
const searchesUnknown = search(searchIndex, 'unknown title', 'titles'); const searchesUnknown = findRuleByQuery(searchIndex, 'unknown title');
expect(searchesUnknown).toHaveLength(0); expect(searchesUnknown).toHaveLength(0);
const searchesBothRules = search(searchIndex, 'be should', 'titles'); const searchesBothRules = findRuleByQuery(searchIndex, 'should be used');
expect(searchesBothRules.sort()).toEqual(['S3457', 'S987'].sort()); expect(searchesBothRules).toEqual(['S3457', 'S987'].sort());
}); });
/**
 * Tokenize the query and require every token to be present in the given
 * field; returns the matching rule references.
 */
function search(index: lunr.Index, query: string, field: string): string[] {
  const results = index.query(builder => {
    for (const token of lunr.tokenizer(query)) {
      builder.term(token, { fields: [field], presence: lunr.Query.presence.REQUIRED });
    }
  });
  return results.map(hit => hit.ref);
}
}); });
describe('search index enables search by tags, quality profiles and languages', () => { describe('search index enables search by tags, quality profiles and languages', () => {
test('searches in rule tags', () => { test('searches in rule tags', () => {
const searchIndex = createIndex(); const searchIndex = createIndex();
const searchesS3457 = search(searchIndex, 'cert', 'tags'); const searchesS3457 = findRulesByTags(searchIndex, ['cert']);
expect(searchesS3457).toHaveLength(2); expect(searchesS3457).toHaveLength(2);
expect(searchesS3457).toContain('S1000'); expect(searchesS3457).toContain('S1000');
expect(searchesS3457).toContain('S3457'); expect(searchesS3457).toContain('S3457');
const searchesS987 = search(searchIndex, 'based-on-misra', 'tags'); const searchesS987 = findRulesByTags(searchIndex, ['based-on-misra', 'lock-in']);
expect(searchesS987).toEqual(['S987']); expect(searchesS987).toEqual(['S987']);
const searchesUnknown = search(searchIndex, 'unknown tag', 'tags'); const searchesUnknown = findRulesByTags(searchIndex, ['unknown tag']);
expect(searchesUnknown).toHaveLength(0); expect(searchesUnknown).toHaveLength(0);
}); });
test('searches in rule quality profiles', () => { test('searches in rule quality profiles', () => {
const searchIndex = createIndex(); const searchIndex = createIndex();
const searchesSonarWay = search(searchIndex, 'sonar way', 'qualityProfiles'); const searchesSonarWay = findRulesByProfile(searchIndex, 'sonar way');
expect(searchesSonarWay).toEqual(['S1000', 'S3457']); expect(searchesSonarWay).toEqual(['S1000', 'S3457']);
const filtersAll = search(searchIndex, 'non-existent', 'qualityProfiles'); const filtersAll = findRulesByProfile(searchIndex, 'non-existent');
expect(filtersAll).toEqual([]); expect(filtersAll).toEqual([]);
}); });
test('filter per language', () => { test('filter per language', () => {
const searchIndex = createIndex(); const searchIndex = createIndex();
const csharpRules = search(searchIndex, 'csharp', 'languages'); const csharpRules = findRulesByLanguage(searchIndex, 'csharp');
expect(csharpRules).toEqual(['S3457']); expect(csharpRules).toEqual(['S3457']);
const cfamilyRules = search(searchIndex, 'cfamily', 'languages'); const cfamilyRules = findRulesByLanguage(searchIndex, 'cfamily');
expect(cfamilyRules.sort()).toEqual(['S987', 'S1000', 'S3457'].sort()); expect(cfamilyRules.sort()).toEqual(['S987', 'S1000', 'S3457'].sort());
}); });
/**
 * Look up the raw query term in a single field, bypassing the search
 * pipeline (usePipeline: false), and return the matching rule references.
 */
function search(index: lunr.Index, query: string, field: string): string[] {
  const clauseOptions = {
    fields: [field],
    presence: lunr.Query.presence.REQUIRED,
    usePipeline: false,
  };
  const hits = index.query(q => {
    q.term(query, clauseOptions);
  });
  return hits.map(hit => hit.ref);
}
}); });
function createIndex(ruleIndexStore?: IndexStore) {
if (!ruleIndexStore) {
const rulesPath = path.join(__dirname, 'resources', 'metadata');
const [indexStore, _] = buildIndexStore(rulesPath);
ruleIndexStore = indexStore;
}
// Hack to avoid warnings when 'selectivePipeline' is already registered
if ('selectivePipeline' in (lunr.Pipeline as any).registeredFunctions) {
delete (lunr.Pipeline as any).registeredFunctions['selectivePipeline']
}
return buildSearchIndex(ruleIndexStore);
}
/**
 * Run a query assembled by the given filter callback and return the
 * references of all matching rules.
 */
function findRules<QueryParam>(
  index: lunr.Index,
  filter: (q: lunr.Query, param: QueryParam) => void,
  param: QueryParam
): string[] {
  return index.query(q => filter(q, param)).map(hit => hit.ref);
}
// Thin wrappers around findRules, one per search facet, reusing the same
// filters (from utils/useSearch) that the web UI applies.

// Filter by rule type; the test above also passes '*' as a wildcard.
function findRulesByType(index: lunr.Index, type: string): string[] {
  return findRules(index, addFilterForTypes, type);
}
// Filter by a list of rule tags.
function findRulesByTags(index: lunr.Index, tags: string[]): string[] {
  return findRules(index, addFilterForTags, tags);
}
// Filter by language name (e.g. 'cfamily').
function findRulesByLanguage(index: lunr.Index, language: string): string[] {
  return findRules(index, addFilterForLanguages, language);
}
// Filter by quality profile; the filter takes a list, so wrap the single one.
function findRulesByProfile(index: lunr.Index, profile: string): string[] {
  return findRules(index, addFilterForQualityProfiles, [profile]);
}
// Free-text search across keys, titles and descriptions; results are
// sorted for stable comparisons in tests.
function findRuleByQuery(index: lunr.Index, query: string): string[] {
  const rules = findRules(index, addFilterForKeysTitlesDescriptions, query);
  return rules.sort();
}

View File

@ -25,32 +25,55 @@ const winstonLogger = asciidoc.LoggerManager.newLogger('WinstonLogger', {
}); });
asciidoc.LoggerManager.setLogger(winstonLogger); asciidoc.LoggerManager.setLogger(winstonLogger);
/**
 * Save the given HTML description under the rule's output directory.
 * @param dstDir destination directory (must already exist).
 * @param filename name of the HTML file to create.
 * @param html rendered description content.
 */
function writeRuleDescription(dstDir: string, filename: string, html: string) {
  fs.writeFileSync(path.join(dstDir, filename), html, { encoding: 'utf8' });
}
/**
 * Generate the default description for a rule that has no
 * language-specific data: render the generic rule.adoc and save it as
 * 'default-description.html'.
 */
function generateGenericDescription(srcDir: string, dstDir: string) {
  const html = generateRuleDescription(getRuleAdoc(srcDir));
  writeRuleDescription(dstDir, 'default-description.html', html);
}
/** /**
* Generate rule descriptions (for all relevant languages) and write it in the destination directory. * Generate rule descriptions (for all relevant languages) and write it in the destination directory.
* @param srcDir directory containing the original rule metadata and description. * @param srcDir directory containing the original rule's metadata and description.
* @param dstDir directory where the generated rules metadata and description will be written. * @param dstDir directory where the generated rule's description will be written.
*/ */
export function generateOneRuleDescription(srcDir: string, dstDir: string) { export function generateOneRuleDescription(srcDir: string, dstDir: string) {
fs.mkdirSync(dstDir, { recursive: true }); fs.mkdirSync(dstDir, { recursive: true });
const languages = listSupportedLanguages(srcDir); const languages = listSupportedLanguages(srcDir);
let default_descr_wanted = true; if (languages.length === 0) {
generateGenericDescription(srcDir, dstDir);
return;
}
let isFirstLanguage = true;
for (const language of languages) { for (const language of languages) {
const html = generateRuleDescription(srcDir, language); const adocFile = getRuleAdoc(srcDir, language);
const dstFile = path.join(dstDir, language + '-description.html'); const html = generateRuleDescription(adocFile);
fs.writeFileSync(dstFile, html, {encoding: 'utf8'}); writeRuleDescription(dstDir, language + '-description.html', html);
if (default_descr_wanted) {
const defFile = path.join(dstDir, 'default-description.html'); if (isFirstLanguage) {
fs.writeFileSync(defFile, html, {encoding: 'utf8'}); // Use the first language as the default description.
default_descr_wanted = false; writeRuleDescription(dstDir, 'default-description.html', html);
isFirstLanguage = false;
} }
} }
} }
/** /**
* Generate rules descriptions and write them in the destination directory. * Generate one directory per rule with its HTML description.
* @param srcPath directory containing the original rules metadata and description. * @param srcPath directory containing all the rules subdirectories, with the metadata and descriptions.
* @param dstPath directory where the generated rules metadata and description will be written. * @param dstPath directory where rule directories should be created.
* @param rules an optional list of rules to list. Other rules won't be generated. * @param rules an optional list of rules to process. Other rules won't be generated.
*/ */
export function generateRulesDescription(srcPath: string, dstPath: string, rules?: string[]) { export function generateRulesDescription(srcPath: string, dstPath: string, rules?: string[]) {
for (const { srcDir, dstDir } of getRulesDirectories(srcPath, dstPath, rules)) { for (const { srcDir, dstDir } of getRulesDirectories(srcPath, dstPath, rules)) {
@ -59,32 +82,41 @@ export function generateRulesDescription(srcPath: string, dstPath: string, rules
} }
/** /**
* Generate the description corresponding to one rule and one language. * Retrieve the path to the rule.adoc file for the given rule and optional language.
* @param srcDir rule's source directory. * @param srcDir rule's source directory.
* @param language language for which the metadata should be generated * @param language language for which the metadata should be generated, when provided.
*/ */
function generateRuleDescription(srcDir: string, language: string) { function getRuleAdoc(srcDir: string, language?: string) {
let ruleSrcFile = path.join(srcDir, language, 'rule.adoc'); let ruleSrcFile = language ? path.join(srcDir, language, 'rule.adoc') : undefined;
if (!fs.existsSync(ruleSrcFile)) { if (!ruleSrcFile || !fs.existsSync(ruleSrcFile)) {
ruleSrcFile = path.join(srcDir, 'rule.adoc'); ruleSrcFile = path.join(srcDir, 'rule.adoc');
if (!fs.existsSync(ruleSrcFile)) { }
throw new Error(`Missing file 'rule.adoc' for language ${language} in ${srcDir}`);
}
}
const baseDir = path.resolve(path.dirname(ruleSrcFile));
const opts = {
attributes: {
'rspecator-view': '',
docfile: ruleSrcFile,
},
safe: 'unsafe',
base_dir: baseDir,
backend: 'xhtml5',
to_file: false
};
// Every rule documentation has an implicit level-1 "Description" header. if (!fs.existsSync(ruleSrcFile)) {
const fileData = fs.readFileSync(ruleSrcFile); throw new Error(`Missing file 'rule.adoc' for language ${language} in ${srcDir}`);
const data = '== Description\n\n' + fileData; }
return asciidoc.convert(data, opts);
return ruleSrcFile;
}
/**
 * Generate the HTML for the rule description.
 *
 * Renders the given rule.adoc with Asciidoctor, using the file's own
 * directory as base_dir so that relative include:: directives resolve.
 */
function generateRuleDescription(ruleAdocFile: string) {
  const baseDir = path.resolve(path.dirname(ruleAdocFile));
  const opts = {
    attributes: {
      // NOTE(review): presumably gates ifdef::rspecator-view[] blocks in
      // the rule sources — confirm against the .adoc files.
      'rspecator-view': '',
      docfile: ruleAdocFile,
    },
    // 'unsafe' lifts Asciidoctor's path restrictions — presumably needed
    // for include:: directives; confirm.
    safe: 'unsafe',
    base_dir: baseDir,
    backend: 'xhtml5',
    to_file: false // return the HTML string instead of writing a file
  };
  // Every rule documentation has an implicit level-1 "Description" header.
  const fileData = fs.readFileSync(ruleAdocFile);
  const data = '== Description\n\n' + fileData;
  return asciidoc.convert(data, opts) as string;
} }

View File

@ -1,39 +1,79 @@
import fs from 'fs'; import fs from 'fs';
import path from 'path'; import path from 'path';
import { LanguageSupport } from '../types/RuleMetadata';
import { LanguageSupport } from '../types/RuleMetadata';
import { getRulesDirectories, listSupportedLanguages } from './utils'; import { getRulesDirectories, listSupportedLanguages } from './utils';
// Rule metadata is free-form JSON; its schema is validated elsewhere.
type Metadata = any;

/**
 * Serialize the metadata as compact JSON (no pretty-printing, to keep the
 * generated files small) and save it under the rule's output directory.
 */
function writeRuleMetadata(dstDir: string, filename: string, metadata: Metadata) {
  fs.writeFileSync(path.join(dstDir, filename), JSON.stringify(metadata), { encoding: 'utf8' });
}
// The slice of rule metadata relevant for key collection: the current
// SonarQube key plus any legacy keys kept for coverage tracking.
type SQKeyMetadata = {
  sqKey: string,
  extra?: { legacyKeys?: string[] },
};

/**
 * Collect every sqKey and legacy key across all language specializations,
 * deduplicated and in first-seen order, to check rule coverage.
 */
function getAllKeys(allMetadata: { metadata: SQKeyMetadata }[]) {
  const uniqueKeys = new Set<string>();
  for (const { metadata } of allMetadata) {
    uniqueKeys.add(metadata.sqKey);
    for (const legacyKey of metadata.extra?.legacyKeys ?? []) {
      uniqueKeys.add(legacyKey);
    }
  }
  return Array.from(uniqueKeys);
}
/**
 * Generate the default metadata for a rule that has no language-specific
 * data: empty language support, keys taken from the generic spec only.
 * @param branch branch containing this version of the rule.
 */
function generateGenericMetadata(srcDir: string, dstDir: string, branch: string) {
  const metadata = getRuleMetadata(srcDir);
  Object.assign(metadata, {
    languagesSupport: [],
    allKeys: getAllKeys([{ metadata }]),
    branch,
  });
  writeRuleMetadata(dstDir, 'default-metadata.json', metadata);
}
/** /**
* Generate rule metadata (for all relevant languages) and write it in the destination directory. * Generate rule metadata (for all relevant languages) and write it in the destination directory.
* @param srcDir directory containing the original rule metadata and description. * @param srcDir directory containing the original rule's metadata and description.
* @param dstDir directory where the generated metadata and description will be written. * @param dstDir directory where the generated metadata will be written.
* @param branch the branch containing the given version of the rule. Typically 'master' but can be different for not merged rules. * @param branch the branch containing the given version of the rule.
* @param prUrl optional link to the PR adding the rule. absent for merged rules. * Typically 'master' but can be different for not merged rules.
* @param prUrl optional link to the PR adding the rule. Absent for merged rules.
*/ */
export function generateOneRuleMetadata(srcDir: string, dstDir: string, export function generateOneRuleMetadata(srcDir: string, dstDir: string, branch: string, prUrl?: string) {
branch: string, prUrl?: string) {
fs.mkdirSync(dstDir, { recursive: true }); fs.mkdirSync(dstDir, { recursive: true });
const allLanguages = listSupportedLanguages(srcDir); const allLanguages = listSupportedLanguages(srcDir);
if (allLanguages.length === 0) {
if (prUrl !== undefined) {
console.warn('New rules must have at least one language.');
}
generateGenericMetadata(srcDir, dstDir, branch);
return;
}
const allMetadata = allLanguages.map((language) => { const allMetadata = allLanguages.map((language) => {
const metadata = generateRuleMetadata(srcDir, language); const metadata = getRuleMetadata(srcDir, language);
return {language, metadata}; return { language, metadata };
}); });
// Update language status for all // Update language status for all
const languageSupports = const languageSupports =
allMetadata.map(m => ({name: m.language, status: m.metadata.status} as LanguageSupport)); allMetadata.map(m => ({ name: m.language, status: m.metadata.status } as LanguageSupport));
// Merge all sqKeys in an array so that we can use it later to check rule coverage. const allKeys = getAllKeys(allMetadata);
const allKeys = allMetadata allMetadata.forEach(({ metadata }) => {
.reduce((set, {metadata}) => { metadata.allKeys = allKeys;
set.add(metadata.sqKey);
metadata.extra?.legacyKeys?.forEach((key: string) => set.add(key));
return set;
}, new Set<string>());
const allKeysArray = Array.from(allKeys);
allMetadata.forEach(({metadata}) => {
metadata.allKeys = allKeysArray;
if (prUrl) { if (prUrl) {
metadata.prUrl = prUrl; metadata.prUrl = prUrl;
} }
@ -41,23 +81,23 @@ export function generateOneRuleMetadata(srcDir: string, dstDir: string,
metadata.languagesSupport = languageSupports; metadata.languagesSupport = languageSupports;
}); });
let default_metadata_wanted = true; let isFirstLanguage = true;
for (const { language, metadata } of allMetadata) { for (const { language, metadata } of allMetadata) {
const dstJsonFile = path.join(dstDir, language + '-metadata.json'); writeRuleMetadata(dstDir, language + '-metadata.json', metadata);
fs.writeFileSync(dstJsonFile, JSON.stringify(metadata, null, 2), { encoding: 'utf8' })
if (default_metadata_wanted) { if (isFirstLanguage) {
const dstFile = path.join(dstDir, 'default-metadata.json'); // Use the first language as the default metadata.
fs.writeFileSync(dstFile, JSON.stringify(metadata, null, 2), { encoding: 'utf8' }); writeRuleMetadata(dstDir, 'default-metadata.json', metadata);
default_metadata_wanted = false; isFirstLanguage = false;
} }
} }
} }
/** /**
* Generate rules metadata and write them in the destination directory. * Generate one directory per rule with its JSON metadata.
* @param srcPath directory containing the original rules metadata and description. * @param srcPath directory containing all the rules subdirectories, with the metadata and descriptions.
* @param dstPath directory where the generated rules metadata and description will be written. * @param dstPath directory where rule directories should be created.
* @param rules an optional list of rules to list. Other rules won't be generated. * @param rules an optional list of rules to process. Other rules won't be generated.
*/ */
export function generateRulesMetadata(srcPath: string, dstPath: string, rules?: string[]) { export function generateRulesMetadata(srcPath: string, dstPath: string, rules?: string[]) {
for (const { srcDir, dstDir } of getRulesDirectories(srcPath, dstPath, rules)) { for (const { srcDir, dstDir } of getRulesDirectories(srcPath, dstPath, rules)) {
@ -68,12 +108,20 @@ export function generateRulesMetadata(srcPath: string, dstPath: string, rules?:
/** /**
* Generate the metadata corresponding to one rule and one language. * Generate the metadata corresponding to one rule and one language.
* @param srcDir rule's source directory. * @param srcDir rule's source directory.
* @param language language for which the metadata should be generated * @param language language for which the metadata should be generated (or none)
*/ */
function generateRuleMetadata(srcDir: string, language: string) { function getRuleMetadata(srcDir: string, language?: string) {
const parentFile = path.join(srcDir, language, 'metadata.json'); const languageSpecificJson = (() => {
const parentJson = fs.existsSync(parentFile) ? JSON.parse(fs.readFileSync(parentFile, 'utf8')) : {}; if (!language) {
const childFile = path.join(srcDir, 'metadata.json'); return {};
const childJson = fs.existsSync(childFile) ? JSON.parse(fs.readFileSync(childFile, 'utf8')) : {}; }
return {...childJson, ...parentJson}; const languageFile = path.join(srcDir, language, 'metadata.json');
if (fs.existsSync(languageFile)) {
return JSON.parse(fs.readFileSync(languageFile, 'utf8'));
}
return {};
})();
const genericFile = path.join(srcDir, 'metadata.json');
const genericJson = fs.existsSync(genericFile) ? JSON.parse(fs.readFileSync(genericFile, 'utf8')) : {};
return { ...genericJson, ...languageSpecificJson };
} }

View File

@ -5,7 +5,7 @@ import path from 'path';
import { stripHtml } from 'string-strip-html'; import { stripHtml } from 'string-strip-html';
import lunr, { Token } from 'lunr'; import lunr, { Token } from 'lunr';
import { IndexedRule, IndexStore, Severity, IndexAggregates } from '../types/IndexStore'; import { IndexedRule, IndexStore, Severity, Type, IndexAggregates } from '../types/IndexStore';
import { logger as rootLogger } from './deploymentLogger'; import { logger as rootLogger } from './deploymentLogger';
import { LanguageSupport } from '../types/RuleMetadata'; import { LanguageSupport } from '../types/RuleMetadata';
@ -20,8 +20,8 @@ export interface IndexedRuleWithDescription extends IndexedRule {
function buildOneRuleRecord(allLanguages: string[], rulesPath: string, ruleDir: string) { function buildOneRuleRecord(allLanguages: string[], rulesPath: string, ruleDir: string) {
let types = new Set<string>(); const types = new Set<Type>();
let severities = new Set<Severity>(); const severities = new Set<Severity>();
const allKeys = new Set<string>([ruleDir]); const allKeys = new Set<string>([ruleDir]);
const titles = new Set<string>(); const titles = new Set<string>();
const tags = new Set<string>(); const tags = new Set<string>();
@ -90,7 +90,7 @@ function buildOneRuleIndexedRecord(rulesPath: string, ruleDir: string)
logger.error(`No languages found for rule ${ruleDir}, at least 1 is required`); logger.error(`No languages found for rule ${ruleDir}, at least 1 is required`);
return null; return null;
} }
if (record.types.size !== 1) { if (record.types.size < 1) {
logger.error( logger.error(
`${record.types.size} type(s) found for rule ${ruleDir}, 1 is required: ${JSON.stringify(record.types)}`); `${record.types.size} type(s) found for rule ${ruleDir}, 1 is required: ${JSON.stringify(record.types)}`);
return null; return null;
@ -103,7 +103,7 @@ function buildOneRuleIndexedRecord(rulesPath: string, ruleDir: string)
const indexedRecord: IndexedRuleWithDescription = { const indexedRecord: IndexedRuleWithDescription = {
id: ruleDir, id: ruleDir,
supportedLanguages: Array.from(record.supportedLanguages).sort(), supportedLanguages: Array.from(record.supportedLanguages).sort(),
type: record.types.values().next().value, types: Array.from(record.types).sort(),
severities: Array.from(record.severities).sort(), severities: Array.from(record.severities).sort(),
all_keys: Array.from(record.allKeys).sort(), all_keys: Array.from(record.allKeys).sort(),
titles: Array.from(record.titles).sort(), titles: Array.from(record.titles).sort(),
@ -172,7 +172,7 @@ export function buildSearchIndex(ruleIndexStore: IndexStore) {
// it is not declared in the Token class. Thus we cast as any here. // it is not declared in the Token class. Thus we cast as any here.
const fields = (token as any).metadata["fields"]; const fields = (token as any).metadata["fields"];
// process only titles and descriptions // process only titles and descriptions
if (fields.includes('all_keys') || fields.includes('titles') || fields.includes('descriptions') ) { if (fields.includes('all_keys') || fields.includes('titles') || fields.includes('descriptions')) {
// We don't use the stopword filter to allow words such as "do", "while", "for" // We don't use the stopword filter to allow words such as "do", "while", "for"
const trimmed = lunr.trimmer(token); const trimmed = lunr.trimmer(token);
return lunr.stemmer(trimmed); return lunr.stemmer(trimmed);
@ -189,7 +189,7 @@ export function buildSearchIndex(ruleIndexStore: IndexStore) {
this.ref('id'); this.ref('id');
this.field('titles', { extractor: (doc) => (doc as IndexedRule).titles.join('\n') }); this.field('titles', { extractor: (doc) => (doc as IndexedRule).titles.join('\n') });
this.field('type'); this.field('types');
this.field('languages', { extractor: (doc) => (doc as IndexedRule).supportedLanguages.map(lang => lang.name) }); this.field('languages', { extractor: (doc) => (doc as IndexedRule).supportedLanguages.map(lang => lang.name) });
this.field('defaultSeverity'); this.field('defaultSeverity');
this.field('tags'); this.field('tags');
@ -219,7 +219,7 @@ export function createIndexFiles(rulesPath: string) {
for (const rule of Object.values(indexStore)) { for (const rule of Object.values(indexStore)) {
delete rule.descriptions; delete rule.descriptions;
} }
const indexStoreJson = JSON.stringify(indexStore, null, 2); const indexStoreJson = JSON.stringify(indexStore);
const indexStorePath = path.join(rulesPath, "rule-index-store.json") const indexStorePath = path.join(rulesPath, "rule-index-store.json")
fs.writeFileSync(indexStorePath, indexStoreJson, {encoding: 'utf8', flag: 'w'}); fs.writeFileSync(indexStorePath, indexStoreJson, {encoding: 'utf8', flag: 'w'});

View File

@ -31,4 +31,4 @@ export function listSupportedLanguages(ruleDirectory: string): string[] {
return fs.readdirSync(ruleDirectory) return fs.readdirSync(ruleDirectory)
.filter(fileName => fs.lstatSync(path.join(ruleDirectory, fileName)).isDirectory()) .filter(fileName => fs.lstatSync(path.join(ruleDirectory, fileName)).isDirectory())
.sort(); .sort();
} }

View File

@ -1,12 +1,13 @@
import {LanguageSupport} from './RuleMetadata'; import {LanguageSupport} from './RuleMetadata';
export type Severity = 'Blocker'|'Critical'|'Major'|'Minor'|'Info'; export type Severity = 'Blocker'|'Critical'|'Major'|'Minor'|'Info';
export type Type = 'BUG'|'CODE_SMELL'|'VULNERABILITY'|'SECURITY_HOTSPOT';
export interface IndexedRule { export interface IndexedRule {
id: string; id: string;
supportedLanguages: LanguageSupport[]; supportedLanguages: LanguageSupport[];
// FIXME: type, defaultSeverity should never be null but the index generation has a bug // FIXME: type, defaultSeverity should never be null but the index generation has a bug
type: 'BUG'|'CODE_SMELL'|'VULNERABILITY'|'SECURITY_HOTSPOT'; types: Type[];
severities: Severity[]; severities: Severity[];
all_keys: string[]; all_keys: string[];
titles: string[]; titles: string[];

View File

@ -1,15 +1,60 @@
import React, { useState } from 'react'; import React, { useState } from 'react';
import * as lunr from 'lunr' import lunr from 'lunr';
import { useFetch } from './useFetch'; import { useFetch } from './useFetch';
import { IndexedRule, IndexStore } from '../types/IndexStore'; import { IndexedRule, IndexStore } from '../types/IndexStore';
export function addFilterForTypes(q: lunr.Query, type: string) {
q.term(type.toLowerCase(), {
fields: ['types'],
presence: lunr.Query.presence.REQUIRED,
usePipeline: false,
});
}
export function addFilterForTags(q: lunr.Query, tags: string[]) {
tags.forEach(tag => {
q.term(tag, {
fields: ['tags'],
presence: lunr.Query.presence.REQUIRED,
usePipeline: false,
});
});
}
export function addFilterForLanguages(q: lunr.Query, language: string) {
q.term(language.toLowerCase(), {
fields: ['languages'],
presence: lunr.Query.presence.REQUIRED,
usePipeline: false,
});
}
export function addFilterForQualityProfiles(q: lunr.Query, profiles: string[]) {
profiles.forEach(profile => {
q.term(profile.toLowerCase(), {
fields: ['qualityProfiles'],
presence: lunr.Query.presence.REQUIRED,
usePipeline: false,
});
});
}
export function addFilterForKeysTitlesDescriptions(q: lunr.Query, query: string) {
lunr.tokenizer(amendQuery(query)).forEach(token => {
q.term(token, {
fields: ['all_keys', 'titles', 'descriptions'],
presence: lunr.Query.presence.REQUIRED
});
});
}
export function useSearch(query: string, ruleType: string|null, ruleLang: string|null, ruleTags: string[], export function useSearch(query: string, ruleType: string|null, ruleLang: string|null, ruleTags: string[],
qualityProfiles: string[], qualityProfiles: string[],
pageSize: number, pageNumber: number) { pageSize: number, pageNumber: number) {
let indexDataUrl = `${process.env.PUBLIC_URL}/rules/rule-index.json`; const indexDataUrl = `${process.env.PUBLIC_URL}/rules/rule-index.json`;
let storeDataUrl = `${process.env.PUBLIC_URL}/rules/rule-index-store.json`; const storeDataUrl = `${process.env.PUBLIC_URL}/rules/rule-index-store.json`;
const [indexData, indexDataError, indexDataIsLoading] = useFetch<object>(indexDataUrl); const [indexData, indexDataError, indexDataIsLoading] = useFetch<object>(indexDataUrl);
const [storeData, storeDataError, storeDataIsLoading] = useFetch<IndexStore>(storeDataUrl); const [storeData, storeDataError, storeDataIsLoading] = useFetch<IndexStore>(storeDataUrl);
@ -33,51 +78,21 @@ export function useSearch(query: string, ruleType: string|null, ruleLang: string
React.useEffect(() => { React.useEffect(() => {
if (index != null && !storeDataIsLoading && !storeDataError) { if (index != null && !storeDataIsLoading && !storeDataError) {
let hits: lunr.Index.Result[] = [] let hits: lunr.Index.Result[] = [];
setError(null); setError(null);
try { try {
// We use index.query instead if index.search in order to fully // We use index.query instead if index.search in order to fully
// control how each filter is added and how the query is processed. // control how each filter is added and how the query is processed.
hits = index.query(q => { hits = index.query(q => {
// Add rule type filter
if (ruleType) { if (ruleType) {
q.term(ruleType.toLowerCase(), { addFilterForTypes(q, ruleType);
fields: ['type'],
presence: lunr.Query.presence.REQUIRED,
usePipeline: false
});
} }
if (ruleLang) { if (ruleLang) {
q.term(ruleLang.toLowerCase(), { addFilterForLanguages(q, ruleLang);
fields: ['languages'],
presence: lunr.Query.presence.REQUIRED,
usePipeline: false
});
} }
addFilterForTags(q, ruleTags);
// Add rule tags filter addFilterForQualityProfiles(q, qualityProfiles);
ruleTags.forEach(ruleTag => { addFilterForKeysTitlesDescriptions(q, query);
q.term(ruleTag, {
fields: ['tags'],
presence: lunr.Query.presence.REQUIRED,
usePipeline: false
});
});
// Add quality profiles filter
qualityProfiles.forEach(qualityProfile => {
q.term(qualityProfile.toLowerCase(), {
fields: ['qualityProfiles'],
presence: lunr.Query.presence.REQUIRED,
usePipeline: false
});
});
// Search for each query token in titles and descriptions
lunr.tokenizer(amendQuery(query)).forEach(token => {
q.term(token, {fields: ['all_keys', 'titles', 'descriptions'], presence: lunr.Query.presence.REQUIRED})
});
}); });
} catch (exception) { } catch (exception) {
if (exception instanceof lunr.QueryParseError) { if (exception instanceof lunr.QueryParseError) {
@ -87,7 +102,7 @@ export function useSearch(query: string, ruleType: string|null, ruleLang: string
} }
} }
if (storeData) { if (storeData) {
setNumberOfHits(hits.length) setNumberOfHits(hits.length);
const pageResults = hits.slice(pageSize*(pageNumber - 1), pageSize*(pageNumber)); const pageResults = hits.slice(pageSize*(pageNumber - 1), pageSize*(pageNumber));
setResults(pageResults.map(({ ref }) => storeData[ref])); setResults(pageResults.map(({ ref }) => storeData[ref]));
setResultsAreLoading(false); setResultsAreLoading(false);

View File

@ -7,7 +7,7 @@ from rspec_tools.checklinks import check_html_links
from rspec_tools.errors import RuleNotFoundError, RuleValidationError from rspec_tools.errors import RuleNotFoundError, RuleValidationError
from rspec_tools.create_rule import create_new_rule, add_language_to_rule from rspec_tools.create_rule import create_new_rule, add_language_to_rule
from rspec_tools.rules import RulesRepository from rspec_tools.rules import RulesRepository
from rspec_tools.validation.metadata import validate_metadata from rspec_tools.validation.metadata import validate_rule_metadata
from rspec_tools.validation.description import validate_section_names, validate_section_levels from rspec_tools.validation.description import validate_section_names, validate_section_levels
from rspec_tools.coverage import update_coverage_for_all_repos, update_coverage_for_repo, update_coverage_for_repo_version from rspec_tools.coverage import update_coverage_for_all_repos, update_coverage_for_repo, update_coverage_for_repo_version
@ -49,28 +49,23 @@ def add_lang_to_rule(language: str, rule: str, user: Optional[str]):
token = os.environ.get('GITHUB_TOKEN') token = os.environ.get('GITHUB_TOKEN')
add_language_to_rule(language, rule, token, user) add_language_to_rule(language, rule, token, user)
@cli.command() @cli.command()
@click.argument('rules', nargs=-1) @click.argument('rules', nargs=-1, required=True)
def validate_rules_metadata(rules): def validate_rules_metadata(rules):
'''Validate rules metadata.''' '''Validate rules metadata.'''
rule_repository = RulesRepository() rule_repository = RulesRepository()
error_counter = 0 error_counter = 0
for rule in rule_repository.rules:
if rules and rule.key not in rules: for rule_id in rules:
continue try:
rule = rule_repository.get_rule(rule_id)
validate_rule_metadata(rule)
except RuleValidationError as e:
click.echo(e.message, err=True)
error_counter += 1
for lang_spec_rule in rule.specializations:
try:
validate_metadata(lang_spec_rule)
except RuleValidationError as e:
click.echo(e.message, err=True)
error_counter += 1
if error_counter > 0: if error_counter > 0:
message = f"Validation failed due to {error_counter} errors" fatal_error(f"Validation failed due to {error_counter} errors out of {len(rules)} analyzed rules")
click.echo(message, err=True)
raise click.Abort(message)
@cli.command() @cli.command()
@click.option('--d', required=True) @click.option('--d', required=True)
@ -95,9 +90,7 @@ def check_sections(d, rules):
click.echo(e.message, err=True) click.echo(e.message, err=True)
error_counter += 1 error_counter += 1
if error_counter > 0: if error_counter > 0:
message = f"Validation failed due to {error_counter} errors" fatal_error(f"Validation failed due to {error_counter} errors")
click.echo(message, err=True)
raise click.Abort(message)
@cli.command() @cli.command()
@click.option('--repository', required=False) @click.option('--repository', required=False)
@ -118,3 +111,7 @@ def notify_failure_on_slack(message: str, channel: str):
notify_slack(message, channel) notify_slack(message, channel)
__all__=['cli'] __all__=['cli']
def fatal_error(message: str):
click.echo(message, err=True)
raise click.Abort(message)

View File

@ -81,8 +81,7 @@ class RulesRepository:
rules_path: Final[Path] rules_path: Final[Path]
def __init__(self, *, rules_path: Path=DEFAULT_RULES_PATH): def __init__(self, rules_path: Path=DEFAULT_RULES_PATH):
print(rules_path.absolute().__str__())
self.rules_path = rules_path self.rules_path = rules_path
@property @property

View File

@ -1,25 +1,129 @@
import json import json
from pathlib import Path from pathlib import Path
from typing import Optional, Final from typing import Final, List
from functools import cache from functools import cache
from jsonschema import validate from jsonschema import validate
from jsonschema.exceptions import ValidationError from jsonschema.exceptions import ValidationError
from rspec_tools.errors import RuleValidationError from rspec_tools.errors import RuleValidationError
from rspec_tools.rules import LanguageSpecificRule from rspec_tools.rules import GenericRule, LanguageSpecificRule
DEFAULT_SCHEMA_PATH: Final[Path] = Path(__file__).parent.joinpath('rule-metadata-schema.json') DEFAULT_SCHEMA_PATH: Final[Path] = Path(__file__).parent.joinpath('rule-metadata-schema.json')
# Closed rules with no languages were imported when the migration from Jira was done.
# These rules are allowed to have no language specialization in order to keep them for posterity.
RULES_WITH_NO_LANGUAGES = [
'S137',
'S501',
'S502',
'S502',
'S788',
'S789',
'S857',
'S866',
'S882',
'S908',
'S911',
'S913',
'S914',
'S918',
'S921',
'S974',
'S975',
'S1076',
'S1078',
'S1127',
'S1173',
'S1212',
'S1230',
'S1294',
'S1318',
'S1513',
'S1518',
'S1519',
'S1520',
'S1521',
'S1538',
'S1646',
'S1701',
'S1724',
'S1730',
'S1746',
'S1802',
'S1815',
'S1825',
'S1826',
'S1847',
'S1873',
'S1924',
'S1925',
'S1956',
'S2098',
'S2128',
'S2192',
'S2215',
'S2336',
'S2337',
'S2338',
'S2341',
'S2371',
'S2385',
'S2732',
'S2735',
'S2736',
'S2848',
'S2916',
'S2987',
'S2988',
'S2998',
'S3223',
'S3354',
'S3746',
'S4805',
]
@cache @cache
def get_json_schema(): def get_json_schema():
return json.loads(DEFAULT_SCHEMA_PATH.read_bytes()) return json.loads(DEFAULT_SCHEMA_PATH.read_bytes())
def validate_metadata(rule_language: LanguageSpecificRule):
def validate_rule_specialization_metadata(rule_language: LanguageSpecificRule):
validate_schema(rule_language) validate_schema(rule_language)
validate_status(rule_language) validate_status(rule_language)
validate_security_standards(rule_language) validate_security_standards(rule_language)
def validate_rule_metadata(rule: GenericRule):
'''In addition to the test carried out by validate_metadata for each language specification,
a rule must have at least one language (unless it is part of the list of exception).
'''
specializations = list(rule.specializations)
if rule.id in RULES_WITH_NO_LANGUAGES:
if specializations:
# When this triggers, ask yourself whether the rule should be removed from RULES_WITH_NO_LANGUAGES
raise RuleValidationError(f'Rule {rule.id} should have no specializations. Forgot to remove it from the list?')
if rule.generic_metadata.get('status', None) not in ['closed', 'deprecated']:
raise RuleValidationError(f'Rule {rule.id} should be closed or deprecated')
# Nothing else to do.
return
if not specializations:
raise RuleValidationError(f'Rule {rule.id} has no language-specific data')
errors: List[str] = []
for language_rule in specializations:
try:
validate_rule_specialization_metadata(language_rule)
except RuleValidationError as e:
errors.append(str(e))
if errors:
raise RuleValidationError(f'Rule {rule.id} failed validation for these reasons:\n - ' + '\n - '.join(errors))
def validate_schema(rule_language: LanguageSpecificRule): def validate_schema(rule_language: LanguageSpecificRule):
schema = get_json_schema() schema = get_json_schema()
try: try:
@ -55,4 +159,4 @@ def has_replacement_rules(rule_language: LanguageSpecificRule):
meta = rule_language.metadata meta = rule_language.metadata
return 'extra' in meta and 'replacementRules' in meta.get('extra') and len(meta.get('extra').get('replacementRules')) > 0 return 'extra' in meta and 'replacementRules' in meta.get('extra') and len(meta.get('extra').get('replacementRules')) > 0
__all__=['validate_metadata'] __all__=['validate_rule_specialization_metadata']

View File

@ -0,0 +1,5 @@
{
"title": "Rule with no languages",
"type": "CODE_SMELL",
"status": "closed"
}

View File

@ -0,0 +1,5 @@
{
"title": "Rule with one language, but invalid",
"type": "CODE_SMELL",
"status": "ready"
}

View File

@ -0,0 +1,3 @@
{
}

View File

@ -0,0 +1,56 @@
from pathlib import Path
from typing import List
from unittest.mock import patch
from click.testing import CliRunner
from rspec_tools.cli import cli
from rspec_tools.rules import RulesRepository
class TestCLIValidateRulesMetadata:
'''Unit test for the Command Line Interface.'''
def _run(self, rules: List[str]):
runner = CliRunner()
arguments = ['validate-rules-metadata'] + rules
return runner.invoke(cli, arguments)
def _mock_rule_ressources(self):
mock_path = Path(__file__).parent.joinpath('resources', 'invalid-rules')
return patch.object(RulesRepository.__init__, '__defaults__', (mock_path,))
def test_missing_parameters(self):
result = self._run([])
assert 'Missing argument \'RULES...\'' in result.output
assert result.exit_code == 2
def test_valid_rule(self):
'''This test uses the actual rules data, not the mock resources.'''
result = self._run(['S100'])
assert result.output == ''
assert result.exit_code == 0
@patch('rspec_tools.validation.metadata.RULES_WITH_NO_LANGUAGES', ['S501'])
def test_invalid_rule_in_allow_list(self):
with self._mock_rule_ressources():
result = self._run(['S501'])
assert result.output == ''
assert result.exit_code == 0
@patch('rspec_tools.validation.metadata.RULES_WITH_NO_LANGUAGES', [])
def test_invalid_rule(self):
with self._mock_rule_ressources():
result = self._run(['S501'])
assert 'Rule S501 has no language-specific data' in result.output
assert 'Validation failed due to 1 errors out of 1 analyzed rules' in result.output
assert result.exit_code == 1
@patch('rspec_tools.validation.metadata.RULES_WITH_NO_LANGUAGES', [])
def test_invalid_rules(self):
with self._mock_rule_ressources():
result = self._run(['S501', 'S502'])
assert 'Rule S501 has no language-specific data' in result.output
assert 'Rule S502 failed validation for these reasons:' in result.output
assert 'Rule scala:S502 has invalid metadata : \'remediation\' is a required property' in result.output
assert 'Validation failed due to 2 errors out of 2 analyzed rules' in result.output
assert result.exit_code == 1

View File

@ -6,16 +6,65 @@ from rspec_tools.errors import RuleValidationError
from copy import deepcopy from copy import deepcopy
from rspec_tools.rules import LanguageSpecificRule, RulesRepository from rspec_tools.rules import LanguageSpecificRule, RulesRepository
from rspec_tools.validation.metadata import validate_metadata from rspec_tools.validation.metadata import validate_rule_specialization_metadata, validate_rule_metadata
@pytest.fixture @pytest.fixture
def rule_language(mockrules: Path): def rule_language(mockrules: Path):
rule = RulesRepository(rules_path=mockrules).get_rule('S100') rule = RulesRepository(rules_path=mockrules).get_rule('S100')
return rule.get_language('kotlin') return rule.get_language('kotlin')
@pytest.fixture
def invalid_rules():
invalid_rules_path = Path(__file__).parent.parent.joinpath('resources', 'invalid-rules')
return RulesRepository(rules_path=invalid_rules_path)
def test_valid_metadata_passes_validation(rule_language: LanguageSpecificRule): def test_valid_metadata_passes_validation(rule_language: LanguageSpecificRule):
'''Check that language metadata are correctly overriden.''' '''Check that language metadata are correctly overridden.'''
validate_metadata(rule_language) validate_rule_specialization_metadata(rule_language)
@patch('rspec_tools.validation.metadata.RULES_WITH_NO_LANGUAGES', [])
def test_rule_with_no_language(invalid_rules: RulesRepository):
s501 = invalid_rules.get_rule('S501')
with pytest.raises(RuleValidationError, match=fr'^Rule S501 has no language-specific data'):
validate_rule_metadata(s501)
@patch('rspec_tools.validation.metadata.RULES_WITH_NO_LANGUAGES', ['S501'])
def test_rule_with_no_language_in_exception_list(invalid_rules: RulesRepository):
s501 = invalid_rules.get_rule('S501')
validate_rule_metadata(s501)
with patch.dict(s501.generic_metadata, [('status', 'deprecated')]):
validate_rule_metadata(s501)
@patch('rspec_tools.validation.metadata.RULES_WITH_NO_LANGUAGES', ['S501'])
def test_open_rule_with_no_language_in_exception_list(invalid_rules: RulesRepository):
s501 = invalid_rules.get_rule('S501')
with pytest.raises(RuleValidationError, match=fr'^Rule S501 should be closed or deprecated'):
with patch.dict(s501.generic_metadata, [('status', 'ready')]):
validate_rule_metadata(s501)
@patch('rspec_tools.validation.metadata.RULES_WITH_NO_LANGUAGES', ['S120'])
def test_rule_expected_to_have_no_language(mockrules: Path):
valid_rule = RulesRepository(rules_path=mockrules).get_rule('S120')
with pytest.raises(RuleValidationError, match=fr'^Rule S120 should have no specializations'):
validate_rule_metadata(valid_rule)
@patch('rspec_tools.validation.metadata.RULES_WITH_NO_LANGUAGES', [])
def test_rule_with_invalid_language(invalid_rules: RulesRepository):
s502 = invalid_rules.get_rule('S502')
with pytest.raises(RuleValidationError, match=fr'^Rule S502 failed validation for these reasons:\n - Rule scala:S502 has invalid metadata'):
validate_rule_metadata(s502)
def test_rule_that_is_fully_valid(mockrules: Path):
valid_rule = RulesRepository(rules_path=mockrules).get_rule('S120')
validate_rule_metadata(valid_rule)
def test_missing_required_property_fails_validation(rule_language: LanguageSpecificRule): def test_missing_required_property_fails_validation(rule_language: LanguageSpecificRule):
@ -24,7 +73,7 @@ def test_missing_required_property_fails_validation(rule_language: LanguageSpeci
with pytest.raises(RuleValidationError, match=fr'^Rule {rule_language.id} has invalid metadata'): with pytest.raises(RuleValidationError, match=fr'^Rule {rule_language.id} has invalid metadata'):
with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock: with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock:
mock.return_value = invalid_metadata mock.return_value = invalid_metadata
validate_metadata(rule_language) validate_rule_specialization_metadata(rule_language)
def test_invalid_remediation_fails_validation(rule_language: LanguageSpecificRule): def test_invalid_remediation_fails_validation(rule_language: LanguageSpecificRule):
@ -33,7 +82,7 @@ def test_invalid_remediation_fails_validation(rule_language: LanguageSpecificRul
with pytest.raises(RuleValidationError, match=fr'^Rule {rule_language.id} has invalid metadata'): with pytest.raises(RuleValidationError, match=fr'^Rule {rule_language.id} has invalid metadata'):
with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock: with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock:
mock.return_value = invalid_metadata mock.return_value = invalid_metadata
validate_metadata(rule_language) validate_rule_specialization_metadata(rule_language)
def test_adding_properties_fails_validation(rule_language: LanguageSpecificRule): def test_adding_properties_fails_validation(rule_language: LanguageSpecificRule):
@ -42,7 +91,7 @@ def test_adding_properties_fails_validation(rule_language: LanguageSpecificRule)
with pytest.raises(RuleValidationError, match=fr'^Rule {rule_language.id} has invalid metadata'): with pytest.raises(RuleValidationError, match=fr'^Rule {rule_language.id} has invalid metadata'):
with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock: with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock:
mock.return_value = metadata mock.return_value = metadata
validate_metadata(rule_language) validate_rule_specialization_metadata(rule_language)
def test_ready_rule_with_replacement_fails_validation(rule_language: LanguageSpecificRule): def test_ready_rule_with_replacement_fails_validation(rule_language: LanguageSpecificRule):
@ -51,7 +100,7 @@ def test_ready_rule_with_replacement_fails_validation(rule_language: LanguageSpe
with pytest.raises(RuleValidationError, match=fr'^Rule {rule_language.id} has invalid metadata: status'): with pytest.raises(RuleValidationError, match=fr'^Rule {rule_language.id} has invalid metadata: status'):
with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock: with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock:
mock.return_value = invalid_metadata mock.return_value = invalid_metadata
validate_metadata(rule_language) validate_rule_specialization_metadata(rule_language)
def test_deprecated_rule_with_replacement_passes_validation(rule_language: LanguageSpecificRule): def test_deprecated_rule_with_replacement_passes_validation(rule_language: LanguageSpecificRule):
@ -60,7 +109,7 @@ def test_deprecated_rule_with_replacement_passes_validation(rule_language: Langu
metadata['status'] = 'deprecated' metadata['status'] = 'deprecated'
with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock: with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock:
mock.return_value = metadata mock.return_value = metadata
validate_metadata(rule_language) validate_rule_specialization_metadata(rule_language)
def test_rule_with_incomplete_list_of_security_standard_fails_validation(rule_language: LanguageSpecificRule): def test_rule_with_incomplete_list_of_security_standard_fails_validation(rule_language: LanguageSpecificRule):
@ -70,7 +119,7 @@ def test_rule_with_incomplete_list_of_security_standard_fails_validation(rule_la
with pytest.raises(RuleValidationError, match=fr'^Rule {rule_language.id} has invalid metadata: securityStandard'): with pytest.raises(RuleValidationError, match=fr'^Rule {rule_language.id} has invalid metadata: securityStandard'):
with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock: with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock:
mock.return_value = invalid_metadata mock.return_value = invalid_metadata
validate_metadata(rule_language) validate_rule_specialization_metadata(rule_language)
def test_rule_with_complete_list_of_security_standard_passes_validation(rule_language: LanguageSpecificRule): def test_rule_with_complete_list_of_security_standard_passes_validation(rule_language: LanguageSpecificRule):
@ -78,4 +127,4 @@ def test_rule_with_complete_list_of_security_standard_passes_validation(rule_lan
metadata['securityStandards'] = {'ASVS 4': [], 'OWASP': [], "OWASP Top 10 2021": []} metadata['securityStandards'] = {'ASVS 4': [], 'OWASP': [], "OWASP Top 10 2021": []}
with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock: with patch.object(LanguageSpecificRule, 'metadata', new_callable=PropertyMock) as mock:
mock.return_value = metadata mock.return_value = metadata
validate_metadata(rule_language) validate_rule_specialization_metadata(rule_language)