From 7f827147d18bca83c9d2331f370defaf843ea8aa Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 22 Sep 2023 12:10:31 +0200 Subject: [PATCH] Create rule S6729: np.nonzero should be preferred over np.where when only the condition parameter is set. (#2966) You can preview this rule [here](https://sonarsource.github.io/rspec/#/rspec/S6729/python) (updated a few minutes after each push). ## Review A dedicated reviewer checked the rule description successfully for: - [ ] logical errors and incorrect information - [ ] information gaps and missing content - [ ] text style and tone - [ ] PR summary and labels follow [the guidelines](https://github.com/SonarSource/rspec/#to-modify-an-existing-rule) --------- Co-authored-by: joke1196 Co-authored-by: David Kunzmann --- rules/S6729/metadata.json | 2 + rules/S6729/python/metadata.json | 26 ++++++++++ rules/S6729/python/rule.adoc | 81 ++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 rules/S6729/metadata.json create mode 100644 rules/S6729/python/metadata.json create mode 100644 rules/S6729/python/rule.adoc diff --git a/rules/S6729/metadata.json b/rules/S6729/metadata.json new file mode 100644 index 0000000000..2c63c08510 --- /dev/null +++ b/rules/S6729/metadata.json @@ -0,0 +1,2 @@ +{ +} diff --git a/rules/S6729/python/metadata.json b/rules/S6729/python/metadata.json new file mode 100644 index 0000000000..15ae580143 --- /dev/null +++ b/rules/S6729/python/metadata.json @@ -0,0 +1,26 @@ +{ + "title": "np.nonzero should be preferred over np.where when only the condition parameter is set", + "type": "CODE_SMELL", + "status": "ready", + "remediation": { + "func": "Constant\/Issue", + "constantCost": "5min" + }, + "tags": [ + "numpy", + "data-science" + ], + "defaultSeverity": "Major", + "ruleSpecification": "RSPEC-6729", + "sqKey": "S6729", + "scope": "All", + "defaultQualityProfiles": ["Sonar way"], + "quickfix": "unknown", + 
"code": { + "impacts": { + "MAINTAINABILITY": "HIGH", + "RELIABILITY": "HIGH" + }, + "attribute": "CONVENTIONAL" + } +} diff --git a/rules/S6729/python/rule.adoc b/rules/S6729/python/rule.adoc new file mode 100644 index 0000000000..21ac2ae712 --- /dev/null +++ b/rules/S6729/python/rule.adoc @@ -0,0 +1,81 @@ +This rule raises an issue when ``++np.where++`` is used with only the condition parameter set. + +== Why is this an issue? + +The NumPy function ``++np.where++`` provides a way to execute operations on an array under a certain condition: + +[source,python] +---- +import numpy as np + +arr = np.array([1,2,3,4]) + +result = np.where(arr > 3, arr * 2, arr) +---- + +In the example above, the ``++np.where++`` function multiplies by 2 all the elements of the array that satisfy the condition ``++element > 3++``. +The elements that do not satisfy the condition are left untouched. +The ``++result++`` array now holds the values 1, 2, 3 and 8. + +It is also possible to call ``++np.where++`` with only the condition parameter set: + +[source,python] +---- +import numpy as np + +arr = np.array([1,2,3,4]) + +result = np.where(arr > 2) +---- + +Even though this is perfectly valid code in NumPy, it may not yield the expected results. + +When providing only the condition parameter to the ``++np.where++`` function, it will behave as ``++np.asarray(condition).nonzero()++`` or ``++np.nonzero(condition)++``. +Both of these functions provide a way to find the indices of the elements satisfying the condition passed as a parameter. +Be mindful that ``++np.asarray(condition).nonzero()++`` and ``++np.nonzero(condition)++`` do not return the *values* that satisfy the condition, but only their *indices*. +This means the ``++result++`` variable now holds a tuple +whose first element is an array of all the indices where the condition ``++arr > 2++`` is satisfied: ``++(array([2,3]),)++``. 
+ +If the intention is to find the indices of the elements that satisfy a certain condition, it is preferable to use ``++np.asarray(condition).nonzero()++`` or ``++np.nonzero(condition)++`` instead: this makes the intent explicit and is the approach recommended by the NumPy documentation. + + +== How to fix it + +To fix this issue, either: + +* provide all three parameters to the ``++np.where++`` function (condition, value if the condition is satisfied, value if the condition is not satisfied), or +* use the ``++np.nonzero++`` function. + +=== Code examples + +==== Noncompliant code example + +[source,python,diff-id=1,diff-type=noncompliant] +---- +import numpy as np + +def bigger_than_two(): + arr = np.array([1,2,3,4]) + result = np.where(arr > 2) # Noncompliant: only the condition parameter is provided to the np.where function. +---- + +==== Compliant solution + +[source,python,diff-id=1,diff-type=compliant] +---- +import numpy as np + +def bigger_than_two(): + arr = np.array([1,2,3,4]) + result = np.where(arr > 2, arr + 1, arr) # Compliant + indices = np.nonzero(arr > 2) # Compliant +---- + + + +== Resources +=== Documentation + +* NumPy Documentation - https://numpy.org/doc/stable/reference/generated/numpy.where.html#numpy.where[numpy.where] +* NumPy Documentation - https://numpy.org/doc/stable/reference/generated/numpy.nonzero.html#numpy.nonzero[numpy.nonzero] +