From c83ae776be18238aa90fb28ff38c58ef86fe82bd Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 2 Jun 2025 20:40:16 +0530 Subject: [PATCH 001/390] Get tag from VERSION manifest Signed-off-by: Keshav Priyadarshi --- setup.cfg | 2 +- vulnerabilities/models.py | 2 +- vulnerablecode/__init__.py | 25 ++++++++++++++++++++++++- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index ae58b59b4..81f29d43f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = vulnerablecode -version = 36.1.0 +version = 36.1.2 license = Apache-2.0 AND CC-BY-SA-4.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index b6bd861eb..e1fe7353e 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2005,7 +2005,7 @@ def set_vulnerablecode_version_and_commit(self): msg = f"Field vulnerablecode_version already set to {self.vulnerablecode_version}" raise ValueError(msg) - self.vulnerablecode_version = VULNERABLECODE_VERSION + self.vulnerablecode_version = vulnerablecode.get_git_tag() self.vulnerablecode_commit = vulnerablecode.get_short_commit() self.save(update_fields=["vulnerablecode_version", "vulnerablecode_commit"]) diff --git a/vulnerablecode/__init__.py b/vulnerablecode/__init__.py index 6ad120243..1ac063667 100644 --- a/vulnerablecode/__init__.py +++ b/vulnerablecode/__init__.py @@ -14,7 +14,7 @@ import git -__version__ = "36.1.0" +__version__ = "36.1.2" PROJECT_DIR = Path(__file__).resolve().parent @@ -49,6 +49,29 @@ def get_git_commit_from_version_file(): return +def get_git_tag_from_version_file(): + """Return the tag from the ".VERSION" file.""" + version_file = ROOT_DIR / ".VERSION" + if not version_file.exists(): + return + + try: + lines = version_file.read_text().splitlines() + ref_line = lines[0] + if "tag:" in ref_line: + if vcio_tag := ref_line.split("tag:")[-1].strip(): + return vcio_tag 
+ except (UnicodeDecodeError): + return + + +def get_git_tag(): + """Return the tag from the ".VERSION" file or __version__.""" + if vcio_tag := get_git_tag_from_version_file(): + return vcio_tag + return __version__ + + def get_short_commit(): """ Return the short commit hash from the .VERSION file or from `git describe` From ec8c90e251ada148588b2e73f7e9f63692f7c80d Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 2 Jun 2025 20:55:47 +0530 Subject: [PATCH 002/390] Update vulnerabilities search url Signed-off-by: Keshav Priyadarshi --- docs/source/user-interface.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/user-interface.rst b/docs/source/user-interface.rst index 251896c8a..b907ecfd5 100644 --- a/docs/source/user-interface.rst +++ b/docs/source/user-interface.rst @@ -15,11 +15,11 @@ package URL or purl prefix fragment such as The search by packages is available at the following URL: - `https://public.vulnerablecode.io/packages/search `_ + `https://public.vulnerablecode.io/packages/search/ `_ How to search by packages: - 1. Go to the URL: `https://public.vulnerablecode.io/packages/search `_ + 1. Go to the URL: `https://public.vulnerablecode.io/packages/search/ `_ 2. Enter the package URL or purl prefix fragment such as ``pkg:pypi`` or by package name in the search box. 3. Click on the search button. @@ -46,11 +46,11 @@ fragment of these identifiers like ``CVE-2021``. The search by vulnerabilities is available at the following URL: - `https://public.vulnerablecode.io/vulnerabilities/search `_ + `https://public.vulnerablecode.io/vulnerabilities/search/ `_ How to search by vulnerabilities: - 1. Go to the URL: `https://public.vulnerablecode.io/vulnerabilities/search `_ + 1. Go to the URL: `https://public.vulnerablecode.io/vulnerabilities/search/ `_ 2. Enter the VCID, CVE, GHSA, CPEs etc. in the search box. 3. Click on the search button. 
From 8f79fc0412d808d4d0e01a97dcceb66ee398056c Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 2 Jun 2025 21:18:28 +0530 Subject: [PATCH 003/390] Add CHANGELOG for v36.1.2 Signed-off-by: Keshav Priyadarshi --- CHANGELOG.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e81e19831..5776daa9b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,18 @@ Release notes ============= +Version v36.1.2 +--------------------- + +- Get tag from VERSION manifest #1895 + + +Version v36.1.1 +--------------------- + +- Update is_active help text in pipeline migration #1887 + + Version v36.1.0 --------------------- From 702575be754aff54fad6571de2912b081ab9a483 Mon Sep 17 00:00:00 2001 From: Tushar Goel <34160672+TG1999@users.noreply.github.com> Date: Fri, 6 Jun 2025 14:22:10 +0530 Subject: [PATCH 004/390] Increase docker shared memory size (#1896) Signed-off-by: Tushar Goel --- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index 27a85ac1b..27bb5531d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,6 +9,7 @@ services: volumes: - db_data:/var/lib/postgresql/data/ - ./etc/postgresql/postgresql.conf:/etc/postgresql/postgresql.conf + shm_size: 1gb vulnerablecode_redis: image: redis From 25830d37e7063d27d878e54bce948d97d9252de0 Mon Sep 17 00:00:00 2001 From: Tushar Goel <34160672+TG1999@users.noreply.github.com> Date: Fri, 6 Jun 2025 14:50:32 +0530 Subject: [PATCH 005/390] Prepare for release v36.1.3 (#1900) Signed-off-by: Tushar Goel --- CHANGELOG.rst | 5 +++++ setup.cfg | 2 +- vulnerablecode/__init__.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5776daa9b..e138ebef4 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,11 @@ Release notes ============= +Version v36.1.3 +--------------------- + +- Increase docker shared memory size #1896 + Version 
v36.1.2 --------------------- diff --git a/setup.cfg b/setup.cfg index 81f29d43f..da5c028ef 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = vulnerablecode -version = 36.1.2 +version = 36.1.3 license = Apache-2.0 AND CC-BY-SA-4.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 diff --git a/vulnerablecode/__init__.py b/vulnerablecode/__init__.py index 1ac063667..a67216173 100644 --- a/vulnerablecode/__init__.py +++ b/vulnerablecode/__init__.py @@ -14,7 +14,7 @@ import git -__version__ = "36.1.2" +__version__ = "36.1.3" PROJECT_DIR = Path(__file__).resolve().parent From 651fc0d23a16a22a5de8c5bf010367f045a608b2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 10 Jun 2025 13:57:59 +0530 Subject: [PATCH 006/390] Bump django from 4.2.21 to 4.2.22 (#1901) Bumps [django](https://github.com/django/django) from 4.2.21 to 4.2.22. - [Commits](https://github.com/django/django/compare/4.2.21...4.2.22) --- updated-dependencies: - dependency-name: django dependency-version: 4.2.22 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index feeae87a7..357b6fe30 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,7 +27,7 @@ dateparser==1.1.1 decorator==5.1.1 defusedxml==0.7.1 distro==1.7.0 -Django==4.2.21 +Django==4.2.22 django-crispy-forms==2.3 django-environ==0.11.2 django-filter==24.3 From 739de9d41f973485c8429639b9347eae61d3d1c2 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 16 Jan 2025 15:43:41 +0530 Subject: [PATCH 007/390] Add TODO model for Advisory Signed-off-by: Keshav Priyadarshi --- .../migrations/0088_advisorytodo.py | 89 +++++++++++++++++++ vulnerabilities/models.py | 55 ++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 vulnerabilities/migrations/0088_advisorytodo.py diff --git a/vulnerabilities/migrations/0088_advisorytodo.py b/vulnerabilities/migrations/0088_advisorytodo.py new file mode 100644 index 000000000..ffc1cf6ac --- /dev/null +++ b/vulnerabilities/migrations/0088_advisorytodo.py @@ -0,0 +1,89 @@ +# Generated by Django 4.2.17 on 2025-01-16 10:47 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0087_update_alpine_advisory_created_by"), + ] + + operations = [ + migrations.CreateModel( + name="AdvisoryTODO", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "issue_type", + models.CharField( + choices=[ + ("MISSING_AFFECTED_PACKAGE", "Advisory is missing affected package"), + ("MISSING_FIXED_BY_PACKAGE", "Advisory is missing fixed-by package"), + ( + "MISSING_AFFECTED_AND_FIXED_BY_PACKAGES", + "Advisory is missing both affected and fixed-by packages", + ), + ("MISSING_SUMMARY", "Advisory is missing summary"), + ( + 
"CONFLICTING_FIXED_BY_PACKAGES", + "Advisories have conflicting fixed-by packages", + ), + ( + "CONFLICTING_AFFECTED_PACKAGES", + "Advisories have conflicting affected packages", + ), + ( + "CONFLICTING_AFFECTED_AND_FIXED_BY_PACKAGES", + "Advisories have conflicting affected and fixed-by packages", + ), + ( + "CONFLICTING_SEVERITY_SCORES", + "Advisories have conflicting severity scores", + ), + ], + db_index=True, + help_text="Select the issue that needs to be addressed from the available options.", + max_length=50, + ), + ), + ("issue_detail", models.TextField(help_text="Additional details about the issue.")), + ( + "created_at", + models.DateTimeField( + auto_now_add=True, + help_text="Timestamp indicating when this TODO was created.", + ), + ), + ( + "is_resolved", + models.BooleanField( + db_index=True, default=False, help_text="This TODO is resolved or not." + ), + ), + ( + "resolved_at", + models.DateTimeField( + help_text="Timestamp indicating when this TODO was resolved." + ), + ), + ( + "resolution_detail", + models.TextField(help_text="Additional detail on how this TODO was resolved."), + ), + ( + "advisories", + models.ManyToManyField( + help_text="Advisory/ies where this TODO is applicable.", + related_name="advisory_todos", + to="vulnerabilities.advisory", + ), + ), + ], + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index e1fe7353e..6bd20e95b 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2258,3 +2258,58 @@ def create_new_job(self, execute_now=False): schedules.clear_job(self.schedule_work_id) return schedules.schedule_execution(self, execute_now) if self.is_active else None +class AdvisoryTODO(models.Model): + """Track the TODOs for advisory/ies that need to be addressed.""" + + ISSUE_TYPE_CHOICES = [ + ("MISSING_AFFECTED_PACKAGE", "Advisory is missing affected package"), + ("MISSING_FIXED_BY_PACKAGE", "Advisory is missing fixed-by package"), + ( + "MISSING_AFFECTED_AND_FIXED_BY_PACKAGES", + 
"Advisory is missing both affected and fixed-by packages", + ), + ("MISSING_SUMMARY", "Advisory is missing summary"), + ("CONFLICTING_FIXED_BY_PACKAGES", "Advisories have conflicting fixed-by packages"), + ("CONFLICTING_AFFECTED_PACKAGES", "Advisories have conflicting affected packages"), + ( + "CONFLICTING_AFFECTED_AND_FIXED_BY_PACKAGES", + "Advisories have conflicting affected and fixed-by packages", + ), + ("CONFLICTING_SEVERITY_SCORES", "Advisories have conflicting severity scores"), + ] + + issue_type = models.CharField( + max_length=50, + choices=ISSUE_TYPE_CHOICES, + blank=False, + null=False, + db_index=True, + help_text="Select the issue that needs to be addressed from the available options.", + ) + issue_detail = models.TextField( + help_text="Additional details about the issue.", + ) + advisories = models.ManyToManyField( + Advisory, + related_name="advisory_todos", + help_text="Advisory/ies where this TODO is applicable.", + ) + + created_at = models.DateTimeField( + auto_now_add=True, + help_text="Timestamp indicating when this TODO was created.", + ) + + is_resolved = models.BooleanField( + default=False, + db_index=True, + help_text="This TODO is resolved or not.", + ) + + resolved_at = models.DateTimeField( + help_text="Timestamp indicating when this TODO was resolved.", + ) + + resolution_detail = models.TextField( + help_text="Additional detail on how this TODO was resolved.", + ) From 7d38a4f80b6cd2a2fecde89a26e2fe9a37e880b1 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 27 Jan 2025 23:36:08 +0530 Subject: [PATCH 008/390] Add unique_together constraint for AdvisoryToDo Signed-off-by: Keshav Priyadarshi --- ...8_advisorytodo.py => 0089_advisorytodo.py} | 16 ++++++++++--- vulnerabilities/models.py | 23 +++++++++++++++++-- 2 files changed, 34 insertions(+), 5 deletions(-) rename vulnerabilities/migrations/{0088_advisorytodo.py => 0089_advisorytodo.py} (86%) diff --git a/vulnerabilities/migrations/0088_advisorytodo.py 
b/vulnerabilities/migrations/0089_advisorytodo.py similarity index 86% rename from vulnerabilities/migrations/0088_advisorytodo.py rename to vulnerabilities/migrations/0089_advisorytodo.py index ffc1cf6ac..fd42c8286 100644 --- a/vulnerabilities/migrations/0088_advisorytodo.py +++ b/vulnerabilities/migrations/0089_advisorytodo.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.17 on 2025-01-16 10:47 +# Generated by Django 4.2.17 on 2025-01-27 18:04 from django.db import migrations, models @@ -6,12 +6,12 @@ class Migration(migrations.Migration): dependencies = [ - ("vulnerabilities", "0087_update_alpine_advisory_created_by"), + ("vulnerabilities", "0088_fix_alpine_purl_type"), ] operations = [ migrations.CreateModel( - name="AdvisoryTODO", + name="AdvisoryToDo", fields=[ ( "id", @@ -19,6 +19,13 @@ class Migration(migrations.Migration): auto_created=True, primary_key=True, serialize=False, verbose_name="ID" ), ), + ( + "related_advisories_id", + models.CharField( + help_text="SHA1 digest of the unique_content_id field of the applicable advisories.", + max_length=40, + ), + ), ( "issue_type", models.CharField( @@ -85,5 +92,8 @@ class Migration(migrations.Migration): ), ), ], + options={ + "unique_together": {("related_advisories_id", "issue_type")}, + }, ), ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 6bd20e95b..ef11a6975 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2258,9 +2258,9 @@ def create_new_job(self, execute_now=False): schedules.clear_job(self.schedule_work_id) return schedules.schedule_execution(self, execute_now) if self.is_active else None -class AdvisoryTODO(models.Model): - """Track the TODOs for advisory/ies that need to be addressed.""" + +class AdvisoryTODO(models.Model): ISSUE_TYPE_CHOICES = [ ("MISSING_AFFECTED_PACKAGE", "Advisory is missing affected package"), ("MISSING_FIXED_BY_PACKAGE", "Advisory is missing fixed-by package"), @@ -2278,6 +2278,20 @@ class AdvisoryTODO(models.Model): 
("CONFLICTING_SEVERITY_SCORES", "Advisories have conflicting severity scores"), ] + +class AdvisoryToDo(models.Model): + """Track the TODOs for advisory/ies that need to be addressed.""" + + # Since we can not make advisories field (M2M field) unique + # (see https://code.djangoproject.com/ticket/702), we use related_advisories_id + # to avoid creating duplicate issue for same set of advisories, + related_advisories_id = models.CharField( + max_length=40, + blank=False, + null=False, + help_text="SHA1 digest of the unique_content_id field of the applicable advisories.", + ) + issue_type = models.CharField( max_length=50, choices=ISSUE_TYPE_CHOICES, @@ -2286,9 +2300,11 @@ class AdvisoryTODO(models.Model): db_index=True, help_text="Select the issue that needs to be addressed from the available options.", ) + issue_detail = models.TextField( help_text="Additional details about the issue.", ) + advisories = models.ManyToManyField( Advisory, related_name="advisory_todos", @@ -2313,3 +2329,6 @@ class AdvisoryTODO(models.Model): resolution_detail = models.TextField( help_text="Additional detail on how this TODO was resolved.", ) + + class Meta: + unique_together = ("related_advisories_id", "issue_type") From 8f86f468f017277bbf70b4846c5a632e36ad62cb Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 27 Jan 2025 23:44:48 +0530 Subject: [PATCH 009/390] Move get_versions to fetchcode_utils pipe Signed-off-by: Keshav Priyadarshi --- .../pipelines/flag_ghost_packages.py | 16 +--------- vulnerabilities/pipes/advisory.py | 13 ++++++++ vulnerabilities/pipes/fetchcode_utils.py | 31 +++++++++++++++++++ .../pipelines/test_flag_ghost_packages.py | 4 +-- 4 files changed, 47 insertions(+), 17 deletions(-) create mode 100644 vulnerabilities/pipes/fetchcode_utils.py diff --git a/vulnerabilities/pipelines/flag_ghost_packages.py b/vulnerabilities/pipelines/flag_ghost_packages.py index 7daee4115..da540cd04 100644 --- a/vulnerabilities/pipelines/flag_ghost_packages.py +++ 
b/vulnerabilities/pipelines/flag_ghost_packages.py @@ -9,15 +9,14 @@ import logging from itertools import groupby -from traceback import format_exc as traceback_format_exc from aboutcode.pipeline import LoopProgress from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS -from fetchcode.package_versions import versions from packageurl import PackageURL from vulnerabilities.models import Package from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.pipes.fetchcode_utils import get_versions class FlagGhostPackagePipeline(VulnerableCodePipeline): @@ -89,16 +88,3 @@ def flag_ghost_packages(base_purl, packages, logger=None): pkg.save() return ghost_packages - - -def get_versions(purl, logger=None): - """Return set of known versions for the given purl.""" - try: - return {v.value.lstrip("vV") for v in versions(str(purl))} - except Exception as e: - if logger: - logger( - f"Error while fetching known versions for {purl!s}: {e!r} \n {traceback_format_exc()}", - level=logging.ERROR, - ) - return diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index dd21bc88c..46f8b1ed3 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -7,12 +7,14 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# +import hashlib import logging from datetime import datetime from datetime import timezone from traceback import format_exc as traceback_format_exc from typing import Callable from typing import List +from typing import Union from django.db import transaction from django.db.models.query import QuerySet @@ -183,3 +185,14 @@ def import_advisory( advisory.date_imported = datetime.now(timezone.utc) advisory.save() + + +def advisories_checksum(advisories: Union[Advisory, List[Advisory]]) -> str: + if isinstance(advisories, Advisory): + advisories = [advisories] + + contents = sorted([advisory.unique_content_id for advisory in advisories]) + combined_contents = "".join(contents) + + checksum = hashlib.sha1(combined_contents.encode()) + return checksum.hexdigest() diff --git a/vulnerabilities/pipes/fetchcode_utils.py b/vulnerabilities/pipes/fetchcode_utils.py new file mode 100644 index 000000000..b6ae43814 --- /dev/null +++ b/vulnerabilities/pipes/fetchcode_utils.py @@ -0,0 +1,31 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import logging +from traceback import format_exc as traceback_format_exc +from typing import Callable + +from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS +from fetchcode.package_versions import versions +from packageurl import PackageURL + + +def get_versions(purl: PackageURL, logger: Callable = None): + """Return set of known versions for the given purl.""" + if purl.type not in FETCHCODE_SUPPORTED_ECOSYSTEMS: + return + + try: + return {v.value.lstrip("vV") for v in versions(str(purl))} + except Exception as e: + if logger: + logger( + f"Error while fetching known versions for {purl!s}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) diff --git a/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py b/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py index 192901c36..1f66878c9 100644 --- a/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py +++ b/vulnerabilities/tests/pipelines/test_flag_ghost_packages.py @@ -23,7 +23,7 @@ class FlagGhostPackagePipelineTest(TestCase): data = Path(__file__).parent.parent / "test_data" - @mock.patch("vulnerabilities.pipelines.flag_ghost_packages.versions") + @mock.patch("vulnerabilities.pipes.fetchcode_utils.versions") def test_flag_ghost_package(self, mock_fetchcode_versions): Package.objects.create(type="pypi", name="foo", version="2.3.0") Package.objects.create(type="pypi", name="foo", version="3.0.0") @@ -43,7 +43,7 @@ def test_flag_ghost_package(self, mock_fetchcode_versions): self.assertEqual(1, flagged_package_count) self.assertEqual(1, Package.objects.filter(is_ghost=True).count()) - @mock.patch("vulnerabilities.pipelines.flag_ghost_packages.versions") + @mock.patch("vulnerabilities.pipes.fetchcode_utils.versions") def test_detect_and_flag_ghost_packages(self, mock_fetchcode_versions): Package.objects.create(type="pypi", name="foo", version="2.3.0") Package.objects.create(type="pypi", name="foo", version="3.0.0") From 
00c92aaab2cc6bf461d569dd6d0ade92caeffd8e Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 27 Jan 2025 23:46:20 +0530 Subject: [PATCH 010/390] Add pipeline to compute Advisory ToDos Signed-off-by: Keshav Priyadarshi --- vulnerabilities/improvers/__init__.py | 2 + .../pipelines/compute_advisory_todo.py | 271 ++++++++++++++++++ 2 files changed, 273 insertions(+) create mode 100644 vulnerabilities/pipelines/compute_advisory_todo.py diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 9e36ce5f0..08cce6ff9 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -12,6 +12,7 @@ from vulnerabilities.pipelines import VulnerableCodePipeline from vulnerabilities.pipelines import add_cvss31_to_CVEs from vulnerabilities.pipelines import collect_commits +from vulnerabilities.pipelines import compute_advisory_todo from vulnerabilities.pipelines import compute_package_risk from vulnerabilities.pipelines import compute_package_version_rank from vulnerabilities.pipelines import enhance_with_exploitdb @@ -49,6 +50,7 @@ add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline, populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, + compute_advisory_todo.ComputeToDo, ] IMPROVERS_REGISTRY = { diff --git a/vulnerabilities/pipelines/compute_advisory_todo.py b/vulnerabilities/pipelines/compute_advisory_todo.py new file mode 100644 index 000000000..0f9207f6c --- /dev/null +++ b/vulnerabilities/pipelines/compute_advisory_todo.py @@ -0,0 +1,271 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. 
+# See https://aboutcode.org for more information about nexB OSS projects. +# + + +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import Advisory +from vulnerabilities.models import AdvisoryToDo +from vulnerabilities.models import Alias +from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.pipes import fetchcode_utils +from vulnerabilities.pipes.advisory import advisories_checksum + + +class ComputeToDo(VulnerableCodePipeline): + """Compute advisory AdvisoryToDo.""" + + pipeline_id = "compute_advisory_todo" + + @classmethod + def steps(cls): + return ( + cls.compute_individual_advisory_todo, + cls.detect_conflicting_advisories, + ) + + def compute_individual_advisory_todo(self): + advisories = Advisory.objects.all().paginated() + advisories_count = Advisory.objects.all().count() + + self.log( + f"Checking missing summary, affected and fixed packages in {advisories_count} Advisories" + ) + progress = LoopProgress( + total_iterations=advisories_count, + logger=self.log, + progress_step=1, + ) + for advisory in progress.iter(advisories): + advisory_todo_id = advisories_checksum(advisories=advisory) + check_missing_summary( + advisory=advisory, + todo_id=advisory_todo_id, + logger=self.log, + ) + check_missing_affected_and_fixed_by_packages( + advisory=advisory, + todo_id=advisory_todo_id, + logger=self.log, + ) + + def detect_conflicting_advisories(self): + PACKAGE_VERSIONS = {} + aliases = Alias.objects.filter(alias__istartswith="cve") + aliases_count = aliases.count() + + self.log(f"Cross validating advisory affected and fixed package for {aliases_count} CVEs") + + progress = LoopProgress(total_iterations=aliases_count, logger=self.log) + for alias in progress.iter(aliases.paginated()): + advisories = ( + Advisory.objects.filter(aliases__contains=alias.alias) + .exclude(advisory_todos__issue_type="MISSING_AFFECTED_AND_FIXED_BY_PACKAGES") + .distinct() + ) + purls = get_advisories_purls(advisories=advisories) 
+ get_package_versions( + purls=purls, + package_versions=PACKAGE_VERSIONS, + logger=self.log, + ) + check_conflicting_affected_and_fixed_by_packages( + advisories=advisories, + package_versions=PACKAGE_VERSIONS, + purls=purls, + cve=alias, + logger=self.log, + ) + + +def check_missing_summary(advisory, todo_id, logger=None): + if not advisory.summary: + todo, created = AdvisoryToDo.objects.get_or_create( + unique_todo_id=todo_id, + issue_type="MISSING_SUMMARY", + issue_detail="", + ) + if created: + todo.advisories.add(advisory) + + +def check_missing_affected_and_fixed_by_packages(advisory, todo_id, logger=None): + """ + Check for missing affected or fixed-by packages in the advisory + and create appropriate AdvisoryToDo. + + - If both affected and fixed packages are missing add `MISSING_AFFECTED_AND_FIXED_BY_PACKAGES`. + - If only the affected package is missing add `MISSING_AFFECTED_PACKAGE`. + - If only the fixed package is missing add `MISSING_FIXED_BY_PACKAGE`. + """ + has_affected_package = False + has_fixed_package = False + for affected in advisory.to_advisory_data().affected_packages or []: + if has_affected_package and has_fixed_package: + break + if not has_affected_package and affected.affected_version_range: + has_affected_package = True + if not has_fixed_package and affected.fixed_version: + has_fixed_package = True + + if has_affected_package and has_fixed_package: + return + + if not has_affected_package and not has_fixed_package: + issue_type = "MISSING_AFFECTED_AND_FIXED_BY_PACKAGES" + elif not has_affected_package: + issue_type = "MISSING_AFFECTED_PACKAGE" + elif has_fixed_package: + issue_type = "MISSING_FIXED_BY_PACKAGE" + todo, created = AdvisoryToDo.objects.get_or_create( + unique_todo_id=todo_id, + issue_type=issue_type, + issue_detail="", + ) + if created: + todo.advisories.add(advisory) + + +def get_package_versions(purls, package_versions, logger=None): + for purl in purls: + if purl in package_versions: + continue + versions = 
fetchcode_utils.versions(purl=purl, logger=logger) + package_versions[purl] = versions + + +def get_advisories_purls(advisories): + purls = set() + for advisory in advisories: + advisory_obj = advisory.to_advisory_data() + purls.update([str(i.package) for i in advisory_obj.affected_packages]) + return purls + + +def check_conflicting_affected_and_fixed_by_packages( + advisories, package_versions, purls, cve, logger=None +): + """ + Add appropriate AdvisoryToDo for conflicting affected/fixed packages. + + Compute the comparison matrix for the given set of advisories. Iterate through each advisory + and compute and store fixed versions and normalized affected versions for each advisory, + keyed by purl. + + Use the matrix to determine conflicts in affected/fixed versions for each purl. If for any purl + there is more than one set of fixed versions or more than one set of affected versions, + it means the advisories have conflicting opinions on the fixed or affected packages. + + Example of comparison matrix: + { + "pkg:npm/foo/bar": { + "affected": { + Advisory1: frozenset(NormalizedVersionRange1, NormalizedVersionRange2), + Advisory2: frozenset(...), + }, + "fixed": { + Advisory1: frozenset(Version1, Version2), + Advisory2: frozenset(...), + }, + }, + "pkg:pypi/foobar": { + "affected": { + Advisory1: frozenset(...), + Advisory2: frozenset(...), + }, + "fixed": { + Advisory1: frozenset(...), + Advisory2: frozenset(...), + }, + }, + ... 
+ } + """ + matrix = {} + for advisory in advisories: + advisory_obj = advisory.to_advisory_data() + for affected in advisory_obj.affected_packages or []: + affected_purl = str(affected.package) + + if affected_purl not in purls or not purls[affected_purl]: + continue + + initialize_sub_matrix( + matrix=matrix, + affected_purl=affected_purl, + advisory=advisory, + ) + + if fixed_version := affected.fixed_version: + matrix[affected_purl]["fixed"][advisory].add(fixed_version) + + if affected.affected_version_range: + normalized_vers = affected.affected_version_range.normalize( + known_versions=package_versions[affected_purl], + ) + matrix[affected_purl]["affected"][advisory].add(normalized_vers) + + has_conflicting_affected_packages = False + has_conflicting_fixed_package = False + messages = [] + for purl, board in matrix.items(): + fixed = board.get("fixed", {}).values() + affected = board.get("affected", {}).values() + + # Compare affected_vers set across different advisories. + unique_set_of_affected_vers = {frozenset(vers) for vers in affected} + + # Compare fixed_version set across different advisories. 
+ unique_set_of_fixed_versions = {frozenset(versions) for versions in fixed} + + if len(unique_set_of_affected_vers) > 1: + has_conflicting_affected_packages = True + messages.append( + f"{cve}: {purl} with conflicting affected versions {unique_set_of_affected_vers}" + ) + if len(unique_set_of_fixed_versions) > 1: + has_conflicting_fixed_package = True + messages.append( + f"{cve}: {purl} with conflicting fixed version {unique_set_of_fixed_versions}" + ) + + if not has_conflicting_affected_packages and not has_conflicting_fixed_package: + return + + issue_type = "CONFLICTING_AFFECTED_AND_FIXED_BY_PACKAGES" + if not has_conflicting_fixed_package: + issue_type = "CONFLICTING_AFFECTED_PACKAGES" + elif not has_conflicting_affected_packages: + issue_type = "CONFLICTING_FIXED_BY_PACKAGES" + + todo_id = advisories_checksum(advisories) + todo, created = AdvisoryToDo.objects.get_or_create( + unique_todo_id=todo_id, + issue_type=issue_type, + issue_detail="\n".join(messages), + ) + if created: + todo.advisories.add(*advisories) + + +def initialize_sub_matrix(matrix, affected_purl, advisory): + if affected_purl not in matrix: + matrix[affected_purl] = { + "affected": { + advisory: set(), + }, + "fixed": { + advisory: set(), + }, + } + else: + if advisory not in matrix[affected_purl]["affected"]: + matrix[affected_purl]["affected"] = set() + if advisory not in matrix[affected_purl]["fixed"]: + matrix[affected_purl]["fixed"] = set() From 322c966c99e481b385877cec678d3b8247e49573 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 4 Jun 2025 00:52:33 +0700 Subject: [PATCH 011/390] Resolve migration conflict Signed-off-by: Keshav Priyadarshi --- ...9_advisorytodo.py => 0093_advisorytodo.py} | 4 +-- vulnerabilities/models.py | 33 +++++++++---------- 2 files changed, 18 insertions(+), 19 deletions(-) rename vulnerabilities/migrations/{0089_advisorytodo.py => 0093_advisorytodo.py} (97%) diff --git a/vulnerabilities/migrations/0089_advisorytodo.py 
b/vulnerabilities/migrations/0093_advisorytodo.py similarity index 97% rename from vulnerabilities/migrations/0089_advisorytodo.py rename to vulnerabilities/migrations/0093_advisorytodo.py index fd42c8286..59b87636b 100644 --- a/vulnerabilities/migrations/0089_advisorytodo.py +++ b/vulnerabilities/migrations/0093_advisorytodo.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.17 on 2025-01-27 18:04 +# Generated by Django 4.2.20 on 2025-06-03 17:36 from django.db import migrations, models @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ("vulnerabilities", "0088_fix_alpine_purl_type"), + ("vulnerabilities", "0092_pipelineschedule_pipelinerun"), ] operations = [ diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index ef11a6975..b1cdb27fb 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2260,23 +2260,22 @@ def create_new_job(self, execute_now=False): return schedules.schedule_execution(self, execute_now) if self.is_active else None -class AdvisoryTODO(models.Model): - ISSUE_TYPE_CHOICES = [ - ("MISSING_AFFECTED_PACKAGE", "Advisory is missing affected package"), - ("MISSING_FIXED_BY_PACKAGE", "Advisory is missing fixed-by package"), - ( - "MISSING_AFFECTED_AND_FIXED_BY_PACKAGES", - "Advisory is missing both affected and fixed-by packages", - ), - ("MISSING_SUMMARY", "Advisory is missing summary"), - ("CONFLICTING_FIXED_BY_PACKAGES", "Advisories have conflicting fixed-by packages"), - ("CONFLICTING_AFFECTED_PACKAGES", "Advisories have conflicting affected packages"), - ( - "CONFLICTING_AFFECTED_AND_FIXED_BY_PACKAGES", - "Advisories have conflicting affected and fixed-by packages", - ), - ("CONFLICTING_SEVERITY_SCORES", "Advisories have conflicting severity scores"), - ] +ISSUE_TYPE_CHOICES = [ + ("MISSING_AFFECTED_PACKAGE", "Advisory is missing affected package"), + ("MISSING_FIXED_BY_PACKAGE", "Advisory is missing fixed-by package"), + ( + "MISSING_AFFECTED_AND_FIXED_BY_PACKAGES", + "Advisory is missing 
both affected and fixed-by packages", + ), + ("MISSING_SUMMARY", "Advisory is missing summary"), + ("CONFLICTING_FIXED_BY_PACKAGES", "Advisories have conflicting fixed-by packages"), + ("CONFLICTING_AFFECTED_PACKAGES", "Advisories have conflicting affected packages"), + ( + "CONFLICTING_AFFECTED_AND_FIXED_BY_PACKAGES", + "Advisories have conflicting affected and fixed-by packages", + ), + ("CONFLICTING_SEVERITY_SCORES", "Advisories have conflicting severity scores"), +] class AdvisoryToDo(models.Model): From dcd5d703fc395d476628fd6be4d57eae98247ba0 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 4 Jun 2025 03:03:31 +0700 Subject: [PATCH 012/390] Use advisory related aliases to compare conflicting advisories Signed-off-by: Keshav Priyadarshi --- .../migrations/0093_advisorytodo.py | 15 +++++++--- vulnerabilities/models.py | 8 +++++ .../pipelines/compute_advisory_todo.py | 29 ++++++++++++------- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/vulnerabilities/migrations/0093_advisorytodo.py b/vulnerabilities/migrations/0093_advisorytodo.py index 59b87636b..f380d02c5 100644 --- a/vulnerabilities/migrations/0093_advisorytodo.py +++ b/vulnerabilities/migrations/0093_advisorytodo.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.20 on 2025-06-03 17:36 +# Generated by Django 4.2.20 on 2025-06-03 18:13 from django.db import migrations, models @@ -59,7 +59,10 @@ class Migration(migrations.Migration): max_length=50, ), ), - ("issue_detail", models.TextField(help_text="Additional details about the issue.")), + ( + "issue_detail", + models.TextField(blank=True, help_text="Additional details about the issue."), + ), ( "created_at", models.DateTimeField( @@ -76,12 +79,16 @@ class Migration(migrations.Migration): ( "resolved_at", models.DateTimeField( - help_text="Timestamp indicating when this TODO was resolved." 
+ blank=True, + help_text="Timestamp indicating when this TODO was resolved.", + null=True, ), ), ( "resolution_detail", - models.TextField(help_text="Additional detail on how this TODO was resolved."), + models.TextField( + blank=True, help_text="Additional detail on how this TODO was resolved." + ), ), ( "advisories", diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index b1cdb27fb..2da3cc665 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2301,6 +2301,7 @@ class AdvisoryToDo(models.Model): ) issue_detail = models.TextField( + blank=True, help_text="Additional details about the issue.", ) @@ -2322,12 +2323,19 @@ class AdvisoryToDo(models.Model): ) resolved_at = models.DateTimeField( + null=True, + blank=True, help_text="Timestamp indicating when this TODO was resolved.", ) resolution_detail = models.TextField( + blank=True, help_text="Additional detail on how this TODO was resolved.", ) class Meta: unique_together = ("related_advisories_id", "issue_type") + + def save(self, *args, **kwargs): + self.full_clean() + return super().save(*args, **kwargs) diff --git a/vulnerabilities/pipelines/compute_advisory_todo.py b/vulnerabilities/pipelines/compute_advisory_todo.py index 0f9207f6c..ce85cac09 100644 --- a/vulnerabilities/pipelines/compute_advisory_todo.py +++ b/vulnerabilities/pipelines/compute_advisory_todo.py @@ -31,7 +31,7 @@ def steps(cls): ) def compute_individual_advisory_todo(self): - advisories = Advisory.objects.all().paginated() + advisories = Advisory.objects.all().iterator(chunk_size=2000) advisories_count = Advisory.objects.all().count() self.log( @@ -62,10 +62,14 @@ def detect_conflicting_advisories(self): self.log(f"Cross validating advisory affected and fixed package for {aliases_count} CVEs") - progress = LoopProgress(total_iterations=aliases_count, logger=self.log) - for alias in progress.iter(aliases.paginated()): + progress = LoopProgress( + total_iterations=aliases_count, + logger=self.log, + 
progress_step=1, + ) + for alias in progress.iter(aliases.iterator(chunk_size=2000)): advisories = ( - Advisory.objects.filter(aliases__contains=alias.alias) + Advisory.objects.filter(aliases__in=aliases) .exclude(advisory_todos__issue_type="MISSING_AFFECTED_AND_FIXED_BY_PACKAGES") .distinct() ) @@ -87,9 +91,8 @@ def detect_conflicting_advisories(self): def check_missing_summary(advisory, todo_id, logger=None): if not advisory.summary: todo, created = AdvisoryToDo.objects.get_or_create( - unique_todo_id=todo_id, + related_advisories_id=todo_id, issue_type="MISSING_SUMMARY", - issue_detail="", ) if created: todo.advisories.add(advisory) @@ -107,6 +110,9 @@ def check_missing_affected_and_fixed_by_packages(advisory, todo_id, logger=None) has_affected_package = False has_fixed_package = False for affected in advisory.to_advisory_data().affected_packages or []: + if not affected: + continue + if has_affected_package and has_fixed_package: break if not has_affected_package and affected.affected_version_range: @@ -121,12 +127,11 @@ def check_missing_affected_and_fixed_by_packages(advisory, todo_id, logger=None) issue_type = "MISSING_AFFECTED_AND_FIXED_BY_PACKAGES" elif not has_affected_package: issue_type = "MISSING_AFFECTED_PACKAGE" - elif has_fixed_package: + elif not has_fixed_package: issue_type = "MISSING_FIXED_BY_PACKAGE" todo, created = AdvisoryToDo.objects.get_or_create( - unique_todo_id=todo_id, + related_advisories_id=todo_id, issue_type=issue_type, - issue_detail="", ) if created: todo.advisories.add(advisory) @@ -246,9 +251,11 @@ def check_conflicting_affected_and_fixed_by_packages( todo_id = advisories_checksum(advisories) todo, created = AdvisoryToDo.objects.get_or_create( - unique_todo_id=todo_id, + related_advisories_id=todo_id, issue_type=issue_type, - issue_detail="\n".join(messages), + defaults={ + "issue_details": "\n".join(messages), + }, ) if created: todo.advisories.add(*advisories) From 39c28e348bd07a96016adf067bc8d79dcbacea69 Mon Sep 17 00:00:00 
2001 From: Keshav Priyadarshi Date: Wed, 11 Jun 2025 00:21:14 +0530 Subject: [PATCH 013/390] Use PURL type to get version class Signed-off-by: Keshav Priyadarshi --- vulnerabilities/importer.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py index 933c19edc..759ec9330 100644 --- a/vulnerabilities/importer.py +++ b/vulnerabilities/importer.py @@ -268,9 +268,13 @@ def from_dict(cls, affected_pkg: dict): return fixed_version = affected_pkg["fixed_version"] - if fixed_version and affected_version_range: - # TODO: revisit after https://github.com/nexB/univers/issues/10 - fixed_version = affected_version_range.version_class(fixed_version) + if fixed_version: + if affected_version_range: + # TODO: revisit after https://github.com/nexB/univers/issues/10 + fixed_version = affected_version_range.version_class(fixed_version) + elif package.type in RANGE_CLASS_BY_SCHEMES: + vrc = RANGE_CLASS_BY_SCHEMES[package.type] + fixed_version = vrc.version_class(fixed_version) if not fixed_version and not affected_version_range: logger.error( From 3683f2ce78365857b1777dbd061a8ef623f2f645 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 11 Jun 2025 00:27:56 +0530 Subject: [PATCH 014/390] Include comparison matrix in issue detail Signed-off-by: Keshav Priyadarshi --- .../pipelines/compute_advisory_todo.py | 84 +++++++------------ 1 file changed, 28 insertions(+), 56 deletions(-) diff --git a/vulnerabilities/pipelines/compute_advisory_todo.py b/vulnerabilities/pipelines/compute_advisory_todo.py index ce85cac09..5b06c279e 100644 --- a/vulnerabilities/pipelines/compute_advisory_todo.py +++ b/vulnerabilities/pipelines/compute_advisory_todo.py @@ -8,18 +8,19 @@ # +import json + from aboutcode.pipeline import LoopProgress from vulnerabilities.models import Advisory from vulnerabilities.models import AdvisoryToDo from vulnerabilities.models import Alias from 
vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.pipes import fetchcode_utils from vulnerabilities.pipes.advisory import advisories_checksum class ComputeToDo(VulnerableCodePipeline): - """Compute advisory AdvisoryToDo.""" + """Compute ToDos for Advisory.""" pipeline_id = "compute_advisory_todo" @@ -56,7 +57,6 @@ def compute_individual_advisory_todo(self): ) def detect_conflicting_advisories(self): - PACKAGE_VERSIONS = {} aliases = Alias.objects.filter(alias__istartswith="cve") aliases_count = aliases.count() @@ -68,21 +68,12 @@ def detect_conflicting_advisories(self): progress_step=1, ) for alias in progress.iter(aliases.iterator(chunk_size=2000)): - advisories = ( - Advisory.objects.filter(aliases__in=aliases) - .exclude(advisory_todos__issue_type="MISSING_AFFECTED_AND_FIXED_BY_PACKAGES") - .distinct() - ) - purls = get_advisories_purls(advisories=advisories) - get_package_versions( - purls=purls, - package_versions=PACKAGE_VERSIONS, - logger=self.log, - ) + advisories = alias.advisories.exclude( + advisory_todos__issue_type="MISSING_AFFECTED_AND_FIXED_BY_PACKAGES" + ).distinct() + check_conflicting_affected_and_fixed_by_packages( advisories=advisories, - package_versions=PACKAGE_VERSIONS, - purls=purls, cve=alias, logger=self.log, ) @@ -137,30 +128,12 @@ def check_missing_affected_and_fixed_by_packages(advisory, todo_id, logger=None) todo.advisories.add(advisory) -def get_package_versions(purls, package_versions, logger=None): - for purl in purls: - if purl in package_versions: - continue - versions = fetchcode_utils.versions(purl=purl, logger=logger) - package_versions[purl] = versions - - -def get_advisories_purls(advisories): - purls = set() - for advisory in advisories: - advisory_obj = advisory.to_advisory_data() - purls.update([str(i.package) for i in advisory_obj.affected_packages]) - return purls - - -def check_conflicting_affected_and_fixed_by_packages( - advisories, package_versions, purls, cve, logger=None -): +def 
check_conflicting_affected_and_fixed_by_packages(advisories, cve, logger=None): """ Add appropriate AdvisoryToDo for conflicting affected/fixed packages. Compute the comparison matrix for the given set of advisories. Iterate through each advisory - and compute and store fixed versions and normalized affected versions for each advisory, + and compute and store fixed versions and affected versionrange for each advisory, keyed by purl. Use the matrix to determine conflicts in affected/fixed versions for each purl. If for any purl @@ -171,7 +144,7 @@ def check_conflicting_affected_and_fixed_by_packages( { "pkg:npm/foo/bar": { "affected": { - Advisory1: frozenset(NormalizedVersionRange1, NormalizedVersionRange2), + Advisory1: frozenset(VersionRange1, VersionRange2), Advisory2: frozenset(...), }, "fixed": { @@ -195,11 +168,11 @@ def check_conflicting_affected_and_fixed_by_packages( matrix = {} for advisory in advisories: advisory_obj = advisory.to_advisory_data() + advisory_id = advisory.unique_content_id for affected in advisory_obj.affected_packages or []: - affected_purl = str(affected.package) - - if affected_purl not in purls or not purls[affected_purl]: + if not affected: continue + affected_purl = str(affected.package) initialize_sub_matrix( matrix=matrix, @@ -208,13 +181,12 @@ def check_conflicting_affected_and_fixed_by_packages( ) if fixed_version := affected.fixed_version: - matrix[affected_purl]["fixed"][advisory].add(fixed_version) + matrix[affected_purl]["fixed"][advisory_id].add(str(fixed_version)) if affected.affected_version_range: - normalized_vers = affected.affected_version_range.normalize( - known_versions=package_versions[affected_purl], + matrix[affected_purl]["affected"][advisory_id].add( + str(affected.affected_version_range) ) - matrix[affected_purl]["affected"][advisory].add(normalized_vers) has_conflicting_affected_packages = False has_conflicting_fixed_package = False @@ -223,22 +195,19 @@ def check_conflicting_affected_and_fixed_by_packages( 
fixed = board.get("fixed", {}).values() affected = board.get("affected", {}).values() - # Compare affected_vers set across different advisories. unique_set_of_affected_vers = {frozenset(vers) for vers in affected} - - # Compare fixed_version set across different advisories. unique_set_of_fixed_versions = {frozenset(versions) for versions in fixed} if len(unique_set_of_affected_vers) > 1: has_conflicting_affected_packages = True + conflicting_affected = json.dumps(unique_set_of_affected_vers, default=list) messages.append( - f"{cve}: {purl} with conflicting affected versions {unique_set_of_affected_vers}" + f"{cve}: {purl} with conflicting affected versions {conflicting_affected}" ) if len(unique_set_of_fixed_versions) > 1: has_conflicting_fixed_package = True - messages.append( - f"{cve}: {purl} with conflicting fixed version {unique_set_of_fixed_versions}" - ) + conflicting_fixed = json.dumps(unique_set_of_fixed_versions, default=list) + messages.append(f"{cve}: {purl} with conflicting fixed version {conflicting_fixed}") if not has_conflicting_affected_packages and not has_conflicting_fixed_package: return @@ -249,12 +218,14 @@ def check_conflicting_affected_and_fixed_by_packages( elif not has_conflicting_affected_packages: issue_type = "CONFLICTING_FIXED_BY_PACKAGES" + messages.append("Comparison matrix:") + messages.append(json.dumps(matrix, indent=2, default=list)) todo_id = advisories_checksum(advisories) todo, created = AdvisoryToDo.objects.get_or_create( related_advisories_id=todo_id, issue_type=issue_type, defaults={ - "issue_details": "\n".join(messages), + "issue_detail": "\n".join(messages), }, ) if created: @@ -262,17 +233,18 @@ def check_conflicting_affected_and_fixed_by_packages( def initialize_sub_matrix(matrix, affected_purl, advisory): + advisory_id = advisory.unique_content_id if affected_purl not in matrix: matrix[affected_purl] = { "affected": { - advisory: set(), + advisory_id: set(), }, "fixed": { - advisory: set(), + advisory_id: set(), }, } 
else: if advisory not in matrix[affected_purl]["affected"]: - matrix[affected_purl]["affected"] = set() + matrix[affected_purl]["affected"][advisory_id] = set() if advisory not in matrix[affected_purl]["fixed"]: - matrix[affected_purl]["fixed"] = set() + matrix[affected_purl]["fixed"][advisory_id] = set() From 095691052724f7595e1d29b0d0de43dfc8dc42d9 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 11 Jun 2025 12:44:31 +0530 Subject: [PATCH 015/390] Use advisory JSONField to compute missing fix and affected Signed-off-by: Keshav Priyadarshi --- .../pipelines/compute_advisory_todo.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/vulnerabilities/pipelines/compute_advisory_todo.py b/vulnerabilities/pipelines/compute_advisory_todo.py index 5b06c279e..7087bfa45 100644 --- a/vulnerabilities/pipelines/compute_advisory_todo.py +++ b/vulnerabilities/pipelines/compute_advisory_todo.py @@ -32,7 +32,7 @@ def steps(cls): ) def compute_individual_advisory_todo(self): - advisories = Advisory.objects.all().iterator(chunk_size=2000) + advisories = Advisory.objects.all().iterator(chunk_size=5000) advisories_count = Advisory.objects.all().count() self.log( @@ -50,6 +50,7 @@ def compute_individual_advisory_todo(self): todo_id=advisory_todo_id, logger=self.log, ) + check_missing_affected_and_fixed_by_packages( advisory=advisory, todo_id=advisory_todo_id, @@ -100,15 +101,15 @@ def check_missing_affected_and_fixed_by_packages(advisory, todo_id, logger=None) """ has_affected_package = False has_fixed_package = False - for affected in advisory.to_advisory_data().affected_packages or []: + for affected in advisory.affected_packages or []: if not affected: continue if has_affected_package and has_fixed_package: break - if not has_affected_package and affected.affected_version_range: + if not has_affected_package and affected["affected_version_range"]: has_affected_package = True - if not has_fixed_package and affected.fixed_version: + if not 
has_fixed_package and affected["fixed_version"]: has_fixed_package = True if has_affected_package and has_fixed_package: @@ -236,12 +237,8 @@ def initialize_sub_matrix(matrix, affected_purl, advisory): advisory_id = advisory.unique_content_id if affected_purl not in matrix: matrix[affected_purl] = { - "affected": { - advisory_id: set(), - }, - "fixed": { - advisory_id: set(), - }, + "affected": {advisory_id: set()}, + "fixed": {advisory_id: set()}, } else: if advisory not in matrix[affected_purl]["affected"]: From f0176874892fd90092e3d9873195bfef2a41259c Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 11 Jun 2025 15:35:23 +0530 Subject: [PATCH 016/390] Add tests for Advisory ToDos Signed-off-by: Keshav Priyadarshi --- .../pipelines/test_compute_advisory_todo.py | 167 ++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 vulnerabilities/tests/pipelines/test_compute_advisory_todo.py diff --git a/vulnerabilities/tests/pipelines/test_compute_advisory_todo.py b/vulnerabilities/tests/pipelines/test_compute_advisory_todo.py new file mode 100644 index 000000000..081d38b07 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_compute_advisory_todo.py @@ -0,0 +1,167 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from datetime import datetime + +from django.test import TestCase +from packageurl import PackageURL + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.models import Advisory +from vulnerabilities.models import AdvisoryToDo +from vulnerabilities.models import Alias +from vulnerabilities.pipelines.compute_advisory_todo import ComputeToDo + + +class TestComputeToDo(TestCase): + def setUp(self): + self.advisory_data1 = AdvisoryData( + summary="Test summary", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="npm", name="package1"), + affected_version_range="vers:npm/>=1.0.0|<2.0.0", + fixed_version="2.0.0", + ) + ], + references=[Reference(url="https://example.com/vuln1")], + url="https://test.url/", + ) + + self.advisory_data2 = AdvisoryData( + summary="Test summary", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="npm", name="package1"), + affected_version_range="vers:npm/>=1.0.0|<2.0.0", + ) + ], + references=[Reference(url="https://example.com/vuln1")], + url="https://test.url/", + ) + + self.advisory_data3 = AdvisoryData( + summary="Test summary", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="npm", name="package1"), + fixed_version="2.0.0", + ) + ], + references=[Reference(url="https://example.com/vuln1")], + url="https://test.url/", + ) + + self.advisory_data4 = AdvisoryData( + summary="Test summary", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="npm", name="package1"), + affected_version_range="vers:npm/>=1.0.0|<=2.0.0", + fixed_version="2.0.1", + ) + ], + references=[Reference(url="https://example.com/vuln1")], + url="https://test.url/", + ) + + def test_advisory_todo_missing_summary(self): + date = datetime.now() + Advisory.objects.create( + unique_content_id="test_id", + url=self.advisory_data1.url, + summary="", + affected_packages=[pkg.to_dict() 
for pkg in self.advisory_data1.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data1.references], + date_imported=date, + date_collected=date, + created_by="test_pipeline", + ) + pipeline = ComputeToDo() + pipeline.execute() + + todos = AdvisoryToDo.objects.first() + self.assertEqual(1, AdvisoryToDo.objects.count()) + self.assertEqual("MISSING_SUMMARY", todos.issue_type) + + def test_advisory_todo_missing_fixed(self): + date = datetime.now() + Advisory.objects.create( + unique_content_id="test_id", + url=self.advisory_data2.url, + summary=self.advisory_data2.summary, + affected_packages=[pkg.to_dict() for pkg in self.advisory_data2.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data2.references], + date_imported=date, + date_collected=date, + created_by="test_pipeline", + ) + pipeline = ComputeToDo() + pipeline.execute() + + todos = AdvisoryToDo.objects.first() + self.assertEqual(1, AdvisoryToDo.objects.count()) + self.assertEqual("MISSING_FIXED_BY_PACKAGE", todos.issue_type) + + def test_advisory_todo_missing_affected(self): + date = datetime.now() + Advisory.objects.create( + unique_content_id="test_id", + url=self.advisory_data3.url, + summary=self.advisory_data3.summary, + affected_packages=[pkg.to_dict() for pkg in self.advisory_data3.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data3.references], + date_imported=date, + date_collected=date, + created_by="test_pipeline", + ) + pipeline = ComputeToDo() + pipeline.execute() + + todos = AdvisoryToDo.objects.first() + self.assertEqual(1, AdvisoryToDo.objects.count()) + self.assertEqual("MISSING_AFFECTED_PACKAGE", todos.issue_type) + + def test_advisory_todo_conflicting_fixed_affected(self): + alias = Alias.objects.create(alias="CVE-0000-0000") + date = datetime.now() + adv1 = Advisory.objects.create( + unique_content_id="test_id1", + url=self.advisory_data1.url, + summary=self.advisory_data1.summary, + affected_packages=[pkg.to_dict() 
for pkg in self.advisory_data1.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data1.references], + date_imported=date, + date_collected=date, + created_by="test_pipeline", + ) + adv1.aliases.add(alias) + adv2 = Advisory.objects.create( + unique_content_id="test_id2", + url=self.advisory_data4.url, + summary=self.advisory_data4.summary, + affected_packages=[pkg.to_dict() for pkg in self.advisory_data4.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data4.references], + date_imported=date, + date_collected=date, + created_by="test_pipeline", + ) + adv2.aliases.add(alias) + + pipeline = ComputeToDo() + pipeline.execute() + + todos = AdvisoryToDo.objects.first() + self.assertEqual(1, AdvisoryToDo.objects.count()) + self.assertEqual("CONFLICTING_AFFECTED_AND_FIXED_BY_PACKAGES", todos.issue_type) + self.assertIn( + "CVE-0000-0000: pkg:npm/package1 with conflicting fixed version", todos.issue_detail + ) From a05b65e522e69d0ce7324d98a26b418a6e5736b1 Mon Sep 17 00:00:00 2001 From: Vara Rahul Rajana <123227543+rajanarahul93@users.noreply.github.com> Date: Sun, 15 Jun 2025 01:48:13 +0530 Subject: [PATCH 017/390] Add back navigation link to vulnerability details page (#1875) --- vulnerabilities/templates/vulnerability_package_details.html | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vulnerabilities/templates/vulnerability_package_details.html b/vulnerabilities/templates/vulnerability_package_details.html index 21fb52192..5c1e5e800 100644 --- a/vulnerabilities/templates/vulnerability_package_details.html +++ b/vulnerabilities/templates/vulnerability_package_details.html @@ -20,6 +20,10 @@ {{ vulnerability.vulnerability_id }} +
+ + ← Back to Vulnerability {{ vulnerability.vulnerability_id }} +
From 550f72585b8b1213ca9605a1a46b769b66cc8acb Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 27 Jun 2025 22:51:17 +0530 Subject: [PATCH 018/390] Optimize creation of TODO and advisory TODO M2M relations Signed-off-by: Keshav Priyadarshi --- ...isorytodo_todorelatedadvisory_and_more.py} | 45 +++++- vulnerabilities/models.py | 32 +++-- .../pipelines/compute_advisory_todo.py | 134 +++++++++++++++--- .../pipelines/test_compute_advisory_todo.py | 24 ++-- 4 files changed, 187 insertions(+), 48 deletions(-) rename vulnerabilities/migrations/{0093_advisorytodo.py => 0093_advisorytodo_todorelatedadvisory_and_more.py} (72%) diff --git a/vulnerabilities/migrations/0093_advisorytodo.py b/vulnerabilities/migrations/0093_advisorytodo_todorelatedadvisory_and_more.py similarity index 72% rename from vulnerabilities/migrations/0093_advisorytodo.py rename to vulnerabilities/migrations/0093_advisorytodo_todorelatedadvisory_and_more.py index f380d02c5..b81a8bb95 100644 --- a/vulnerabilities/migrations/0093_advisorytodo.py +++ b/vulnerabilities/migrations/0093_advisorytodo_todorelatedadvisory_and_more.py @@ -1,6 +1,7 @@ -# Generated by Django 4.2.20 on 2025-06-03 18:13 +# Generated by Django 4.2.22 on 2025-06-27 15:59 from django.db import migrations, models +import django.db.models.deletion class Migration(migrations.Migration): @@ -90,17 +91,47 @@ class Migration(migrations.Migration): blank=True, help_text="Additional detail on how this TODO was resolved." 
), ), + ], + ), + migrations.CreateModel( + name="ToDoRelatedAdvisory", + fields=[ ( - "advisories", - models.ManyToManyField( - help_text="Advisory/ies where this TODO is applicable.", - related_name="advisory_todos", - to="vulnerabilities.advisory", + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "advisory", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to="vulnerabilities.advisory" + ), + ), + ( + "todo", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="vulnerabilities.advisorytodo", ), ), ], options={ - "unique_together": {("related_advisories_id", "issue_type")}, + "unique_together": {("todo", "advisory")}, }, ), + migrations.AddField( + model_name="advisorytodo", + name="advisories", + field=models.ManyToManyField( + help_text="Advisory/ies where this TODO is applicable.", + related_name="advisory_todos", + through="vulnerabilities.ToDoRelatedAdvisory", + to="vulnerabilities.advisory", + ), + ), + migrations.AlterUniqueTogether( + name="advisorytodo", + unique_together={("related_advisories_id", "issue_type")}, + ), ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 2da3cc665..c4ccbd1fa 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2286,16 +2286,19 @@ class AdvisoryToDo(models.Model): # to avoid creating duplicate issue for same set of advisories, related_advisories_id = models.CharField( max_length=40, - blank=False, - null=False, help_text="SHA1 digest of the unique_content_id field of the applicable advisories.", ) + advisories = models.ManyToManyField( + Advisory, + through="ToDoRelatedAdvisory", + related_name="advisory_todos", + help_text="Advisory/ies where this TODO is applicable.", + ) + issue_type = models.CharField( max_length=50, choices=ISSUE_TYPE_CHOICES, - blank=False, - null=False, db_index=True, help_text="Select the issue that needs to be addressed from the available 
options.", ) @@ -2305,12 +2308,6 @@ class AdvisoryToDo(models.Model): help_text="Additional details about the issue.", ) - advisories = models.ManyToManyField( - Advisory, - related_name="advisory_todos", - help_text="Advisory/ies where this TODO is applicable.", - ) - created_at = models.DateTimeField( auto_now_add=True, help_text="Timestamp indicating when this TODO was created.", @@ -2339,3 +2336,18 @@ class Meta: def save(self, *args, **kwargs): self.full_clean() return super().save(*args, **kwargs) + + +class ToDoRelatedAdvisory(models.Model): + todo = models.ForeignKey( + AdvisoryToDo, + on_delete=models.CASCADE, + ) + + advisory = models.ForeignKey( + Advisory, + on_delete=models.CASCADE, + ) + + class Meta: + unique_together = ("todo", "advisory") diff --git a/vulnerabilities/pipelines/compute_advisory_todo.py b/vulnerabilities/pipelines/compute_advisory_todo.py index 7087bfa45..16dd1bace 100644 --- a/vulnerabilities/pipelines/compute_advisory_todo.py +++ b/vulnerabilities/pipelines/compute_advisory_todo.py @@ -11,10 +11,12 @@ import json from aboutcode.pipeline import LoopProgress +from django.utils import timezone from vulnerabilities.models import Advisory from vulnerabilities.models import AdvisoryToDo from vulnerabilities.models import Alias +from vulnerabilities.models import ToDoRelatedAdvisory from vulnerabilities.pipelines import VulnerableCodePipeline from vulnerabilities.pipes.advisory import advisories_checksum @@ -32,8 +34,14 @@ def steps(cls): ) def compute_individual_advisory_todo(self): - advisories = Advisory.objects.all().iterator(chunk_size=5000) - advisories_count = Advisory.objects.all().count() + """Create ToDos for missing summary, affected and fixed packages.""" + + advisories = Advisory.objects.all() + advisories_count = advisories.count() + advisory_relation_to_create = {} + todo_to_create = [] + new_todos_count = 0 + batch_size = 5000 self.log( f"Checking missing summary, affected and fixed packages in {advisories_count} 
Advisories" @@ -43,23 +51,48 @@ def compute_individual_advisory_todo(self): logger=self.log, progress_step=1, ) - for advisory in progress.iter(advisories): + for advisory in progress.iter(advisories.iterator(chunk_size=5000)): advisory_todo_id = advisories_checksum(advisories=advisory) check_missing_summary( advisory=advisory, todo_id=advisory_todo_id, - logger=self.log, + todo_to_create=todo_to_create, + advisory_relation_to_create=advisory_relation_to_create, ) check_missing_affected_and_fixed_by_packages( advisory=advisory, todo_id=advisory_todo_id, - logger=self.log, + todo_to_create=todo_to_create, + advisory_relation_to_create=advisory_relation_to_create, ) + if len(todo_to_create) > batch_size: + new_todos_count += bulk_create_with_m2m( + todos=todo_to_create, + advisories=advisory_relation_to_create, + logger=self.log, + ) + advisory_relation_to_create.clear() + todo_to_create.clear() + + new_todos_count += bulk_create_with_m2m( + todos=todo_to_create, + advisories=advisory_relation_to_create, + logger=self.log, + ) + def detect_conflicting_advisories(self): + """ + Create ToDos for advisories with conflicting opinions on fixed and affected + package versions for a vulnerability. 
+ """ aliases = Alias.objects.filter(alias__istartswith="cve") aliases_count = aliases.count() + advisory_relation_to_create = {} + todo_to_create = [] + new_todos_count = 0 + batch_size = 5000 self.log(f"Cross validating advisory affected and fixed package for {aliases_count} CVEs") @@ -73,24 +106,50 @@ def detect_conflicting_advisories(self): advisory_todos__issue_type="MISSING_AFFECTED_AND_FIXED_BY_PACKAGES" ).distinct() - check_conflicting_affected_and_fixed_by_packages( + check_conflicting_affected_and_fixed_by_packages_for_alias( advisories=advisories, cve=alias, - logger=self.log, + todo_to_create=todo_to_create, + advisory_relation_to_create=advisory_relation_to_create, ) + if len(todo_to_create) > batch_size: + new_todos_count += bulk_create_with_m2m( + todos=todo_to_create, + advisories=advisory_relation_to_create, + logger=self.log, + ) + advisory_relation_to_create.clear() + todo_to_create.clear() + + new_todos_count += bulk_create_with_m2m( + todos=todo_to_create, + advisories=advisory_relation_to_create, + logger=self.log, + ) -def check_missing_summary(advisory, todo_id, logger=None): + +def check_missing_summary( + advisory, + todo_id, + todo_to_create, + advisory_relation_to_create, +): if not advisory.summary: - todo, created = AdvisoryToDo.objects.get_or_create( + todo = AdvisoryToDo( related_advisories_id=todo_id, issue_type="MISSING_SUMMARY", ) - if created: - todo.advisories.add(advisory) + advisory_relation_to_create[todo_id] = [advisory] + todo_to_create.append(todo) -def check_missing_affected_and_fixed_by_packages(advisory, todo_id, logger=None): +def check_missing_affected_and_fixed_by_packages( + advisory, + todo_id, + todo_to_create, + advisory_relation_to_create, +): """ Check for missing affected or fixed-by packages in the advisory and create appropriate AdvisoryToDo. 
@@ -121,15 +180,21 @@ def check_missing_affected_and_fixed_by_packages(advisory, todo_id, logger=None) issue_type = "MISSING_AFFECTED_PACKAGE" elif not has_fixed_package: issue_type = "MISSING_FIXED_BY_PACKAGE" - todo, created = AdvisoryToDo.objects.get_or_create( + + todo = AdvisoryToDo( related_advisories_id=todo_id, issue_type=issue_type, ) - if created: - todo.advisories.add(advisory) + todo_to_create.append(todo) + advisory_relation_to_create[todo_id] = [advisory] -def check_conflicting_affected_and_fixed_by_packages(advisories, cve, logger=None): +def check_conflicting_affected_and_fixed_by_packages_for_alias( + advisories, + cve, + todo_to_create, + advisory_relation_to_create, +): """ Add appropriate AdvisoryToDo for conflicting affected/fixed packages. @@ -222,15 +287,13 @@ def check_conflicting_affected_and_fixed_by_packages(advisories, cve, logger=Non messages.append("Comparison matrix:") messages.append(json.dumps(matrix, indent=2, default=list)) todo_id = advisories_checksum(advisories) - todo, created = AdvisoryToDo.objects.get_or_create( + todo = AdvisoryToDo( related_advisories_id=todo_id, issue_type=issue_type, - defaults={ - "issue_detail": "\n".join(messages), - }, + issue_detail="\n".join(messages), ) - if created: - todo.advisories.add(*advisories) + todo_to_create.append(todo) + advisory_relation_to_create[todo_id] = list(advisories) def initialize_sub_matrix(matrix, affected_purl, advisory): @@ -245,3 +308,30 @@ def initialize_sub_matrix(matrix, affected_purl, advisory): matrix[affected_purl]["affected"][advisory_id] = set() if advisory not in matrix[affected_purl]["fixed"]: matrix[affected_purl]["fixed"][advisory_id] = set() + + +def bulk_create_with_m2m(todos, advisories, logger): + """Bulk create ToDos and also bulk create M2M ToDo Advisory relationships.""" + if not todos: + return 0 + + start_time = timezone.now() + try: + AdvisoryToDo.objects.bulk_create(objs=todos, ignore_conflicts=True) + except Exception as e: + logger(f"Error 
creating AdvisoryToDo: {e}") + + new_todos = AdvisoryToDo.objects.filter(created_at__gte=start_time) + + relations = [ + ToDoRelatedAdvisory(todo=todo, advisory=advisory) + for todo in new_todos + for advisory in advisories[todo.related_advisories_id] + ] + + try: + ToDoRelatedAdvisory.objects.bulk_create(relations) + except Exception as e: + logger(f"Error creating Advisory ToDo relations: {e}") + + return new_todos.count() diff --git a/vulnerabilities/tests/pipelines/test_compute_advisory_todo.py b/vulnerabilities/tests/pipelines/test_compute_advisory_todo.py index 081d38b07..d382ff0b8 100644 --- a/vulnerabilities/tests/pipelines/test_compute_advisory_todo.py +++ b/vulnerabilities/tests/pipelines/test_compute_advisory_todo.py @@ -88,9 +88,10 @@ def test_advisory_todo_missing_summary(self): pipeline = ComputeToDo() pipeline.execute() - todos = AdvisoryToDo.objects.first() + todo = AdvisoryToDo.objects.first() self.assertEqual(1, AdvisoryToDo.objects.count()) - self.assertEqual("MISSING_SUMMARY", todos.issue_type) + self.assertEqual("MISSING_SUMMARY", todo.issue_type) + self.assertEqual(1, todo.advisories.count()) def test_advisory_todo_missing_fixed(self): date = datetime.now() @@ -107,9 +108,10 @@ def test_advisory_todo_missing_fixed(self): pipeline = ComputeToDo() pipeline.execute() - todos = AdvisoryToDo.objects.first() + todo = AdvisoryToDo.objects.first() self.assertEqual(1, AdvisoryToDo.objects.count()) - self.assertEqual("MISSING_FIXED_BY_PACKAGE", todos.issue_type) + self.assertEqual("MISSING_FIXED_BY_PACKAGE", todo.issue_type) + self.assertEqual(1, todo.advisories.count()) def test_advisory_todo_missing_affected(self): date = datetime.now() @@ -126,9 +128,10 @@ def test_advisory_todo_missing_affected(self): pipeline = ComputeToDo() pipeline.execute() - todos = AdvisoryToDo.objects.first() + todo = AdvisoryToDo.objects.first() self.assertEqual(1, AdvisoryToDo.objects.count()) - self.assertEqual("MISSING_AFFECTED_PACKAGE", todos.issue_type) + 
self.assertEqual("MISSING_AFFECTED_PACKAGE", todo.issue_type) + self.assertEqual(1, todo.advisories.count()) def test_advisory_todo_conflicting_fixed_affected(self): alias = Alias.objects.create(alias="CVE-0000-0000") @@ -156,12 +159,15 @@ def test_advisory_todo_conflicting_fixed_affected(self): ) adv2.aliases.add(alias) + self.assertEqual(0, AdvisoryToDo.objects.count()) pipeline = ComputeToDo() pipeline.execute() - todos = AdvisoryToDo.objects.first() + todo = AdvisoryToDo.objects.first() self.assertEqual(1, AdvisoryToDo.objects.count()) - self.assertEqual("CONFLICTING_AFFECTED_AND_FIXED_BY_PACKAGES", todos.issue_type) + self.assertEqual("CONFLICTING_AFFECTED_AND_FIXED_BY_PACKAGES", todo.issue_type) self.assertIn( - "CVE-0000-0000: pkg:npm/package1 with conflicting fixed version", todos.issue_detail + "CVE-0000-0000: pkg:npm/package1 with conflicting fixed version", todo.issue_detail ) + self.assertEqual(2, todo.advisories.count()) + self.assertEqual(todo, adv2.advisory_todos.first()) From f1479073712a5ca57c089d796331a949b35d595f Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 30 Jun 2025 15:44:36 +0530 Subject: [PATCH 019/390] Store issue details as JSON dump Signed-off-by: Keshav Priyadarshi --- .../pipelines/compute_advisory_todo.py | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/vulnerabilities/pipelines/compute_advisory_todo.py b/vulnerabilities/pipelines/compute_advisory_todo.py index 16dd1bace..8c4d1253d 100644 --- a/vulnerabilities/pipelines/compute_advisory_todo.py +++ b/vulnerabilities/pipelines/compute_advisory_todo.py @@ -82,6 +82,10 @@ def compute_individual_advisory_todo(self): logger=self.log, ) + self.log( + f"Successfully created {new_todos_count} ToDos for missing summary, affected and fixed packages" + ) + def detect_conflicting_advisories(self): """ Create ToDos for advisories with conflicting opinions on fixed and affected @@ -128,6 +132,10 @@ def detect_conflicting_advisories(self): 
logger=self.log, ) + self.log( + f"Successfully created {new_todos_count} ToDos for conflicting affected and fixed packages" + ) + def check_missing_summary( advisory, @@ -266,14 +274,14 @@ def check_conflicting_affected_and_fixed_by_packages_for_alias( if len(unique_set_of_affected_vers) > 1: has_conflicting_affected_packages = True - conflicting_affected = json.dumps(unique_set_of_affected_vers, default=list) messages.append( - f"{cve}: {purl} with conflicting affected versions {conflicting_affected}" + f"{cve}: {purl} with conflicting affected versions {unique_set_of_affected_vers}" ) if len(unique_set_of_fixed_versions) > 1: has_conflicting_fixed_package = True - conflicting_fixed = json.dumps(unique_set_of_fixed_versions, default=list) - messages.append(f"{cve}: {purl} with conflicting fixed version {conflicting_fixed}") + messages.append( + f"{cve}: {purl} with conflicting fixed version {unique_set_of_fixed_versions}" + ) if not has_conflicting_affected_packages and not has_conflicting_fixed_package: return @@ -284,13 +292,16 @@ def check_conflicting_affected_and_fixed_by_packages_for_alias( elif not has_conflicting_affected_packages: issue_type = "CONFLICTING_FIXED_BY_PACKAGES" - messages.append("Comparison matrix:") - messages.append(json.dumps(matrix, indent=2, default=list)) + issue_detail = { + "Conflict summary": messages, + "Conflict matrix": matrix, + } + todo_id = advisories_checksum(advisories) todo = AdvisoryToDo( related_advisories_id=todo_id, issue_type=issue_type, - issue_detail="\n".join(messages), + issue_detail=json.dumps(issue_detail, default=list), ) todo_to_create.append(todo) advisory_relation_to_create[todo_id] = list(advisories) From 8801c905193db2b36eb774b25bed4a6fa598b852 Mon Sep 17 00:00:00 2001 From: Tushar Goel <34160672+TG1999@users.noreply.github.com> Date: Tue, 1 Jul 2025 16:58:38 +0530 Subject: [PATCH 020/390] Add advisory v2 (#1866) Reference: VCIO-next: Design new Advisory -> Package -> Vulnerability models 
relationhips #1393 VCIO-next: Advisory model migration Batch 1 #1877 Add V2Advisory Model. V2Advisory Model should have relationships between other models like aliases, affected packages, references, severities and weaknesses. V2AdvisoyModel will have advisory ID. advisory ID will be a natural ID for example Redhat importer will have RHSA, NVD will have CVE, when there is no ID we will create one. For example NPM * Add AdvisoryV2 models Signed-off-by: Tushar Goel * Do formatting changes Signed-off-by: Tushar Goel * Add migrations Signed-off-by: Tushar Goel * Add model changes and support new advisory ingestion Signed-off-by: Tushar Goel * Add V2Pipelines Signed-off-by: Tushar Goel * Revert alpine linux importer Signed-off-by: Tushar Goel * Fix tests Signed-off-by: Tushar Goel * Refactor compute content ID Signed-off-by: Tushar Goel * Formatting changes Signed-off-by: Tushar Goel * Fix errors in compute content ID Signed-off-by: Tushar Goel * Add github pipeline Signed-off-by: Tushar Goel * Add V2 pipelines Signed-off-by: Tushar Goel * Rename pipelines Signed-off-by: Tushar Goel * Add V2 importer pipelines Signed-off-by: Tushar Goel * Rename pipelines Signed-off-by: Tushar Goel * Reorder importers in registry Signed-off-by: Tushar Goel * Fix tests Signed-off-by: Tushar Goel * Fix tests Signed-off-by: Tushar Goel * Fix tests Signed-off-by: Tushar Goel * Add tests for apache HTTPD importer pipeline Signed-off-by: Tushar Goel * Add tests for npm importer pipeline Signed-off-by: Tushar Goel * Add tests for github importer pipeline Signed-off-by: Tushar Goel * Add tests for pysec importer Signed-off-by: Tushar Goel * Add license header files Signed-off-by: Tushar Goel * Add tests for Pypa importer Signed-off-by: Tushar Goel * Add tests for vulnrichment importer pipeline v2 Signed-off-by: Tushar Goel * Add UI for V2 Signed-off-by: Tushar Goel * Fix tests Signed-off-by: Tushar Goel * Fix tests Signed-off-by: Tushar Goel * Fix tests Signed-off-by: Tushar Goel * Add Advisory 
Detail View Signed-off-by: Tushar Goel * Fix risk score pipeline Signed-off-by: Tushar Goel * Fix tests Signed-off-by: Tushar Goel * Change API design Signed-off-by: Tushar Goel * Add tests for gitlab importer Signed-off-by: Tushar Goel * Test postgresql importer Signed-off-by: Tushar Goel * Add tests for elixir security importer Signed-off-by: Tushar Goel * Add tests for models Signed-off-by: Tushar Goel * Merge changes Signed-off-by: Tushar Goel * Add tests for compute package risk V2 Signed-off-by: Tushar Goel * Add tests for compute package rank V2 Signed-off-by: Tushar Goel * Fix tests Signed-off-by: Tushar Goel * Add tests for V2 Importer Pipeline Signed-off-by: Tushar Goel * Add tests for exploits enhancement pipeline Signed-off-by: Tushar Goel --------- Signed-off-by: Tushar Goel --- vulnerabilities/api_v2.py | 382 +++++++ vulnerabilities/forms.py | 8 + vulnerabilities/importer.py | 120 +++ vulnerabilities/importers/__init__.py | 97 +- vulnerabilities/importers/curl.py | 2 +- vulnerabilities/importers/osv.py | 85 ++ vulnerabilities/improvers/__init__.py | 87 +- vulnerabilities/management/commands/import.py | 5 +- ...soryreference_advisoryseverity_and_more.py | 635 ++++++++++++ vulnerabilities/models.py | 937 +++++++++++++++++- vulnerabilities/pipelines/__init__.py | 285 +++++- .../v2_importers/apache_httpd_importer.py | 341 +++++++ .../v2_importers/elixir_security_importer.py | 124 +++ .../pipelines/v2_importers/github_importer.py | 393 ++++++++ .../pipelines/v2_importers/gitlab_importer.py | 329 ++++++ .../pipelines/v2_importers/npm_importer.py | 178 ++++ .../pipelines/v2_importers/nvd_importer.py | 338 +++++++ .../v2_importers/postgresql_importer.py | 163 +++ .../pipelines/v2_importers/pypa_importer.py | 74 ++ .../pipelines/v2_importers/pysec_importer.py | 67 ++ .../v2_importers/vulnrichment_importer.py | 318 ++++++ .../pipelines/v2_improvers/collect_commits.py | 252 +++++ .../v2_improvers/compute_package_risk.py | 143 +++ 
.../computer_package_version_rank.py | 93 ++ .../v2_improvers/enhance_with_exploitdb.py | 169 ++++ .../v2_improvers/enhance_with_kev.py | 103 ++ .../v2_improvers/enhance_with_metasploit.py | 126 +++ .../v2_improvers/flag_ghost_packages.py | 104 ++ vulnerabilities/pipes/advisory.py | 118 +++ vulnerabilities/risk.py | 18 + .../templates/advisory_detail.html | 614 ++++++++++++ .../templates/advisory_package_details.html | 88 ++ vulnerabilities/templates/index_v2.html | 33 + .../templates/package_details_v2.html | 365 +++++++ .../templates/package_search_box_v2.html | 48 + vulnerabilities/templates/packages_v2.html | 84 ++ .../test_apache_httpd_importer_pipeline_v2.py | 161 +++ .../pipelines/test_collect_commits_v2.py | 131 +++ .../pipelines/test_compute_package_risk_v2.py | 69 ++ .../pipelines/test_compute_version_rank_v2.py | 70 ++ .../test_elixir_security_v2_importer.py | 108 ++ .../test_enhance_with_exploitdb_v2.py | 56 ++ .../pipelines/test_enhance_with_kev_v2.py | 57 ++ .../test_enhance_with_metasploit_v2.py | 56 ++ .../pipelines/test_flag_ghost_packages_v2.py | 111 +++ .../pipelines/test_github_importer_v2.py | 174 ++++ .../pipelines/test_gitlab_v2_importer.py | 153 +++ .../test_npm_importer_pipeline_v2.py | 128 +++ .../pipelines/test_postgresql_v2_importer.py | 154 +++ .../test_pypa_v2_importer_pipeline.py | 173 ++++ .../tests/pipelines/test_pysec_v2_importer.py | 137 +++ ...est_vulnerablecode_importer_v2_pipeline.py | 180 ++++ .../test_vulnrichment_v2_importer.py | 205 ++++ vulnerabilities/tests/pipes/test_advisory.py | 91 ++ vulnerabilities/utils.py | 84 +- vulnerabilities/views.py | 267 ++++- vulnerablecode/urls.py | 34 + 57 files changed, 9799 insertions(+), 126 deletions(-) create mode 100644 vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py create mode 100644 vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/elixir_security_importer.py create 
mode 100644 vulnerabilities/pipelines/v2_importers/github_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/gitlab_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/npm_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/nvd_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/postgresql_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/pypa_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/pysec_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py create mode 100644 vulnerabilities/pipelines/v2_improvers/collect_commits.py create mode 100644 vulnerabilities/pipelines/v2_improvers/compute_package_risk.py create mode 100644 vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py create mode 100644 vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py create mode 100644 vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py create mode 100644 vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py create mode 100644 vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py create mode 100644 vulnerabilities/templates/advisory_detail.html create mode 100644 vulnerabilities/templates/advisory_package_details.html create mode 100644 vulnerabilities/templates/index_v2.html create mode 100644 vulnerabilities/templates/package_details_v2.html create mode 100644 vulnerabilities/templates/package_search_box_v2.html create mode 100644 vulnerabilities/templates/packages_v2.html create mode 100644 vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py create mode 100644 vulnerabilities/tests/pipelines/test_collect_commits_v2.py create mode 100644 vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py create mode 100644 vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py create mode 100644 
vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py create mode 100644 vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py create mode 100644 vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py create mode 100644 vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py create mode 100644 vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py create mode 100644 vulnerabilities/tests/pipelines/test_github_importer_v2.py create mode 100644 vulnerabilities/tests/pipelines/test_gitlab_v2_importer.py create mode 100644 vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py create mode 100644 vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py create mode 100644 vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py create mode 100644 vulnerabilities/tests/pipelines/test_pysec_v2_importer.py create mode 100644 vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py create mode 100644 vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py diff --git a/vulnerabilities/api_v2.py b/vulnerabilities/api_v2.py index 4c2562216..4915dda63 100644 --- a/vulnerabilities/api_v2.py +++ b/vulnerabilities/api_v2.py @@ -24,8 +24,14 @@ from rest_framework.response import Response from rest_framework.reverse import reverse +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisorySeverity +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import AdvisoryWeakness from vulnerabilities.models import CodeFix +from vulnerabilities.models import CodeFixV2 from vulnerabilities.models import Package +from vulnerabilities.models import PackageV2 from vulnerabilities.models import PipelineRun from vulnerabilities.models import PipelineSchedule from vulnerabilities.models import Vulnerability @@ -44,6 +50,16 @@ class Meta: fields = ["cwe_id", "name", "description"] +class 
AdvisoryWeaknessSerializer(serializers.ModelSerializer): + cwe_id = serializers.CharField() + name = serializers.CharField() + description = serializers.CharField() + + class Meta: + model = AdvisoryWeakness + fields = ["cwe_id", "name", "description"] + + class VulnerabilityReferenceV2Serializer(serializers.ModelSerializer): url = serializers.CharField() reference_type = serializers.CharField() @@ -54,6 +70,29 @@ class Meta: fields = ["url", "reference_type", "reference_id"] +class AdvisoryReferenceSerializer(serializers.ModelSerializer): + url = serializers.CharField() + reference_type = serializers.CharField() + reference_id = serializers.CharField() + + class Meta: + model = AdvisoryReference + fields = ["url", "reference_type", "reference_id"] + + +class AdvisorySeveritySerializer(serializers.ModelSerializer): + class Meta: + model = AdvisorySeverity + fields = ["url", "value", "scoring_system", "scoring_elements", "published_at"] + + def to_representation(self, instance): + data = super().to_representation(instance) + published_at = data.get("published_at", None) + if not published_at: + data.pop("published_at") + return data + + class VulnerabilitySeverityV2Serializer(serializers.ModelSerializer): class Meta: model = VulnerabilitySeverity @@ -94,6 +133,32 @@ def get_aliases(self, obj): return [alias.alias for alias in obj.aliases.all()] +class AdvisoryV2Serializer(serializers.ModelSerializer): + aliases = serializers.SerializerMethodField() + weaknesses = AdvisoryWeaknessSerializer(many=True) + references = AdvisoryReferenceSerializer(many=True) + severities = AdvisorySeveritySerializer(many=True) + advisory_id = serializers.CharField(source="avid", read_only=True) + + class Meta: + model = AdvisoryV2 + fields = [ + "advisory_id", + "url", + "aliases", + "summary", + "severities", + "weaknesses", + "references", + "exploitability", + "weighted_severity", + "risk_score", + ] + + def get_aliases(self, obj): + return [alias.alias for alias in obj.aliases.all()] 
+ + class VulnerabilityListSerializer(serializers.ModelSerializer): url = serializers.SerializerMethodField() @@ -233,6 +298,57 @@ def get_fixing_vulnerabilities(self, obj): return [vuln.vulnerability_id for vuln in obj.fixing_vulnerabilities.all()] +class AdvisoryPackageV2Serializer(serializers.ModelSerializer): + purl = serializers.CharField(source="package_url") + risk_score = serializers.FloatField(read_only=True) + affected_by_vulnerabilities = serializers.SerializerMethodField() + fixing_vulnerabilities = serializers.SerializerMethodField() + next_non_vulnerable_version = serializers.CharField(read_only=True) + latest_non_vulnerable_version = serializers.CharField(read_only=True) + + class Meta: + model = Package + fields = [ + "purl", + "affected_by_vulnerabilities", + "fixing_vulnerabilities", + "next_non_vulnerable_version", + "latest_non_vulnerable_version", + "risk_score", + ] + + def get_affected_by_vulnerabilities(self, obj): + """ + Return a dictionary with vulnerabilities as keys and their details, including fixed_by_packages. + """ + result = {} + request = self.context.get("request") + for adv in getattr(obj, "prefetched_affected_advisories", []): + fixed_by_package = adv.fixed_by_packages.first() + purl = None + if fixed_by_package: + purl = fixed_by_package.package_url + # Get code fixed for a vulnerability + code_fixes = CodeFixV2.objects.filter(advisory=adv).distinct() + code_fix_urls = [ + reverse("codefix-detail", args=[code_fix.id], request=request) + for code_fix in code_fixes + ] + + result[adv.avid] = { + "advisory_id": adv.avid, + "fixed_by_packages": purl, + "code_fixes": code_fix_urls, + } + return result + + def get_fixing_vulnerabilities(self, obj): + # Ghost package should not fix any vulnerability. 
+ if obj.is_ghost: + return [] + return [adv.advisory_id for adv in obj.fixing_advisories.all()] + + class PackageurlListSerializer(serializers.Serializer): purls = serializers.ListField( child=serializers.CharField(), @@ -261,6 +377,12 @@ class PackageV2FilterSet(filters.FilterSet): purl = filters.CharFilter(field_name="package_url") +class AdvisoryPackageV2FilterSet(filters.FilterSet): + affected_by_vulnerability = filters.CharFilter(field_name="affected_by_advisory__advisory_id") + fixing_vulnerability = filters.CharFilter(field_name="fixing_advisories__advisory_id") + purl = filters.CharFilter(field_name="package_url") + + class PackageV2ViewSet(viewsets.ReadOnlyModelViewSet): queryset = Package.objects.all().prefetch_related( Prefetch( @@ -754,3 +876,263 @@ def get_permissions(self): if self.action not in ["list", "retrieve"]: return [IsAdminWithSessionAuth()] return super().get_permissions() + + +class AdvisoriesPackageV2ViewSet(viewsets.ReadOnlyModelViewSet): + queryset = PackageV2.objects.all().prefetch_related( + Prefetch( + "affected_by_advisories", + queryset=AdvisoryV2.objects.prefetch_related("fixed_by_packages"), + to_attr="prefetched_affected_advisories", + ) + ) + serializer_class = AdvisoryPackageV2Serializer + filter_backends = (filters.DjangoFilterBackend,) + filterset_class = AdvisoryPackageV2FilterSet + + def get_queryset(self): + queryset = super().get_queryset() + package_purls = self.request.query_params.getlist("purl") + affected_by_advisory = self.request.query_params.get("affected_by_advisory") + fixing_advisory = self.request.query_params.get("fixing_advisory") + if package_purls: + queryset = queryset.filter(package_url__in=package_purls) + if affected_by_advisory: + queryset = queryset.filter(affected_by_advisories__advisory_id=affected_by_advisory) + if fixing_advisory: + queryset = queryset.filter(fixing_advisories__advisory=fixing_advisory) + return queryset.with_is_vulnerable() + + def list(self, request, *args, **kwargs): + 
queryset = self.get_queryset() + # Apply pagination + page = self.paginate_queryset(queryset) + if page is not None: + # Collect only vulnerabilities for packages in the current page + advisories = set() + for package in page: + advisories.update(package.affected_by_advisories.all()) + advisories.update(package.fixing_advisories.all()) + + # Serialize the vulnerabilities with advisory_id and advisory label as keys + advisory_data = {f"{adv.avid}": AdvisoryV2Serializer(adv).data for adv in advisories} + + # Serialize the current page of packages + serializer = self.get_serializer(page, many=True) + data = serializer.data + print(data) + # Use 'self.get_paginated_response' to include pagination data + return self.get_paginated_response({"advisories": advisory_data, "packages": data}) + + # If pagination is not applied, collect vulnerabilities for all packages + advisories = set() + for package in queryset: + advisories.update(package.affected_by_vulnerabilities.all()) + advisories.update(package.fixing_vulnerabilities.all()) + + advisory_data = {f"{adv.avid}": AdvisoryV2Serializer(adv).data for adv in advisories} + + serializer = self.get_serializer(queryset, many=True) + data = serializer.data + return Response({"advisories": advisory_data, "packages": data}) + + @extend_schema( + request=PackageurlListSerializer, + responses={200: PackageV2Serializer(many=True)}, + ) + @action( + detail=False, + methods=["post"], + serializer_class=PackageurlListSerializer, + filter_backends=[], + pagination_class=None, + ) + def bulk_lookup(self, request): + """ + Return the response for exact PackageURLs requested for. 
+ """ + serializer = self.serializer_class(data=request.data) + if not serializer.is_valid(): + return Response( + status=status.HTTP_400_BAD_REQUEST, + data={ + "error": serializer.errors, + "message": "A non-empty 'purls' list of PURLs is required.", + }, + ) + validated_data = serializer.validated_data + purls = validated_data.get("purls") + + # Fetch packages matching the provided purls + packages = PackageV2.objects.for_purls(purls).with_is_vulnerable() + + # Collect vulnerabilities associated with these packages + advisories = set() + for package in packages: + advisories.update(package.affected_by_advisories.all()) + advisories.update(package.fixing_advisories.all()) + + # Serialize vulnerabilities with vulnerability_id as keys + advisory_data = {adv.avid: AdvisoryV2Serializer(adv).data for adv in advisories} + + # Serialize packages + package_data = AdvisoryPackageV2Serializer( + packages, + many=True, + context={"request": request}, + ).data + + return Response( + { + "advisories": advisory_data, + "packages": package_data, + } + ) + + @extend_schema( + request=PackageBulkSearchRequestSerializer, + responses={200: PackageV2Serializer(many=True)}, + ) + @action( + detail=False, + methods=["post"], + serializer_class=PackageBulkSearchRequestSerializer, + filter_backends=[], + pagination_class=None, + ) + def bulk_search(self, request): + """ + Lookup for vulnerable packages using many Package URLs at once. 
+ """ + serializer = self.serializer_class(data=request.data) + if not serializer.is_valid(): + return Response( + status=status.HTTP_400_BAD_REQUEST, + data={ + "error": serializer.errors, + "message": "A non-empty 'purls' list of PURLs is required.", + }, + ) + validated_data = serializer.validated_data + purls = validated_data.get("purls") + purl_only = validated_data.get("purl_only", False) + plain_purl = validated_data.get("plain_purl", False) + + if plain_purl: + purl_objects = [PackageURL.from_string(purl) for purl in purls] + plain_purl_objects = [ + PackageURL( + type=purl.type, + namespace=purl.namespace, + name=purl.name, + version=purl.version, + ) + for purl in purl_objects + ] + plain_purls = [str(purl) for purl in plain_purl_objects] + + query = ( + PackageV2.objects.filter(plain_package_url__in=plain_purls) + .order_by("plain_package_url") + .distinct("plain_package_url") + .with_is_vulnerable() + ) + + packages = query + + # Collect vulnerabilities associated with these packages + advisories = set() + for package in packages: + advisories.update(package.affected_by_vulnerabilities.all()) + advisories.update(package.fixing_vulnerabilities.all()) + + advisory_data = {adv.avid: VulnerabilityV2Serializer(adv).data for adv in advisories} + + if not purl_only: + package_data = AdvisoryPackageV2Serializer( + packages, many=True, context={"request": request} + ).data + return Response( + { + "advisories": advisory_data, + "packages": package_data, + } + ) + + # Using order by and distinct because there will be + # many fully qualified purl for a single plain purl + vulnerable_purls = query.vulnerable().only("plain_package_url") + vulnerable_purls = [str(package.plain_package_url) for package in vulnerable_purls] + return Response(data=vulnerable_purls) + + query = PackageV2.objects.filter(package_url__in=purls).distinct().with_is_vulnerable() + packages = query + + # Collect vulnerabilities associated with these packages + advisories = set() + for package 
in packages: + advisories.update(package.affected_by_vulnerabilities.all()) + advisories.update(package.fixing_vulnerabilities.all()) + + advisory_data = {adv.advisory_id: AdvisoryV2Serializer(adv).data for adv in advisories} + + if not purl_only: + package_data = AdvisoryPackageV2Serializer( + packages, many=True, context={"request": request} + ).data + return Response( + { + "advisories": advisory_data, + "packages": package_data, + } + ) + + vulnerable_purls = query.vulnerable().only("package_url") + vulnerable_purls = [str(package.package_url) for package in vulnerable_purls] + return Response(data=vulnerable_purls) + + @action(detail=False, methods=["get"]) + def all(self, request): + """ + Return a list of Package URLs of vulnerable packages. + """ + vulnerable_purls = ( + PackageV2.objects.vulnerable() + .only("package_url") + .order_by("package_url") + .distinct() + .values_list("package_url", flat=True) + ) + return Response(vulnerable_purls) + + @extend_schema( + request=LookupRequestSerializer, + responses={200: PackageV2Serializer(many=True)}, + ) + @action( + detail=False, + methods=["post"], + serializer_class=LookupRequestSerializer, + filter_backends=[], + pagination_class=None, + ) + def lookup(self, request): + """ + Return the response for exact PackageURL requested for. 
+ """ + serializer = self.serializer_class(data=request.data) + if not serializer.is_valid(): + return Response( + status=status.HTTP_400_BAD_REQUEST, + data={ + "error": serializer.errors, + "message": "A 'purl' is required.", + }, + ) + validated_data = serializer.validated_data + purl = validated_data.get("purl") + + qs = self.get_queryset().for_purls([purl]).with_is_vulnerable() + return Response( + AdvisoryPackageV2Serializer(qs, many=True, context={"request": request}).data + ) diff --git a/vulnerabilities/forms.py b/vulnerabilities/forms.py index 74a10340c..7d955ac37 100644 --- a/vulnerabilities/forms.py +++ b/vulnerabilities/forms.py @@ -36,6 +36,14 @@ class VulnerabilitySearchForm(forms.Form): ) +class AdvisorySearchForm(forms.Form): + + search = forms.CharField( + required=True, + widget=forms.TextInput(attrs={"placeholder": "Advisory id or alias such as CVE or GHSA"}), + ) + + class ApiUserCreationForm(forms.ModelForm): """ Support a simplified creation for API-only users directly from the UI. 
diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py index 759ec9330..9cef5e0fa 100644 --- a/vulnerabilities/importer.py +++ b/vulnerabilities/importer.py @@ -55,6 +55,7 @@ class VulnerabilitySeverity: value: str scoring_elements: str = "" published_at: Optional[datetime.datetime] = None + url: Optional[str] = None def to_dict(self): data = { @@ -145,6 +146,54 @@ def from_url(cls, url): return cls(url=url) +@dataclasses.dataclass(eq=True) +@functools.total_ordering +class ReferenceV2: + reference_id: str = "" + reference_type: str = "" + url: str = "" + + def __post_init__(self): + if not self.url: + raise TypeError("Reference must have a url") + if self.reference_id and not isinstance(self.reference_id, str): + self.reference_id = str(self.reference_id) + + def __lt__(self, other): + if not isinstance(other, Reference): + return NotImplemented + return self._cmp_key() < other._cmp_key() + + # TODO: Add cache + def _cmp_key(self): + return (self.reference_id, self.reference_type, self.url) + + def to_dict(self): + """Return a normalized dictionary representation""" + return { + "reference_id": self.reference_id, + "reference_type": self.reference_type, + "url": self.url, + } + + @classmethod + def from_dict(cls, ref: dict): + return cls( + reference_id=str(ref["reference_id"]), + reference_type=ref.get("reference_type") or "", + url=ref["url"], + ) + + @classmethod + def from_url(cls, url): + reference_id = get_reference_id(url) + if "GHSA-" in reference_id.upper(): + return cls(reference_id=reference_id, url=url) + if is_cve(reference_id): + return cls(url=url, reference_id=reference_id.upper()) + return cls(url=url) + + class UnMergeablePackageError(Exception): """ Raised when a package cannot be merged with another one. 
@@ -302,10 +351,81 @@ class AdvisoryData: date_published must be aware datetime """ + advisory_id: str = "" aliases: List[str] = dataclasses.field(default_factory=list) summary: Optional[str] = "" affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) + references_v2: List[ReferenceV2] = dataclasses.field(default_factory=list) + date_published: Optional[datetime.datetime] = None + weaknesses: List[int] = dataclasses.field(default_factory=list) + severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list) + url: Optional[str] = None + + def __post_init__(self): + if self.date_published and not self.date_published.tzinfo: + logger.warning(f"AdvisoryData with no tzinfo: {self!r}") + if self.summary: + self.summary = self.clean_summary(self.summary) + + def clean_summary(self, summary): + # https://nvd.nist.gov/vuln/detail/CVE-2013-4314 + # https://github.com/cms-dev/cms/issues/888#issuecomment-516977572 + summary = summary.strip() + if summary: + summary = summary.replace("\x00", "\uFFFD") + return summary + + def to_dict(self): + return { + "aliases": self.aliases, + "summary": self.summary, + "affected_packages": [pkg.to_dict() for pkg in self.affected_packages], + "references": [ref.to_dict() for ref in self.references], + "date_published": self.date_published.isoformat() if self.date_published else None, + "weaknesses": self.weaknesses, + "url": self.url if self.url else "", + } + + @classmethod + def from_dict(cls, advisory_data): + date_published = advisory_data["date_published"] + transformed = { + "aliases": advisory_data["aliases"], + "summary": advisory_data["summary"], + "affected_packages": [ + AffectedPackage.from_dict(pkg) + for pkg in advisory_data["affected_packages"] + if pkg is not None + ], + "references": [Reference.from_dict(ref) for ref in advisory_data["references"]], + "date_published": 
datetime.datetime.fromisoformat(date_published) + if date_published + else None, + "weaknesses": advisory_data["weaknesses"], + "url": advisory_data.get("url") or None, + } + return cls(**transformed) + + +@dataclasses.dataclass(order=True) +class AdvisoryDataV2: + """ + This data class expresses the contract between data sources and the import runner. + + If a vulnerability_id is present then: + summary or affected_packages or references must be present + otherwise + either affected_package or references should be present + + date_published must be aware datetime + """ + + advisory_id: str = "" + aliases: List[str] = dataclasses.field(default_factory=list) + summary: Optional[str] = "" + affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) + references: List[ReferenceV2] = dataclasses.field(default_factory=list) date_published: Optional[datetime.datetime] = None weaknesses: List[int] = dataclasses.field(default_factory=list) url: Optional[str] = None diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index f0d9532ab..3dd914a92 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -33,7 +33,6 @@ from vulnerabilities.importers import ubuntu_usn from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen -from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.pipelines import alpine_linux_importer from vulnerabilities.pipelines import github_importer from vulnerabilities.pipelines import gitlab_importer @@ -42,45 +41,59 @@ from vulnerabilities.pipelines import nvd_importer from vulnerabilities.pipelines import pypa_importer from vulnerabilities.pipelines import pysec_importer +from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2 +from vulnerabilities.pipelines.v2_importers import github_importer as github_importer_v2 +from 
vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2 +from vulnerabilities.pipelines.v2_importers import npm_importer as npm_importer_v2 +from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2 +from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2 +from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2 +from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2 +from vulnerabilities.utils import create_registry -IMPORTERS_REGISTRY = [ - nvd_importer.NVDImporterPipeline, - github_importer.GitHubAPIImporterPipeline, - gitlab_importer.GitLabImporterPipeline, - github_osv.GithubOSVImporter, - pypa_importer.PyPaImporterPipeline, - npm_importer.NpmImporterPipeline, - nginx_importer.NginxImporterPipeline, - pysec_importer.PyPIImporterPipeline, - apache_tomcat.ApacheTomcatImporter, - postgresql.PostgreSQLImporter, - debian.DebianImporter, - curl.CurlImporter, - epss.EPSSImporter, - vulnrichment.VulnrichImporter, - alpine_linux_importer.AlpineLinuxImporterPipeline, - ruby.RubyImporter, - apache_kafka.ApacheKafkaImporter, - openssl.OpensslImporter, - redhat.RedhatImporter, - archlinux.ArchlinuxImporter, - ubuntu.UbuntuImporter, - debian_oval.DebianOvalImporter, - retiredotnet.RetireDotnetImporter, - apache_httpd.ApacheHTTPDImporter, - mozilla.MozillaImporter, - gentoo.GentooImporter, - istio.IstioImporter, - project_kb_msr2019.ProjectKBMSRImporter, - suse_scores.SUSESeverityScoreImporter, - elixir_security.ElixirSecurityImporter, - xen.XenImporter, - ubuntu_usn.UbuntuUSNImporter, - fireeye.FireyeImporter, - oss_fuzz.OSSFuzzImporter, -] - -IMPORTERS_REGISTRY = { - x.pipeline_id if issubclass(x, VulnerableCodeBaseImporterPipeline) else x.qualified_name: x - for x in IMPORTERS_REGISTRY -} +IMPORTERS_REGISTRY = create_registry( + [ + nvd_importer_v2.NVDImporterPipeline, + github_importer_v2.GitHubAPIImporterPipeline, 
+ npm_importer_v2.NpmImporterPipeline, + vulnrichment_importer_v2.VulnrichImporterPipeline, + apache_httpd_v2.ApacheHTTPDImporterPipeline, + pypa_importer_v2.PyPaImporterPipeline, + gitlab_importer_v2.GitLabImporterPipeline, + pysec_importer_v2.PyPIImporterPipeline, + nvd_importer.NVDImporterPipeline, + github_importer.GitHubAPIImporterPipeline, + gitlab_importer.GitLabImporterPipeline, + github_osv.GithubOSVImporter, + pypa_importer.PyPaImporterPipeline, + npm_importer.NpmImporterPipeline, + nginx_importer.NginxImporterPipeline, + pysec_importer.PyPIImporterPipeline, + apache_tomcat.ApacheTomcatImporter, + postgresql.PostgreSQLImporter, + debian.DebianImporter, + curl.CurlImporter, + epss.EPSSImporter, + vulnrichment.VulnrichImporter, + alpine_linux_importer.AlpineLinuxImporterPipeline, + ruby.RubyImporter, + apache_kafka.ApacheKafkaImporter, + openssl.OpensslImporter, + redhat.RedhatImporter, + archlinux.ArchlinuxImporter, + ubuntu.UbuntuImporter, + debian_oval.DebianOvalImporter, + retiredotnet.RetireDotnetImporter, + apache_httpd.ApacheHTTPDImporter, + mozilla.MozillaImporter, + gentoo.GentooImporter, + istio.IstioImporter, + project_kb_msr2019.ProjectKBMSRImporter, + suse_scores.SUSESeverityScoreImporter, + elixir_security.ElixirSecurityImporter, + xen.XenImporter, + ubuntu_usn.UbuntuUSNImporter, + fireeye.FireyeImporter, + oss_fuzz.OSSFuzzImporter, + ] +) diff --git a/vulnerabilities/importers/curl.py b/vulnerabilities/importers/curl.py index a7f5e86fa..7cbc3208e 100644 --- a/vulnerabilities/importers/curl.py +++ b/vulnerabilities/importers/curl.py @@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData: ... ] ... 
} >>> parse_advisory_data(raw_data) - AdvisoryData(aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], url='https://curl.se/docs/CVE-2024-2379.json') + AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], 
date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json') """ affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else [] diff --git a/vulnerabilities/importers/osv.py b/vulnerabilities/importers/osv.py index 19867cda5..01f2d8023 100644 --- a/vulnerabilities/importers/osv.py +++ b/vulnerabilities/importers/osv.py @@ -107,6 +107,74 @@ def parse_advisory_data( ) +def parse_advisory_data_v2( + raw_data: dict, supported_ecosystems, advisory_url: str +) -> Optional[AdvisoryData]: + """ + Return an AdvisoryData build from a ``raw_data`` mapping of OSV advisory and + a ``supported_ecosystem`` string. + """ + advisory_id = raw_data.get("id") or "" + if not advisory_id: + logger.error(f"Missing advisory id in OSV data: {raw_data}") + return None + summary = raw_data.get("summary") or "" + details = raw_data.get("details") or "" + summary = build_description(summary=summary, description=details) + aliases = raw_data.get("aliases") or [] + + date_published = get_published_date(raw_data=raw_data) + severities = list(get_severities(raw_data=raw_data)) + references = get_references_v2(raw_data=raw_data) + + affected_packages = [] + + for affected_pkg in raw_data.get("affected") or []: + purl = get_affected_purl(affected_pkg=affected_pkg, raw_id=advisory_id) + + if not purl or purl.type not in supported_ecosystems: + logger.error(f"Unsupported package type: {affected_pkg!r} in OSV: {advisory_id!r}") + continue + + affected_version_range = get_affected_version_range( + affected_pkg=affected_pkg, + raw_id=advisory_id, + supported_ecosystem=purl.type, + ) + + for fixed_range in affected_pkg.get("ranges") or []: + fixed_version = get_fixed_versions( + fixed_range=fixed_range, raw_id=advisory_id, supported_ecosystem=purl.type + ) + + for version in fixed_version: + affected_packages.append( + AffectedPackage( + package=purl, + 
affected_version_range=affected_version_range,
+                        fixed_version=version,
+                    )
+                )
+    database_specific = raw_data.get("database_specific") or {}
+    cwe_ids = database_specific.get("cwe_ids") or []
+    weaknesses = list(map(get_cwe_id, cwe_ids))
+
+    if advisory_id in aliases:
+        aliases.remove(advisory_id)
+
+    return AdvisoryData(
+        advisory_id=advisory_id,
+        aliases=aliases,
+        summary=summary,
+        references_v2=references,
+        severities=severities,
+        affected_packages=affected_packages,
+        date_published=date_published,
+        weaknesses=weaknesses,
+        url=advisory_url,
+    )
+
+
 def extract_fixed_versions(fixed_range) -> Iterable[str]:
     """
     Return a list of fixed version strings given a ``fixed_range`` mapping of
@@ -187,6 +255,23 @@ def get_references(raw_data, severities) -> List[Reference]:
     return references
 
 
+def get_references_v2(raw_data) -> List[Reference]:
+    """
+    Return a list of Reference objects extracted from a mapping of OSV
+    ``raw_data``.
+    """
+    references = []
+    for ref in raw_data.get("references") or []:
+        if not ref:
+            continue
+        url = ref["url"]
+        if not url:
+            logger.error(f"Reference without URL : {ref!r} for OSV id: {raw_data['id']!r}")
+            continue
+        references.append(Reference(url=ref["url"]))
+    return references
+
+
 def get_affected_purl(affected_pkg, raw_id):
     """
     Return an affected PackageURL or None given a mapping of ``affected_pkg``
diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py
index 08cce6ff9..be6f73cb9 100644
--- a/vulnerabilities/improvers/__init__.py
+++ b/vulnerabilities/improvers/__init__.py
@@ -9,7 +9,6 @@
 from vulnerabilities.improvers import valid_versions
 from vulnerabilities.improvers import vulnerability_status
-from vulnerabilities.pipelines import VulnerableCodePipeline
 from vulnerabilities.pipelines import add_cvss31_to_CVEs
 from vulnerabilities.pipelines import collect_commits
 from vulnerabilities.pipelines import compute_advisory_todo
@@ -21,39 +20,55 @@
from vulnerabilities.pipelines import flag_ghost_packages from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline from vulnerabilities.pipelines import remove_duplicate_advisories +from vulnerabilities.pipelines.v2_improvers import collect_commits as collect_commits_v2 +from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2 +from vulnerabilities.pipelines.v2_improvers import ( + computer_package_version_rank as compute_version_rank_v2, +) +from vulnerabilities.pipelines.v2_improvers import enhance_with_exploitdb as exploitdb_v2 +from vulnerabilities.pipelines.v2_improvers import enhance_with_kev as enhance_with_kev_v2 +from vulnerabilities.pipelines.v2_improvers import ( + enhance_with_metasploit as enhance_with_metasploit_v2, +) +from vulnerabilities.pipelines.v2_improvers import flag_ghost_packages as flag_ghost_packages_v2 +from vulnerabilities.utils import create_registry -IMPROVERS_REGISTRY = [ - valid_versions.GitHubBasicImprover, - valid_versions.GitLabBasicImprover, - valid_versions.NginxBasicImprover, - valid_versions.ApacheHTTPDImprover, - valid_versions.DebianBasicImprover, - valid_versions.NpmImprover, - valid_versions.ElixirImprover, - valid_versions.ApacheTomcatImprover, - valid_versions.ApacheKafkaImprover, - valid_versions.IstioImprover, - valid_versions.DebianOvalImprover, - valid_versions.UbuntuOvalImprover, - valid_versions.OSSFuzzImprover, - valid_versions.RubyImprover, - valid_versions.GithubOSVImprover, - vulnerability_status.VulnerabilityStatusImprover, - valid_versions.CurlImprover, - flag_ghost_packages.FlagGhostPackagePipeline, - enhance_with_kev.VulnerabilityKevPipeline, - enhance_with_metasploit.MetasploitImproverPipeline, - enhance_with_exploitdb.ExploitDBImproverPipeline, - compute_package_risk.ComputePackageRiskPipeline, - compute_package_version_rank.ComputeVersionRankPipeline, - collect_commits.CollectFixCommitsPipeline, - 
add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, - remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline, - populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, - compute_advisory_todo.ComputeToDo, -] - -IMPROVERS_REGISTRY = { - x.pipeline_id if issubclass(x, VulnerableCodePipeline) else x.qualified_name: x - for x in IMPROVERS_REGISTRY -} +IMPROVERS_REGISTRY = create_registry( + [ + valid_versions.GitHubBasicImprover, + valid_versions.GitLabBasicImprover, + valid_versions.NginxBasicImprover, + valid_versions.ApacheHTTPDImprover, + valid_versions.DebianBasicImprover, + valid_versions.NpmImprover, + valid_versions.ElixirImprover, + valid_versions.ApacheTomcatImprover, + valid_versions.ApacheKafkaImprover, + valid_versions.IstioImprover, + valid_versions.DebianOvalImprover, + valid_versions.UbuntuOvalImprover, + valid_versions.OSSFuzzImprover, + valid_versions.RubyImprover, + valid_versions.GithubOSVImprover, + vulnerability_status.VulnerabilityStatusImprover, + valid_versions.CurlImprover, + flag_ghost_packages.FlagGhostPackagePipeline, + enhance_with_kev.VulnerabilityKevPipeline, + enhance_with_metasploit.MetasploitImproverPipeline, + enhance_with_exploitdb.ExploitDBImproverPipeline, + compute_package_risk.ComputePackageRiskPipeline, + compute_package_version_rank.ComputeVersionRankPipeline, + collect_commits.CollectFixCommitsPipeline, + add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, + remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline, + populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, + exploitdb_v2.ExploitDBImproverPipeline, + enhance_with_kev_v2.VulnerabilityKevPipeline, + flag_ghost_packages_v2.FlagGhostPackagePipeline, + enhance_with_metasploit_v2.MetasploitImproverPipeline, + compute_package_risk_v2.ComputePackageRiskPipeline, + compute_version_rank_v2.ComputeVersionRankPipeline, + collect_commits_v2.CollectFixCommitsPipeline, + compute_advisory_todo.ComputeToDo, + ] +) diff --git 
a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index f4876b11a..78ec8bb0a 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -14,6 +14,7 @@ from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importers import IMPORTERS_REGISTRY from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 class Command(BaseCommand): @@ -57,7 +58,9 @@ def import_data(self, importers): failed_importers = [] for importer in importers: - if issubclass(importer, VulnerableCodeBaseImporterPipeline): + if issubclass(importer, VulnerableCodeBaseImporterPipeline) or issubclass( + importer, VulnerableCodeBaseImporterPipelineV2 + ): self.stdout.write(f"Importing data using {importer.pipeline_id}") status, error = importer().execute() if status != 0: diff --git a/vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py b/vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py new file mode 100644 index 000000000..fd3f74d98 --- /dev/null +++ b/vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py @@ -0,0 +1,635 @@ +# Generated by Django 4.2.20 on 2025-07-01 10:38 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0093_advisorytodo_todorelatedadvisory_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="AdvisoryAlias", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "alias", + models.CharField( + help_text="An alias is a unique vulnerability identifier in some database, such as CVE-2020-2233", + max_length=50, + unique=True, + ), + ), + ], + options={ + 
"ordering": ["alias"], + }, + ), + migrations.CreateModel( + name="AdvisoryReference", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "url", + models.URLField( + help_text="URL to the vulnerability reference", max_length=1024, unique=True + ), + ), + ( + "reference_type", + models.CharField( + blank=True, + choices=[ + ("advisory", "Advisory"), + ("exploit", "Exploit"), + ("mailing_list", "Mailing List"), + ("bug", "Bug"), + ("other", "Other"), + ], + max_length=20, + ), + ), + ( + "reference_id", + models.CharField( + blank=True, + db_index=True, + help_text="An optional reference ID, such as DSA-4465-1 when available", + max_length=500, + ), + ), + ], + options={ + "ordering": ["reference_id", "url", "reference_type"], + }, + ), + migrations.CreateModel( + name="AdvisorySeverity", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "url", + models.URLField( + db_index=True, + help_text="URL to the vulnerability severity", + max_length=1024, + null=True, + ), + ), + ( + "scoring_system", + models.CharField( + choices=[ + ("cvssv2", "CVSSv2 Base Score"), + ("cvssv3", "CVSSv3 Base Score"), + ("cvssv3.1", "CVSSv3.1 Base Score"), + ("cvssv4", "CVSSv4 Base Score"), + ("rhbs", "RedHat Bugzilla severity"), + ("rhas", "RedHat Aggregate severity"), + ("archlinux", "Archlinux Vulnerability Group Severity"), + ("cvssv3.1_qr", "CVSSv3.1 Qualitative Severity Rating"), + ("generic_textual", "Generic textual severity rating"), + ("apache_httpd", "Apache Httpd Severity"), + ("apache_tomcat", "Apache Tomcat Severity"), + ("epss", "Exploit Prediction Scoring System"), + ("ssvc", "Stakeholder-Specific Vulnerability Categorization"), + ], + help_text="Identifier for the scoring system used. 
Available choices are: cvssv2: CVSSv2 Base Score,\ncvssv3: CVSSv3 Base Score,\ncvssv3.1: CVSSv3.1 Base Score,\ncvssv4: CVSSv4 Base Score,\nrhbs: RedHat Bugzilla severity,\nrhas: RedHat Aggregate severity,\narchlinux: Archlinux Vulnerability Group Severity,\ncvssv3.1_qr: CVSSv3.1 Qualitative Severity Rating,\ngeneric_textual: Generic textual severity rating,\napache_httpd: Apache Httpd Severity,\napache_tomcat: Apache Tomcat Severity,\nepss: Exploit Prediction Scoring System,\nssvc: Stakeholder-Specific Vulnerability Categorization ", + max_length=50, + ), + ), + ( + "value", + models.CharField(help_text="Example: 9.0, Important, High", max_length=50), + ), + ( + "scoring_elements", + models.CharField( + help_text="Supporting scoring elements used to compute the score values. For example a CVSS vector string as used to compute a CVSS score.", + max_length=150, + null=True, + ), + ), + ( + "published_at", + models.DateTimeField( + blank=True, + help_text="UTC Date of publication of the vulnerability severity", + null=True, + ), + ), + ], + options={ + "ordering": ["url", "scoring_system", "value"], + }, + ), + migrations.CreateModel( + name="AdvisoryV2", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "datasource_id", + models.CharField( + help_text="Unique ID for the datasource used for this advisory .e.g.: nginx_importer_v2", + max_length=100, + ), + ), + ( + "advisory_id", + models.CharField( + help_text="An advisory is a unique vulnerability identifier in some database, such as PYSEC-2020-2233", + max_length=50, + ), + ), + ( + "avid", + models.CharField( + help_text="Unique ID for the datasource used for this advisory .e.g.: pysec_importer_v2/PYSEC-2020-2233", + max_length=500, + ), + ), + ( + "unique_content_id", + models.CharField( + help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex", + max_length=64, + unique=True, + ), + 
), + ("url", models.URLField(help_text="Link to the advisory on the upstream website")), + ("summary", models.TextField(blank=True)), + ( + "date_published", + models.DateTimeField( + blank=True, help_text="UTC Date of publication of the advisory", null=True + ), + ), + ( + "date_collected", + models.DateTimeField(help_text="UTC Date on which the advisory was collected"), + ), + ( + "date_imported", + models.DateTimeField( + blank=True, + help_text="UTC Date on which the advisory was imported", + null=True, + ), + ), + ( + "status", + models.IntegerField( + choices=[(1, "Published"), (2, "Disputed"), (3, "Invalid")], default=1 + ), + ), + ( + "exploitability", + models.DecimalField( + blank=True, + decimal_places=1, + help_text="Exploitability indicates the likelihood that a vulnerability in a software package could be used by malicious actors to compromise systems, applications, or networks. This metric is determined automatically based on the discovery of known exploits.", + max_digits=2, + null=True, + ), + ), + ( + "weighted_severity", + models.DecimalField( + blank=True, + decimal_places=1, + help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.", + max_digits=3, + null=True, + ), + ), + ], + options={ + "ordering": ["datasource_id", "advisory_id", "date_published", "unique_content_id"], + }, + ), + migrations.CreateModel( + name="AdvisoryWeakness", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ("cwe_id", models.IntegerField(help_text="CWE id")), + ], + ), + migrations.CreateModel( + name="PackageV2", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "type", + models.CharField( + blank=True, + help_text="A short code to identify the type of this package. 
For example: gem for a Rubygem, docker for a container, pypi for a Python Wheel or Egg, maven for a Maven Jar, deb for a Debian package, etc.", + max_length=16, + ), + ), + ( + "namespace", + models.CharField( + blank=True, + help_text="Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.", + max_length=255, + ), + ), + ( + "name", + models.CharField(blank=True, help_text="Name of the package.", max_length=100), + ), + ( + "version", + models.CharField( + blank=True, help_text="Version of the package.", max_length=100 + ), + ), + ( + "qualifiers", + models.CharField( + blank=True, + help_text="Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.", + max_length=1024, + ), + ), + ( + "subpath", + models.CharField( + blank=True, + help_text="Extra subpath within a package, relative to the package root.", + max_length=200, + ), + ), + ( + "package_url", + models.CharField( + db_index=True, + help_text="The Package URL for this package.", + max_length=1000, + ), + ), + ( + "plain_package_url", + models.CharField( + db_index=True, + help_text="The Package URL for this package without qualifiers and subpath.", + max_length=1000, + ), + ), + ( + "is_ghost", + models.BooleanField( + db_index=True, + default=False, + help_text="True if the package does not exist in the upstream package manager or its repository.", + ), + ), + ( + "risk_score", + models.DecimalField( + decimal_places=1, + help_text="Risk score between 0.00 and 10.00, where higher values indicate greater vulnerability risk for the package.", + max_digits=3, + null=True, + ), + ), + ( + "version_rank", + models.IntegerField( + db_index=True, + default=0, + help_text="Rank of the version to support ordering by version. 
Rank zero means the rank has not been defined yet", + ), + ), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="CodeFixV2", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "commits", + models.JSONField( + blank=True, + default=list, + help_text="List of commit identifiers using VCS URLs associated with the code change.", + ), + ), + ( + "pulls", + models.JSONField( + blank=True, + default=list, + help_text="List of pull request URLs associated with the code change.", + ), + ), + ( + "downloads", + models.JSONField( + blank=True, + default=list, + help_text="List of download URLs for the patched code.", + ), + ), + ( + "patch", + models.TextField( + blank=True, + help_text="The code change as a patch in unified diff format.", + null=True, + ), + ), + ( + "notes", + models.TextField( + blank=True, + help_text="Notes or instructions about this code change.", + null=True, + ), + ), + ( + "references", + models.JSONField( + blank=True, + default=list, + help_text="URL references related to this code change.", + ), + ), + ( + "is_reviewed", + models.BooleanField( + default=False, help_text="Indicates if this code change has been reviewed." 
+ ), + ), + ( + "created_at", + models.DateTimeField( + auto_now_add=True, + help_text="Timestamp indicating when this code change was created.", + ), + ), + ( + "updated_at", + models.DateTimeField( + auto_now=True, + help_text="Timestamp indicating when this code change was last updated.", + ), + ), + ( + "advisory", + models.ForeignKey( + help_text="The affected package version to which this code fix applies.", + on_delete=django.db.models.deletion.CASCADE, + related_name="code_fix_v2", + to="vulnerabilities.advisoryv2", + ), + ), + ( + "affected_package", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="code_fix_v2_affected", + to="vulnerabilities.packagev2", + ), + ), + ( + "base_package_version", + models.ForeignKey( + blank=True, + help_text="The base package version to which this code change applies.", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="codechanges_v2", + to="vulnerabilities.packagev2", + ), + ), + ( + "fixed_package", + models.ForeignKey( + blank=True, + help_text="The fixing package version with this code fix", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="code_fix_v2_fixed", + to="vulnerabilities.packagev2", + ), + ), + ], + options={ + "abstract": False, + }, + ), + migrations.AddField( + model_name="advisoryv2", + name="affecting_packages", + field=models.ManyToManyField( + help_text="A list of packages that are affected by this advisory.", + related_name="affected_by_advisories", + to="vulnerabilities.packagev2", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="aliases", + field=models.ManyToManyField( + help_text="A list of serializable Alias objects", + related_name="advisories", + to="vulnerabilities.advisoryalias", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="fixed_by_packages", + field=models.ManyToManyField( + help_text="A list of packages that are reported by this advisory.", + 
related_name="fixing_advisories", + to="vulnerabilities.packagev2", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="references", + field=models.ManyToManyField( + help_text="A list of serializable Reference objects", + related_name="advisories", + to="vulnerabilities.advisoryreference", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="severities", + field=models.ManyToManyField( + help_text="A list of vulnerability severities associated with this advisory.", + related_name="advisories", + to="vulnerabilities.advisoryseverity", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="weaknesses", + field=models.ManyToManyField( + help_text="A list of software weaknesses associated with this advisory.", + related_name="advisories", + to="vulnerabilities.advisoryweakness", + ), + ), + migrations.CreateModel( + name="AdvisoryExploit", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "date_added", + models.DateField( + blank=True, + help_text="The date the vulnerability was added to an exploit catalog.", + null=True, + ), + ), + ( + "description", + models.TextField( + blank=True, + help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description", + null=True, + ), + ), + ( + "required_action", + models.TextField( + blank=True, + help_text="The required action to address the vulnerability, typically to apply vendor updates or apply vendor mitigations or to discontinue use.", + null=True, + ), + ), + ( + "due_date", + models.DateField( + blank=True, + help_text="The date the required action is due, which applies to all USA federal civilian executive branch (FCEB) agencies, but all organizations are strongly encouraged to execute the required action", + null=True, + ), + ), + ( + "notes", + models.TextField( + blank=True, + help_text="Additional notes and resources about the 
vulnerability, often a URL to vendor instructions.", + null=True, + ), + ), + ( + "known_ransomware_campaign_use", + models.BooleanField( + default=False, + help_text="Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; \n or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.", + ), + ), + ( + "source_date_published", + models.DateField( + blank=True, + help_text="The date that the exploit was published or disclosed.", + null=True, + ), + ), + ( + "exploit_type", + models.TextField( + blank=True, + help_text="The type of the exploit as provided by the original upstream data source.", + null=True, + ), + ), + ( + "platform", + models.TextField( + blank=True, + help_text="The platform associated with the exploit as provided by the original upstream data source.", + null=True, + ), + ), + ( + "source_date_updated", + models.DateField( + blank=True, + help_text="The date the exploit was updated in the original upstream data source.", + null=True, + ), + ), + ( + "data_source", + models.TextField( + blank=True, + help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.", + null=True, + ), + ), + ( + "source_url", + models.URLField( + blank=True, + help_text="The URL to the exploit as provided in the original upstream data source.", + null=True, + ), + ), + ( + "advisory", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="exploits", + to="vulnerabilities.advisoryv2", + ), + ), + ], + ), + migrations.AlterUniqueTogether( + name="advisoryv2", + unique_together={("datasource_id", "advisory_id", "unique_content_id")}, + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index c4ccbd1fa..ab01010d7 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -172,6 +172,7 @@ def with_package_counts(self): ) +# FIXME: Remove when migration from Vulnerability to Advisory is 
completed class VulnerabilitySeverity(models.Model): url = models.URLField( max_length=1024, @@ -211,6 +212,7 @@ class Meta: ordering = ["url", "scoring_system", "value"] +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilityStatusType(models.IntegerChoices): """List of vulnerability statuses.""" @@ -219,6 +221,7 @@ class VulnerabilityStatusType(models.IntegerChoices): INVALID = 3, "Invalid" +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Vulnerability(models.Model): """ A software vulnerability with a unique identifier and alternate ``aliases``. @@ -511,6 +514,7 @@ def get_cwes(self): Database.get_cwes = get_cwes +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Weakness(models.Model): """ A Common Weakness Enumeration model @@ -557,6 +561,7 @@ def to_dict(self): return {"cwe_id": self.cwe_id, "name": self.name, "description": self.description} +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilityReferenceQuerySet(BaseQuerySet): def for_cpe(self): """ @@ -565,6 +570,7 @@ def for_cpe(self): return self.filter(reference_id__startswith="cpe") +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilityReference(models.Model): """ A reference to a vulnerability such as a security advisory from a Linux distribution or language @@ -622,6 +628,7 @@ def is_cpe(self): return self.reference_id.startswith("cpe") +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilityRelatedReference(models.Model): """ A reference related to a vulnerability. 
@@ -642,6 +649,7 @@ class Meta: ordering = ["vulnerability", "reference"] +# FIXME: Remove when migration from Vulnerability to Advisory is completed class PackageQuerySet(BaseQuerySet, PackageURLQuerySet): def get_fixed_by_package_versions(self, purl: PackageURL, fix=True): """ @@ -808,6 +816,7 @@ def get_purl_query_lookups(purl): return purl_to_dict(plain_purl, with_empty=False) +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Package(PackageURLMixin): """ A software package with related vulnerabilities. @@ -1118,7 +1127,6 @@ def fixing_vulnerabilities(self): """ Return a queryset of Vulnerabilities that are fixed by this package. """ - print("A") return self.fixed_by_vulnerabilities.all() @property @@ -1136,6 +1144,7 @@ def affecting_vulns(self): ) +# FIXME: Remove when migration from Vulnerability to Advisory is completed class PackageRelatedVulnerabilityBase(models.Model): """ Abstract base class for package-vulnerability relations. @@ -1232,11 +1241,13 @@ def add_package_vulnerability_changelog(self, advisory): ) +# FIXME: Remove when migration from Vulnerability to Advisory is completed class FixingPackageRelatedVulnerability(PackageRelatedVulnerabilityBase): class Meta(PackageRelatedVulnerabilityBase.Meta): verbose_name_plural = "Fixing Package Related Vulnerabilities" +# FIXME: Remove when migration from Vulnerability to Advisory is completed class AffectedByPackageRelatedVulnerability(PackageRelatedVulnerabilityBase): severities = models.ManyToManyField( @@ -1258,6 +1269,7 @@ def for_cve(self): return self.filter(alias__startswith="CVE") +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Alias(models.Model): """ An alias is a unique vulnerability identifier in some database, such as @@ -1311,10 +1323,35 @@ def url(self): return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json" +class AdvisoryV2QuerySet(BaseQuerySet): + def search(query): + """ + This function 
will take a string as an input, the string could be an alias or an advisory ID or + something in the advisory description. + """ + return AdvisoryV2.objects.filter( + Q(advisory_id__icontains=query) + | Q(aliases__alias__icontains=query) + | Q(summary__icontains=query) + | Q(references__url__icontains=query) + ).distinct() + + class AdvisoryQuerySet(BaseQuerySet): - pass + def search(query): + """ + This function will take a string as an input, the string could be an alias or an advisory ID or + something in the advisory description. + """ + return Advisory.objects.filter( + Q(advisory_id__icontains=query) + | Q(aliases__alias__icontains=query) + | Q(summary__icontains=query) + | Q(references__url__icontains=query) + ).distinct() +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Advisory(models.Model): """ An advisory represents data directly obtained from upstream transformed @@ -1797,6 +1834,60 @@ class Meta: abstract = True +class CodeChangeV2(models.Model): + """ + Abstract base model representing a change in code, either introducing or fixing a vulnerability. + This includes details about commits, patches, and related metadata. + + We are tracking commits, pulls and downloads as references to the code change. The goal is to + keep track and store the actual code patch in the ``patch`` field. When not available the patch + will be inferred from these references using improvers. + """ + + commits = models.JSONField( + blank=True, + default=list, + help_text="List of commit identifiers using VCS URLs associated with the code change.", + ) + pulls = models.JSONField( + blank=True, + default=list, + help_text="List of pull request URLs associated with the code change.", + ) + downloads = models.JSONField( + blank=True, default=list, help_text="List of download URLs for the patched code." + ) + patch = models.TextField( + blank=True, null=True, help_text="The code change as a patch in unified diff format." 
+ ) + base_package_version = models.ForeignKey( + "PackageV2", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="codechanges_v2", + help_text="The base package version to which this code change applies.", + ) + notes = models.TextField( + blank=True, null=True, help_text="Notes or instructions about this code change." + ) + references = models.JSONField( + blank=True, default=list, help_text="URL references related to this code change." + ) + is_reviewed = models.BooleanField( + default=False, help_text="Indicates if this code change has been reviewed." + ) + created_at = models.DateTimeField( + auto_now_add=True, help_text="Timestamp indicating when this code change was created." + ) + updated_at = models.DateTimeField( + auto_now=True, help_text="Timestamp indicating when this code change was last updated." + ) + + class Meta: + abstract = True + + class CodeFix(CodeChange): """ A code fix is a code change that addresses a vulnerability and is associated: @@ -1821,6 +1912,35 @@ class CodeFix(CodeChange): ) +class CodeFixV2(CodeChangeV2): + """ + A code fix is a code change that addresses a vulnerability and is associated: + - with a specific advisory + - package that has been affected + - optionally with a specific fixing package version when it is known + """ + + advisory = models.ForeignKey( + "AdvisoryV2", + on_delete=models.CASCADE, + related_name="code_fix_v2", + help_text="The affected package version to which this code fix applies.", + ) + + affected_package = models.ForeignKey( + "PackageV2", on_delete=models.CASCADE, related_name="code_fix_v2_affected" + ) + + fixed_package = models.ForeignKey( + "PackageV2", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="code_fix_v2_fixed", + help_text="The fixing package version with this code fix", + ) + + class PipelineRun(models.Model): """The Database representation of a pipeline execution.""" @@ -2333,21 +2453,812 @@ class AdvisoryToDo(models.Model): class Meta: 
unique_together = ("related_advisories_id", "issue_type") - def save(self, *args, **kwargs): - self.full_clean() - return super().save(*args, **kwargs) +class AdvisorySeverity(models.Model): + url = models.URLField( + max_length=1024, + null=True, + help_text="URL to the vulnerability severity", + db_index=True, + ) -class ToDoRelatedAdvisory(models.Model): - todo = models.ForeignKey( - AdvisoryToDo, - on_delete=models.CASCADE, + scoring_system_choices = tuple( + (system.identifier, system.name) for system in SCORING_SYSTEMS.values() ) - advisory = models.ForeignKey( - Advisory, - on_delete=models.CASCADE, + scoring_system = models.CharField( + max_length=50, + choices=scoring_system_choices, + help_text="Identifier for the scoring system used. Available choices are: {} ".format( + ",\n".join(f"{sid}: {sname}" for sid, sname in scoring_system_choices) + ), + ) + + value = models.CharField(max_length=50, help_text="Example: 9.0, Important, High") + + scoring_elements = models.CharField( + max_length=150, + null=True, + help_text="Supporting scoring elements used to compute the score values. " + "For example a CVSS vector string as used to compute a CVSS score.", + ) + + published_at = models.DateTimeField( + blank=True, null=True, help_text="UTC Date of publication of the vulnerability severity" ) + objects = BaseQuerySet.as_manager() + class Meta: - unique_together = ("todo", "advisory") + ordering = ["url", "scoring_system", "value"] + + +class AdvisoryWeakness(models.Model): + """ + A weakness is a software weakness that is associated with a vulnerability. 
+ """ + + cwe_id = models.IntegerField(help_text="CWE id") + + cwe_by_id = {} + + def get_cwe(self, cwe_id): + if not self.cwe_by_id: + db = Database() + for weakness in db.get_cwes(): + self.cwe_by_id[str(weakness.cwe_id)] = weakness + return self.cwe_by_id[cwe_id] + + @property + def cwe(self): + return f"CWE-{self.cwe_id}" + + @property + def weakness(self): + """ + Return a queryset of Weakness for this vulnerability. + """ + try: + weakness = self.get_cwe(str(self.cwe_id)) + return weakness + except Exception as e: + logger.warning(f"Could not find CWE {self.cwe_id}: {e}") + + @property + def name(self): + """Return the weakness's name.""" + return self.weakness.name if self.weakness else "" + + @property + def description(self): + """Return the weakness's description.""" + return self.weakness.description if self.weakness else "" + + def to_dict(self): + return {"cwe_id": self.cwe_id, "name": self.name, "description": self.description} + + +class AdvisoryReference(models.Model): + url = models.URLField( + max_length=1024, + help_text="URL to the vulnerability reference", + unique=True, + ) + + ADVISORY = "advisory" + EXPLOIT = "exploit" + MAILING_LIST = "mailing_list" + BUG = "bug" + OTHER = "other" + + REFERENCE_TYPES = [ + (ADVISORY, "Advisory"), + (EXPLOIT, "Exploit"), + (MAILING_LIST, "Mailing List"), + (BUG, "Bug"), + (OTHER, "Other"), + ] + + reference_type = models.CharField(max_length=20, choices=REFERENCE_TYPES, blank=True) + + reference_id = models.CharField( + max_length=500, + help_text="An optional reference ID, such as DSA-4465-1 when available", + blank=True, + db_index=True, + ) + + class Meta: + ordering = ["reference_id", "url", "reference_type"] + + def __str__(self): + reference_id = f" {self.reference_id}" if self.reference_id else "" + return f"{self.url}{reference_id}" + + @property + def is_cpe(self): + """ + Return True if this is a CPE reference. 
+ """ + return self.reference_id.startswith("cpe") + + +class AdvisoryAlias(models.Model): + alias = models.CharField( + max_length=50, + unique=True, + blank=False, + null=False, + help_text="An alias is a unique vulnerability identifier in some database, " + "such as CVE-2020-2233", + ) + + class Meta: + ordering = ["alias"] + + def __str__(self): + return self.alias + + @cached_property + def url(self): + """ + Create a URL for the alias. + """ + alias: str = self.alias + if alias.startswith("CVE"): + return f"https://nvd.nist.gov/vuln/detail/{alias}" + + if alias.startswith("GHSA"): + return f"https://github.com/advisories/{alias}" + + if alias.startswith("NPM-"): + id = alias.lstrip("NPM-") + return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json" + + +class AdvisoryV2(models.Model): + """ + An advisory represents data directly obtained from upstream transformed + into structured data + """ + + # This is similar to a type or a namespace + datasource_id = models.CharField( + max_length=100, + blank=False, + null=False, + help_text="Unique ID for the datasource used for this advisory ." "e.g.: nginx_importer_v2", + ) + + # This is similar to a name + advisory_id = models.CharField( + max_length=50, + blank=False, + null=False, + unique=False, + help_text="An advisory is a unique vulnerability identifier in some database, " + "such as PYSEC-2020-2233", + ) + + avid = models.CharField( + max_length=500, + blank=False, + null=False, + help_text="Unique ID for the datasource used for this advisory ." 
+ "e.g.: pysec_importer_v2/PYSEC-2020-2233", + ) + + # This is similar to a version + unique_content_id = models.CharField( + max_length=64, + blank=False, + null=False, + unique=True, + help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex", + ) + url = models.URLField( + blank=False, + null=False, + help_text="Link to the advisory on the upstream website", + ) + + # TODO: Have a mapping that gives datasource class by datasource ID + # Get label from datasource class + # Remove this from model + # In the UI - Use label + # In the API - Use datasource_id + # Have an API endpoint for all info for datasources - show license, label + + summary = models.TextField( + blank=True, + ) + aliases = models.ManyToManyField( + AdvisoryAlias, + related_name="advisories", + help_text="A list of serializable Alias objects", + ) + references = models.ManyToManyField( + AdvisoryReference, + related_name="advisories", + help_text="A list of serializable Reference objects", + ) + severities = models.ManyToManyField( + AdvisorySeverity, + related_name="advisories", + help_text="A list of vulnerability severities associated with this advisory.", + ) + weaknesses = models.ManyToManyField( + AdvisoryWeakness, + related_name="advisories", + help_text="A list of software weaknesses associated with this advisory.", + ) + date_published = models.DateTimeField( + blank=True, null=True, help_text="UTC Date of publication of the advisory" + ) + date_collected = models.DateTimeField(help_text="UTC Date on which the advisory was collected") + date_imported = models.DateTimeField( + blank=True, null=True, help_text="UTC Date on which the advisory was imported" + ) + + affecting_packages = models.ManyToManyField( + "PackageV2", + related_name="affected_by_advisories", + help_text="A list of packages that are affected by this advisory.", + ) + + fixed_by_packages = models.ManyToManyField( + "PackageV2", + related_name="fixing_advisories", + 
help_text="A list of packages that are reported by this advisory.", + ) + + status = models.IntegerField( + choices=VulnerabilityStatusType.choices, default=VulnerabilityStatusType.PUBLISHED + ) + + exploitability = models.DecimalField( + null=True, + blank=True, + max_digits=2, + decimal_places=1, + help_text="Exploitability indicates the likelihood that a vulnerability in a software package could be used by malicious actors to compromise systems, " + "applications, or networks. This metric is determined automatically based on the discovery of known exploits.", + ) + + weighted_severity = models.DecimalField( + null=True, + blank=True, + max_digits=3, + decimal_places=1, + help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.", + ) + + @property + def risk_score(self): + """ + Risk expressed as a number ranging from 0 to 10. + Risk is calculated from weighted severity and exploitability values. + It is the maximum value of (the weighted severity multiplied by its exploitability) or 10 + Risk = min(weighted severity * exploitability, 10) + """ + if self.exploitability and self.weighted_severity: + risk_score = min(float(self.exploitability * self.weighted_severity), 10.0) + return round(risk_score, 1) + + objects = AdvisoryQuerySet.as_manager() + + class Meta: + unique_together = ["datasource_id", "advisory_id", "unique_content_id"] + ordering = ["datasource_id", "advisory_id", "date_published", "unique_content_id"] + + def save(self, *args, **kwargs): + self.full_clean() + return super().save(*args, **kwargs) + + @property + def get_status_label(self): + label_by_status = {choice[0]: choice[1] for choice in VulnerabilityStatusType.choices} + return label_by_status.get(self.status) or VulnerabilityStatusType.PUBLISHED.label + + def get_absolute_url(self): + """ + Return this Vulnerability details absolute URL. 
+ """ + return reverse("advisory_details", args=[self.id]) + + def to_advisory_data(self) -> "AdvisoryDataV2": + from vulnerabilities.importer import AdvisoryDataV2 + from vulnerabilities.importer import AffectedPackage + from vulnerabilities.importer import ReferenceV2 + + return AdvisoryDataV2( + aliases=[item.alias for item in self.aliases.all()], + summary=self.summary, + affected_packages=[ + AffectedPackage.from_dict(pkg) for pkg in self.affected_packages if pkg + ], + references=[ReferenceV2.from_dict(ref) for ref in self.references], + date_published=self.date_published, + weaknesses=self.weaknesses, + severities=self.severities, + url=self.url, + ) + + @property + def get_aliases(self): + """ + Return a queryset of all Aliases for this vulnerability. + """ + return self.aliases.all() + + def aggregate_fixed_and_affected_packages(self): + from vulnerabilities.utils import get_purl_version_class + + sorted_fixed_by_packages = self.fixed_by_packages.filter(is_ghost=False).order_by( + "type", "namespace", "name", "qualifiers", "subpath" + ) + + if sorted_fixed_by_packages: + sorted_fixed_by_packages.first().calculate_version_rank + + sorted_affected_packages = self.affecting_packages.all() + + if sorted_affected_packages: + sorted_affected_packages.first().calculate_version_rank + + grouped_fixed_by_packages = { + key: list(group) + for key, group in groupby( + sorted_fixed_by_packages, + key=attrgetter("type", "namespace", "name", "qualifiers", "subpath"), + ) + } + + all_affected_fixed_by_matches = [] + + for sorted_affected_package in sorted_affected_packages: + affected_fixed_by_matches = { + "affected_package": sorted_affected_package, + "matched_fixed_by_packages": [], + } + + # Build the key to find matching group + key = ( + sorted_affected_package.type, + sorted_affected_package.namespace, + sorted_affected_package.name, + sorted_affected_package.qualifiers, + sorted_affected_package.subpath, + ) + + # Get matching group from pre-grouped 
fixed_by_packages + matching_fixed_packages = grouped_fixed_by_packages.get(key, []) + + # Get version classes for comparison + affected_version_class = get_purl_version_class(sorted_affected_package) + affected_version = affected_version_class(sorted_affected_package.version) + + # Compare versions and filter valid matches + matched_fixed_by_packages = [ + fixed_by_package.purl + for fixed_by_package in matching_fixed_packages + if get_purl_version_class(fixed_by_package)(fixed_by_package.version) + > affected_version + ] + + affected_fixed_by_matches["matched_fixed_by_packages"] = matched_fixed_by_packages + all_affected_fixed_by_matches.append(affected_fixed_by_matches) + return sorted_fixed_by_packages, sorted_affected_packages, all_affected_fixed_by_matches + + alias = get_aliases + + +class ToDoRelatedAdvisory(models.Model): + todo = models.ForeignKey( + AdvisoryToDo, + on_delete=models.CASCADE, + ) + + advisory = models.ForeignKey( + Advisory, + on_delete=models.CASCADE, + ) + + class Meta: + unique_together = ("todo", "advisory") + + +class PackageQuerySetV2(BaseQuerySet, PackageURLQuerySet): + def search(self, query: str = None): + """ + Return a Package queryset searching for the ``query``. + Make a best effort approach to find matching packages either based + on exact purl, partial purl or just name and namespace. 
+ """ + query = query and query.strip() + if not query: + return self.none() + qs = self + + try: + # if it's a valid purl, try to parse it and use it as is + purl = str(utils.plain_purl(query)) + qs = qs.filter(package_url__istartswith=purl) + except ValueError: + # otherwise use query as a plain string + qs = qs.filter(package_url__icontains=query) + return qs.order_by("package_url") + + def with_vulnerability_counts(self): + return self.annotate( + vulnerability_count=Count( + "affected_by_advisories", + ), + patched_vulnerability_count=Count( + "fixing_advisories", + ), + ) + + def get_fixed_by_package_versions(self, purl: PackageURL, fix=True): + """ + Return a queryset of all the package versions of this `package` that fix any vulnerability. + If `fix` is False, return all package versions whether or not they fix a vulnerability. + """ + filter_dict = { + "name": purl.name, + "namespace": purl.namespace, + "type": purl.type, + "qualifiers": purl.qualifiers, + "subpath": purl.subpath, + } + + if fix: + filter_dict["fixing_advisories__isnull"] = False + + # TODO: why do we need distinct + return PackageV2.objects.filter(**filter_dict).distinct() + + def get_or_create_from_purl(self, purl: Union[PackageURL, str]): + """ + Return a new or existing Package given a ``purl`` PackageURL object or PURL string. + """ + package, is_created = PackageV2.objects.get_or_create(**purl_to_dict(purl=purl)) + + return package, is_created + + def only_vulnerable(self): + return self._vulnerable(True) + + def only_non_vulnerable(self): + return self._vulnerable(False).filter(is_ghost=False) + + def for_purl(self, purl): + """ + Return a queryset matching the ``purl`` Package URL. + """ + return self.filter(package_url=purl) + + def for_purls(self, purls=()): + """ + Return a queryset of Packages matching a list of PURLs. 
+ """ + return self.filter(package_url__in=purls).distinct() + + def _vulnerable(self, vulnerable=True): + """ + Filter to select only vulnerable or non-vulnearble packages. + """ + return self.with_is_vulnerable().filter(is_vulnerable=vulnerable) + + def with_is_vulnerable(self): + """ + Annotate Package with ``is_vulnerable`` boolean attribute. + """ + return self.annotate( + is_vulnerable=Exists(AdvisoryV2.objects.filter(affecting_packages__pk=OuterRef("pk"))) + ) + + +class PackageV2(PackageURLMixin): + """ + A software package with related vulnerabilities. + """ + + package_url = models.CharField( + max_length=1000, + null=False, + help_text="The Package URL for this package.", + db_index=True, + ) + + plain_package_url = models.CharField( + max_length=1000, + null=False, + help_text="The Package URL for this package without qualifiers and subpath.", + db_index=True, + ) + + is_ghost = models.BooleanField( + default=False, + help_text="True if the package does not exist in the upstream package manager or its repository.", + db_index=True, + ) + + risk_score = models.DecimalField( + null=True, + max_digits=3, + decimal_places=1, + help_text="Risk score between 0.00 and 10.00, where higher values " + "indicate greater vulnerability risk for the package.", + ) + + version_rank = models.IntegerField( + help_text="Rank of the version to support ordering by version. Rank " + "zero means the rank has not been defined yet", + default=0, + db_index=True, + ) + + def __str__(self): + return self.package_url + + @property + def purl(self): + return self.package_url + + def save(self, *args, **kwargs): + """ + Save, normalizing PURL fields. 
+ """ + purl = PackageURL( + type=self.type, + namespace=self.namespace, + name=self.name, + version=self.version, + qualifiers=self.qualifiers, + subpath=self.subpath, + ) + + # We re-parse the purl to ensure name and namespace + # are set correctly + normalized = normalize_purl(purl=purl) + + for name, value in purl_to_dict(normalized).items(): + setattr(self, name, value) + + self.package_url = str(normalized) + plain_purl = utils.plain_purl(normalized) + self.plain_package_url = str(plain_purl) + super().save(*args, **kwargs) + + objects = PackageQuerySetV2.as_manager() + + @property + def calculate_version_rank(self): + """ + Calculate and return the `version_rank` for a package that does not have one. + If this package already has a `version_rank`, return it. + + The calculated rank will be interpolated between two packages that have + `version_rank` values and are closest to this package in terms of version order. + """ + + group_packages = PackageV2.objects.filter( + type=self.type, + namespace=self.namespace, + name=self.name, + ) + + if any(p.version_rank == 0 for p in group_packages): + sorted_packages = sorted(group_packages, key=lambda p: self.version_class(p.version)) + for rank, package in enumerate(sorted_packages, start=1): + package.version_rank = rank + PackageV2.objects.bulk_update(sorted_packages, fields=["version_rank"]) + return self.version_rank + + @property + def fixed_package_details(self): + """ + Return a mapping of vulnerabilities that affect this package and the next and + latest non-vulnerable versions. 
+ """ + package_details = {} + package_details["purl"] = PackageURL.from_string(self.purl) + + next_non_vulnerable, latest_non_vulnerable = self.get_non_vulnerable_versions() + package_details["next_non_vulnerable"] = next_non_vulnerable + package_details["latest_non_vulnerable"] = latest_non_vulnerable + + package_details["advisories"] = self.get_affecting_vulnerabilities() + + return package_details + + def get_non_vulnerable_versions(self): + """ + Return a tuple of the next and latest non-vulnerable versions as Package instance. + Return a tuple of (None, None) if there is no non-vulnerable version. + """ + if self.version_rank == 0: + self.calculate_version_rank + non_vulnerable_versions = PackageV2.objects.get_fixed_by_package_versions( + self, fix=False + ).only_non_vulnerable() + + later_non_vulnerable_versions = non_vulnerable_versions.filter( + version_rank__gt=self.version_rank + ) + + later_non_vulnerable_versions = list(later_non_vulnerable_versions) + + if later_non_vulnerable_versions: + sorted_versions = later_non_vulnerable_versions + next_non_vulnerable = sorted_versions[0] + latest_non_vulnerable = sorted_versions[-1] + return next_non_vulnerable, latest_non_vulnerable + + return None, None + + @cached_property + def version_class(self): + range_class = RANGE_CLASS_BY_SCHEMES.get(self.type) + return range_class.version_class if range_class else Version + + def get_absolute_url(self): + """ + Return this Vulnerability details absolute URL. + """ + return reverse("package_details_v2", args=[self.purl]) + + @cached_property + def current_version(self): + return self.version_class(self.version) + + def get_affecting_vulnerabilities(self): + """ + Return a list of vulnerabilities that affect this package together with information regarding + the versions that fix the vulnerabilities. 
+ """ + if self.version_rank == 0: + self.calculate_version_rank + package_details_advs = [] + + fixed_by_packages = PackageV2.objects.get_fixed_by_package_versions(self, fix=True) + + package_advisories = self.affected_by_advisories.prefetch_related( + Prefetch( + "fixed_by_packages", + queryset=fixed_by_packages, + to_attr="fixed_packages", + ) + ) + + for adv in package_advisories: + package_details_advs.append({"advisory": adv}) + later_fixed_packages = [] + + for fixed_pkg in adv.fixed_by_packages.all(): + if fixed_pkg not in fixed_by_packages: + continue + fixed_version = self.version_class(fixed_pkg.version) + if fixed_version > self.current_version: + later_fixed_packages.append(fixed_pkg) + + next_fixed_package_vulns = [] + + sort_fixed_by_packages_by_version = [] + if later_fixed_packages: + sort_fixed_by_packages_by_version = sorted( + later_fixed_packages, key=lambda p: p.version_rank + ) + + fixed_by_pkgs = [] + + for vuln_details in package_details_advs: + if vuln_details["advisory"] != adv: + continue + vuln_details["fixed_by_purl"] = [] + vuln_details["fixed_by_purl_advisories"] = [] + + for fixed_by_pkg in sort_fixed_by_packages_by_version: + fixed_by_package_details = {} + fixed_by_purl = PackageURL.from_string(fixed_by_pkg.purl) + next_fixed_package_vulns = list(fixed_by_pkg.affected_by_advisories.all()) + + fixed_by_package_details["fixed_by_purl"] = fixed_by_purl + fixed_by_package_details["fixed_by_purl_advisories"] = next_fixed_package_vulns + fixed_by_pkgs.append(fixed_by_package_details) + + vuln_details["fixed_by_package_details"] = fixed_by_pkgs + + return package_details_advs + + +class AdvisoryExploit(models.Model): + """ + A vulnerability exploit is code used to + take advantage of a security flaw for unauthorized access or malicious activity. 
+ """ + + advisory = models.ForeignKey( + AdvisoryV2, + related_name="exploits", + on_delete=models.CASCADE, + ) + + date_added = models.DateField( + null=True, + blank=True, + help_text="The date the vulnerability was added to an exploit catalog.", + ) + + description = models.TextField( + null=True, + blank=True, + help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description", + ) + + required_action = models.TextField( + null=True, + blank=True, + help_text="The required action to address the vulnerability, typically to " + "apply vendor updates or apply vendor mitigations or to discontinue use.", + ) + + due_date = models.DateField( + null=True, + blank=True, + help_text="The date the required action is due, which applies" + " to all USA federal civilian executive branch (FCEB) agencies, " + "but all organizations are strongly encouraged to execute the required action", + ) + + notes = models.TextField( + null=True, + blank=True, + help_text="Additional notes and resources about the vulnerability," + " often a URL to vendor instructions.", + ) + + known_ransomware_campaign_use = models.BooleanField( + default=False, + help_text="""Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; + or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.""", + ) + + source_date_published = models.DateField( + null=True, blank=True, help_text="The date that the exploit was published or disclosed." 
+ ) + + exploit_type = models.TextField( + null=True, + blank=True, + help_text="The type of the exploit as provided by the original upstream data source.", + ) + + platform = models.TextField( + null=True, + blank=True, + help_text="The platform associated with the exploit as provided by the original upstream data source.", + ) + + source_date_updated = models.DateField( + null=True, + blank=True, + help_text="The date the exploit was updated in the original upstream data source.", + ) + + data_source = models.TextField( + null=True, + blank=True, + help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.", + ) + + source_url = models.URLField( + null=True, + blank=True, + help_text="The URL to the exploit as provided in the original upstream data source.", + ) + + @property + def get_known_ransomware_campaign_use_type(self): + return "Known" if self.known_ransomware_campaign_use else "Unknown" diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index d41b05321..3d1316cce 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -15,18 +15,29 @@ from traceback import format_exc as traceback_format_exc from typing import Iterable from typing import List +from typing import Optional from aboutcode.pipeline import LoopProgress from aboutcode.pipeline import PipelineDefinition from aboutcode.pipeline import humanize_time +from fetchcode import package_versions +from packageurl import PackageURL from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import UnMergeablePackageError from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory +from vulnerabilities.models import PackageV2 from vulnerabilities.models import PipelineRun from vulnerabilities.pipes.advisory import import_advisory from vulnerabilities.pipes.advisory import 
insert_advisory +from vulnerabilities.pipes.advisory import insert_advisory_v2 +from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage from vulnerabilities.utils import classproperty +from vulnerabilities.utils import get_affected_packages_by_patched_package +from vulnerabilities.utils import nearest_patched_package +from vulnerabilities.utils import resolve_version_range module_logger = logging.getLogger(__name__) @@ -148,14 +159,6 @@ def on_failure(self): """ pass - @classproperty - def pipeline_id(cls): - """Return unique pipeline_id set in cls.pipeline_id""" - - if cls.pipeline_id is None or cls.pipeline_id == "": - raise NotImplementedError("pipeline_id is not defined or is empty") - return cls.pipeline_id - class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline): """ @@ -207,12 +210,13 @@ def collect_and_store_advisories(self): progress = LoopProgress(total_iterations=estimated_advisory_count, logger=self.log) for advisory in progress.iter(self.collect_advisories()): - if _obj := insert_advisory( - advisory=advisory, - pipeline_id=self.pipeline_id, - logger=self.log, - ): - collected_advisory_count += 1 + if isinstance(advisory, AdvisoryData): + if _obj := insert_advisory( + advisory=advisory, + pipeline_id=self.pipeline_id, + logger=self.log, + ): + collected_advisory_count += 1 self.log(f"Successfully collected {collected_advisory_count:,d} advisories") @@ -248,3 +252,256 @@ def import_advisory(self, advisory: Advisory) -> int: f"Failed to import advisory: {advisory!r} with error {e!r}:\n{traceback_format_exc()}", level=logging.ERROR, ) + + +class VulnerableCodeBaseImporterPipelineV2(VulnerableCodePipeline): + """ + Base importer pipeline for importing advisories. + + Uses: + Subclass this Pipeline and implement ``advisories_count`` and ``collect_advisories`` + method. Also override the ``steps`` and ``advisory_confidence`` as needed. + """ + + pipeline_id = None # Unique Pipeline ID, this should be the name of pipeline module. 
+ license_url = None + spdx_license_expression = None + repo_url = None + advisory_confidence = MAX_CONFIDENCE + ignorable_versions = [] + unfurl_version_ranges = False + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + """ + Yield AdvisoryData for importer pipeline. + + Populate the `self.collected_advisories_count` field and yield AdvisoryData + """ + raise NotImplementedError + + def advisories_count(self) -> int: + """ + Return the estimated AdvisoryData to be yielded by ``collect_advisories``. + + Used by ``collect_and_store_advisories`` to log the progress of advisory collection. + """ + raise NotImplementedError + + def collect_and_store_advisories(self): + collected_advisory_count = 0 + estimated_advisory_count = self.advisories_count() + + if estimated_advisory_count > 0: + self.log(f"Collecting {estimated_advisory_count:,d} advisories") + + progress = LoopProgress(total_iterations=estimated_advisory_count, logger=self.log) + for advisory in progress.iter(self.collect_advisories()): + if advisory is None: + self.log("Advisory is None, skipping") + continue + if _obj := insert_advisory_v2( + advisory=advisory, + pipeline_id=self.pipeline_id, + get_advisory_packages=self.get_advisory_packages, + logger=self.log, + ): + collected_advisory_count += 1 + + self.log(f"Successfully collected {collected_advisory_count:,d} advisories") + + def get_advisory_packages(self, advisory_data: AdvisoryData) -> list: + """ + Return the list of packages for the given advisory. + + Used by ``import_advisory`` to get the list of packages for the advisory. 
+ """ + from vulnerabilities.improvers import default + + affected_purls = [] + fixed_purls = [] + for affected_package in advisory_data.affected_packages: + package_affected_purls, package_fixed_purls = default.get_exact_purls( + affected_package=affected_package + ) + affected_purls.extend(package_affected_purls) + fixed_purls.extend(package_fixed_purls) + + if self.unfurl_version_ranges: + vulnerable_pvs, fixed_pvs = self.get_impacted_packages( + affected_packages=advisory_data.affected_packages, + advisory_date_published=advisory_data.date_published, + ) + affected_purls.extend(vulnerable_pvs) + fixed_purls.extend(fixed_pvs) + + vulnerable_packages = [] + fixed_packages = [] + + for affected_purl in affected_purls: + vulnerable_package, _ = PackageV2.objects.get_or_create_from_purl(purl=affected_purl) + vulnerable_packages.append(vulnerable_package) + + for fixed_purl in fixed_purls: + fixed_package, _ = PackageV2.objects.get_or_create_from_purl(purl=fixed_purl) + fixed_packages.append(fixed_package) + + return vulnerable_packages, fixed_packages + + def get_published_package_versions( + self, package_url: PackageURL, until: Optional[datetime] = None + ) -> List[str]: + """ + Return a list of versions published before `until` for the `package_url` + """ + versions_before_until = [] + try: + versions = package_versions.versions(str(package_url)) + for version in versions or []: + if until and version.release_date and version.release_date > until: + continue + versions_before_until.append(version.value) + + return versions_before_until + except Exception as e: + self.log( + f"Failed to fetch versions for package {str(package_url)} {e!r}", + level=logging.ERROR, + ) + return [] + + def get_impacted_packages(self, affected_packages, advisory_date_published): + """ + Return a tuple of lists of affected and fixed PackageURLs + """ + if not affected_packages: + return [], [] + + mergable = True + + # TODO: We should never had the exception in first place + try: + 
purl, affected_version_ranges, fixed_versions = AffectedPackage.merge(affected_packages) + except UnMergeablePackageError: + self.log(f"Cannot merge with different purls {affected_packages!r}", logging.ERROR) + mergable = False + + if not mergable: + vulnerable_packages = [] + fixed_packages = [] + for affected_package in affected_packages: + purl = affected_package.package + affected_version_range = affected_package.affected_version_range + fixed_version = affected_package.fixed_version + pkg_type = purl.type + pkg_namespace = purl.namespace + pkg_name = purl.name + if not affected_version_range and fixed_version: + fixed_packages.append( + PackageURL( + type=pkg_type, + namespace=pkg_namespace, + name=pkg_name, + version=str(fixed_version), + ) + ) + else: + valid_versions = self.get_published_package_versions( + package_url=purl, until=advisory_date_published + ) + affected_pvs, fixed_pvs = self.resolve_package_versions( + affected_version_range=affected_version_range, + pkg_type=pkg_type, + pkg_namespace=pkg_namespace, + pkg_name=pkg_name, + valid_versions=valid_versions, + ) + vulnerable_packages.extend(affected_pvs) + fixed_packages.extend(fixed_pvs) + return vulnerable_packages, fixed_packages + else: + pkg_type = purl.type + pkg_namespace = purl.namespace + pkg_name = purl.name + pkg_qualifiers = purl.qualifiers + fixed_purls = [ + PackageURL( + type=pkg_type, + namespace=pkg_namespace, + name=pkg_name, + version=str(version), + qualifiers=pkg_qualifiers, + ) + for version in fixed_versions + ] + if not affected_version_ranges: + return [], fixed_purls + else: + valid_versions = self.get_published_package_versions( + package_url=purl, until=advisory_date_published + ) + vulnerable_packages = [] + fixed_packages = [] + for affected_version_range in affected_version_ranges: + vulnerable_pvs, fixed_pvs = self.resolve_package_versions( + affected_version_range=affected_version_range, + pkg_type=pkg_type, + pkg_namespace=pkg_namespace, + pkg_name=pkg_name, + 
valid_versions=valid_versions, + ) + vulnerable_packages.extend(vulnerable_pvs) + fixed_packages.extend(fixed_pvs) + return vulnerable_packages, fixed_packages + + def resolve_package_versions( + self, + affected_version_range, + pkg_type, + pkg_namespace, + pkg_name, + valid_versions, + ): + """ + Return a tuple of lists of ``affected_packages`` and ``fixed_packages`` PackageURL for the given `affected_version_range` and `valid_versions`. + + ``valid_versions`` are the valid version listed on the package registry for that package + + """ + aff_vers, unaff_vers = resolve_version_range( + affected_version_range=affected_version_range, + ignorable_versions=self.ignorable_versions, + package_versions=valid_versions, + ) + + affected_purls = list( + self.expand_verion_range_to_purls(pkg_type, pkg_namespace, pkg_name, aff_vers) + ) + + unaffected_purls = list( + self.expand_verion_range_to_purls(pkg_type, pkg_namespace, pkg_name, unaff_vers) + ) + + fixed_packages = [] + affected_packages = [] + + patched_packages = nearest_patched_package( + vulnerable_packages=affected_purls, resolved_packages=unaffected_purls + ) + + for ( + fixed_package, + affected_purls, + ) in get_affected_packages_by_patched_package(patched_packages).items(): + if fixed_package: + fixed_packages.append(fixed_package) + affected_packages.extend(affected_purls) + + return affected_packages, fixed_packages + + def expand_verion_range_to_purls(self, pkg_type, pkg_namespace, pkg_name, versions): + for version in versions: + yield PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) diff --git a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py new file mode 100644 index 000000000..90ea32b75 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py @@ -0,0 +1,341 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. 
+# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +import re +import urllib.parse +from typing import Iterable + +import requests +from bs4 import BeautifulSoup +from packageurl import PackageURL +from univers.version_constraint import VersionConstraint +from univers.version_range import ApacheVersionRange +from univers.versions import SemverVersion + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.severity_systems import APACHE_HTTPD +from vulnerabilities.utils import create_weaknesses_list +from vulnerabilities.utils import cwe_regex +from vulnerabilities.utils import get_item + +logger = logging.getLogger(__name__) + + +def fetch_links(url): + links = [] + data = requests.get(url).content + soup = BeautifulSoup(data, features="lxml") + for tag in soup.find_all("a"): + link = tag.get("href") + if not link.endswith("json"): + continue + links.append(urllib.parse.urljoin(url, link)) + return links + + +def get_weaknesses(cve_data): + """ + Extract CWE IDs from CVE data. + + Args: + cve_data (dict): The CVE data in a dictionary format. + + Returns: + List[int]: A list of unique CWE IDs. + + Examples: + >>> mock_cve_data1 = { + ... "containers": { + ... "cna": { + ... "providerMetadata": { + ... "orgId": "f0158376-9dc2-43b6-827c-5f631a4d8d09" + ... }, + ... "title": "mod_macro buffer over-read", + ... "problemTypes": [ + ... { + ... "descriptions": [ + ... { + ... "description": "CWE-125 Out-of-bounds Read", + ... 
"lang": "en", + ... "cweId": "CWE-125", + ... "type": "CWE" + ... } + ... ] + ... } + ... ] + ... } + ... } + ... } + >>> mock_cve_data2 = { + ... "data_type": "CVE", + ... "data_format": "MITRE", + ... "data_version": "4.0", + ... "generator": { + ... "engine": "Vulnogram 0.0.9" + ... }, + ... "CVE_data_meta": { + ... "ID": "CVE-2022-28614", + ... "ASSIGNER": "security@apache.org", + ... "TITLE": "read beyond bounds via ap_rwrite() ", + ... "STATE": "PUBLIC" + ... }, + ... "problemtype": { + ... "problemtype_data": [ + ... { + ... "description": [ + ... { + ... "lang": "eng", + ... "value": "CWE-190 Integer Overflow or Wraparound" + ... } + ... ] + ... }, + ... { + ... "description": [ + ... { + ... "lang": "eng", + ... "value": "CWE-200 Exposure of Sensitive Information to an Unauthorized Actor" + ... } + ... ] + ... } + ... ] + ... } + ... } + + >>> get_weaknesses(mock_cve_data1) + [125] + + >>> get_weaknesses(mock_cve_data2) + [190, 200] + """ + alias = get_item(cve_data, "CVE_data_meta", "ID") + cwe_strings = [] + if alias: + problemtype_data = get_item(cve_data, "problemtype", "problemtype_data") or [] + for problem in problemtype_data: + for desc in problem.get("description", []): + value = desc.get("value", "") + cwe_id_string_list = re.findall(cwe_regex, value) + cwe_strings.extend(cwe_id_string_list) + else: + problemTypes = cve_data.get("containers", {}).get("cna", {}).get("problemTypes", []) + descriptions = problemTypes[0].get("descriptions", []) if len(problemTypes) > 0 else [] + for description in descriptions: + cwe_id_string = description.get("cweId", "") + cwe_strings.append(cwe_id_string) + + weaknesses = create_weaknesses_list(cwe_strings) + return weaknesses + + +class ApacheHTTPDImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + Apache HTTPD Importer Pipeline + + This pipeline imports security advisories from the Apache HTTPD project. 
+ """ + + pipeline_id = "apache_httpd_importer_v2" + spdx_license_expression = "Apache-2.0" + license_url = "https://www.apache.org/licenses/LICENSE-2.0" + base_url = "https://httpd.apache.org/security/json/" + unfurl_version_ranges = True + + links = [] + + ignorable_versions = frozenset( + [ + "AGB_BEFORE_AAA_CHANGES", + "APACHE_1_2b1", + "APACHE_1_2b10", + "APACHE_1_2b11", + "APACHE_1_2b2", + "APACHE_1_2b3", + "APACHE_1_2b4", + "APACHE_1_2b5", + "APACHE_1_2b6", + "APACHE_1_2b7", + "APACHE_1_2b8", + "APACHE_1_2b9", + "APACHE_1_3_PRE_NT", + "APACHE_1_3a1", + "APACHE_1_3b1", + "APACHE_1_3b2", + "APACHE_1_3b3", + "APACHE_1_3b5", + "APACHE_1_3b6", + "APACHE_1_3b7", + "APACHE_2_0_2001_02_09", + "APACHE_2_0_52_WROWE_RC1", + "APACHE_2_0_ALPHA", + "APACHE_2_0_ALPHA_2", + "APACHE_2_0_ALPHA_3", + "APACHE_2_0_ALPHA_4", + "APACHE_2_0_ALPHA_5", + "APACHE_2_0_ALPHA_6", + "APACHE_2_0_ALPHA_7", + "APACHE_2_0_ALPHA_8", + "APACHE_2_0_ALPHA_9", + "APACHE_2_0_BETA_CANDIDATE_1", + "APACHE_BIG_SYMBOL_RENAME_POST", + "APACHE_BIG_SYMBOL_RENAME_PRE", + "CHANGES", + "HTTPD_LDAP_1_0_0", + "INITIAL", + "MOD_SSL_2_8_3", + "PCRE_3_9", + "POST_APR_SPLIT", + "PRE_APR_CHANGES", + "STRIKER_2_0_51_RC1", + "STRIKER_2_0_51_RC2", + "STRIKER_2_1_0_RC1", + "WROWE_2_0_43_PRE1", + "apache-1_3-merge-1-post", + "apache-1_3-merge-1-pre", + "apache-1_3-merge-2-post", + "apache-1_3-merge-2-pre", + "apache-apr-merge-3", + "apache-doc-split-01", + "dg_last_1_2_doc_merge", + "djg-apache-nspr-07", + "djg_nspr_split", + "moving_to_httpd_module", + "mpm-3", + "mpm-merge-1", + "mpm-merge-2", + "post_ajp_proxy", + "pre_ajp_proxy", + ] + ) + unfurl_version_ranges = True + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + if not self.links: + self.links = fetch_links(self.base_url) + for link in self.links: + data = requests.get(link).json() + yield self.to_advisory(data) + + def advisories_count(self) -> int: + """Count the 
number of advisories available in the JSON files.""" + if not self.links: + self.links = fetch_links(self.base_url) + return len(self.links) + + def to_advisory(self, data): + alias = get_item(data, "CVE_data_meta", "ID") + if not alias: + alias = get_item(data, "cveMetadata", "cveId") + descriptions = get_item(data, "description", "description_data") or [] + description = None + for desc in descriptions: + if desc.get("lang") == "eng": + description = desc.get("value") + break + + severities = [] + impacts = data.get("impact") or [] + for impact in impacts: + value = impact.get("other") + if value: + severities.append( + VulnerabilitySeverity( + system=APACHE_HTTPD, + value=value, + scoring_elements="", + ) + ) + break + reference = Reference( + reference_id=alias, + url=urllib.parse.urljoin(self.base_url, f"{alias}.json"), + ) + + versions_data = [] + for vendor in get_item(data, "affects", "vendor", "vendor_data") or []: + for products in get_item(vendor, "product", "product_data") or []: + for version_data in get_item(products, "version", "version_data") or []: + versions_data.append(version_data) + + fixed_versions = [] + for timeline_object in data.get("timeline") or []: + timeline_value = timeline_object.get("value") + if "release" in timeline_value: + split_timeline_value = timeline_value.split(" ") + if "never" in timeline_value: + continue + if "release" in split_timeline_value[-1]: + fixed_versions.append(split_timeline_value[0]) + if "release" in split_timeline_value[0]: + fixed_versions.append(split_timeline_value[-1]) + + affected_packages = [] + affected_version_range = self.to_version_ranges(versions_data, fixed_versions) + if affected_version_range: + affected_packages.append( + AffectedPackage( + package=PackageURL( + type="apache", + name="httpd", + ), + affected_version_range=affected_version_range, + ) + ) + + weaknesses = get_weaknesses(data) + + return AdvisoryData( + advisory_id=alias, + aliases=[], + summary=description or "", + 
affected_packages=affected_packages, + references_v2=[reference], + weaknesses=weaknesses, + url=reference.url, + severities=severities, + ) + + def to_version_ranges(self, versions_data, fixed_versions): + constraints = [] + for version_data in versions_data: + version_value = version_data["version_value"] + range_expression = version_data["version_affected"] + if range_expression not in {"<=", ">=", "?=", "!<", "="}: + raise ValueError(f"unknown comparator found! {range_expression}") + comparator_by_range_expression = { + ">=": ">=", + "!<": ">=", + "<=": "<=", + "=": "=", + } + comparator = comparator_by_range_expression.get(range_expression) + if comparator: + constraints.append( + VersionConstraint(comparator=comparator, version=SemverVersion(version_value)) + ) + + for fixed_version in fixed_versions: + # The VersionConstraint method `invert()` inverts the fixed_version's comparator, + # enabling inclusion of multiple fixed versions with the `affected_version_range` values. + constraints.append( + VersionConstraint( + comparator="=", + version=SemverVersion(fixed_version), + ).invert() + ) + + return ApacheVersionRange(constraints=constraints) diff --git a/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py b/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py new file mode 100644 index 000000000..902dd5248 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py @@ -0,0 +1,124 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+#
+
+from pathlib import Path
+from typing import Iterable
+
+from dateutil import parser as dateparser
+from fetchcode.vcs import fetch_via_vcs
+from packageurl import PackageURL
+from univers.version_constraint import VersionConstraint
+from univers.version_range import HexVersionRange
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import Reference
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.utils import is_cve
+from vulnerabilities.utils import load_yaml
+
+
+class ElixirSecurityImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+    """
+    Elixir Security Advisories Importer Pipeline
+
+    This pipeline imports security advisories for Elixir.
+    """
+
+    pipeline_id = "elixir_security_importer_v2"
+    spdx_license_expression = "CC0-1.0"
+    license_url = "https://github.com/dependabot/elixir-security-advisories/blob/master/LICENSE.txt"
+    repo_url = "git+https://github.com/dependabot/elixir-security-advisories"
+    unfurl_version_ranges = True
+
+    @classmethod
+    def steps(cls):
+        return (cls.collect_and_store_advisories,)
+
+    def clone(self):
+        self.log(f"Cloning `{self.repo_url}`")
+        self.vcs_response = fetch_via_vcs(self.repo_url)
+
+    def advisories_count(self) -> int:
+        base_path = Path(self.vcs_response.dest_dir)
+        count = len(list((base_path / "packages").glob("**/*.yml")))
+        return count
+
+    def collect_advisories(self) -> Iterable[AdvisoryData]:
+        try:
+            base_path = Path(self.vcs_response.dest_dir)
+            vuln = base_path / "packages"
+            for file in vuln.glob("**/*.yml"):
+                yield from self.process_file(file, base_path)
+        finally:
+            if self.vcs_response:
+                self.vcs_response.delete()
+
+    def process_file(self, file, base_path) -> Iterable[AdvisoryData]:
+        relative_path = str(file.relative_to(base_path)).strip("/")
+        advisory_url = (
f"https://github.com/dependabot/elixir-security-advisories/blob/master/{relative_path}" + ) + yaml_file = load_yaml(str(file)) + + summary = yaml_file.get("description") or "" + pkg_name = yaml_file.get("package") or "" + + cve_id = "" + cve = yaml_file.get("cve") or "" + if cve and not cve.startswith("CVE-"): + cve_id = f"CVE-{cve}" + elif cve: + cve_id = cve + + if not cve_id or not is_cve(cve_id): + return + + references = [] + link = yaml_file.get("link") or "" + if link: + references.append(Reference(url=link)) + + constraints = [] + vrc = HexVersionRange.version_class + unaffected_versions = yaml_file.get("unaffected_versions") or [] + patched_versions = yaml_file.get("patched_versions") or [] + + for version in unaffected_versions: + constraints.append(VersionConstraint.from_string(version_class=vrc, string=version)) + + for version in patched_versions: + if version.startswith("~>"): + version = version[2:] + constraints.append( + VersionConstraint.from_string(version_class=vrc, string=version).invert() + ) + + affected_packages = [] + if pkg_name: + affected_packages.append( + AffectedPackage( + package=PackageURL(type="hex", name=pkg_name), + affected_version_range=HexVersionRange(constraints=constraints), + ) + ) + + date_published = None + if yaml_file.get("disclosure_date"): + date_published = dateparser.parse(yaml_file.get("disclosure_date")) + + yield AdvisoryData( + advisory_id=cve_id, + aliases=[], + summary=summary, + references_v2=references, + affected_packages=affected_packages, + url=advisory_url, + date_published=date_published, + ) diff --git a/vulnerabilities/pipelines/v2_importers/github_importer.py b/vulnerabilities/pipelines/v2_importers/github_importer.py new file mode 100644 index 000000000..9ac360016 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/github_importer.py @@ -0,0 +1,393 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. 
+# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +from traceback import format_exc as traceback_format_exc +from typing import Callable +from typing import Iterable +from typing import List +from typing import Optional + +from cwe2.database import Database +from dateutil import parser as dateparser +from packageurl import PackageURL +from univers.version_range import RANGE_CLASS_BY_SCHEMES +from univers.version_range import build_range_from_github_advisory_constraint + +from vulnerabilities import severity_systems +from vulnerabilities import utils +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.utils import dedupe +from vulnerabilities.utils import get_cwe_id +from vulnerabilities.utils import get_item + + +class GitHubAPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + GitHub Importer Pipeline + + This pipeline imports security advisories from GitHub Security Advisories. 
+ """ + + pipeline_id = "github_importer_v2" + spdx_license_expression = "CC-BY-4.0" + license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md" + unfurl_version_ranges = True + + ignorable_versions = frozenset( + [ + "0.1-bulbasaur", + "0.1-charmander", + "0.3m1", + "0.3m2", + "0.3m3", + "0.3m4", + "0.3m5", + "0.4m1", + "0.4m2", + "0.4m3", + "0.4m4", + "0.4m5", + "0.5m1", + "0.5m2", + "0.5m3", + "0.5m4", + "0.5m5", + "0.6m1", + "0.6m2", + "0.6m3", + "0.6m4", + "0.6m5", + "0.6m6", + "0.7.10p1", + "0.7.11p1", + "0.7.11p2", + "0.7.11p3", + "0.8.1p1", + "0.8.3p1", + "0.8.4p1", + "0.8.4p2", + "0.8.6p1", + "0.8.7p1", + "0.9-doduo", + "0.9-eevee", + "0.9-fearow", + "0.9-gyarados", + "0.9-horsea", + "0.9-ivysaur", + "2013-01-21T20:33:09+0100", + "2013-01-23T17:11:52+0100", + "2013-02-01T20:50:46+0100", + "2013-02-02T19:59:03+0100", + "2013-02-02T20:23:17+0100", + "2013-02-08T17:40:57+0000", + "2013-03-27T16:32:26+0100", + "2013-05-09T12:47:53+0200", + "2013-05-10T17:55:56+0200", + "2013-05-14T20:16:05+0200", + "2013-06-01T10:32:51+0200", + "2013-07-19T09:11:08+0000", + "2013-08-12T21:48:56+0200", + "2013-09-11T19-27-10", + "2013-12-23T17-51-15", + "2014-01-12T15-52-10", + "2.0.1rc2-git", + "3.0.0b3-", + "3.0b6dev-r41684", + "-class.-jw.util.version.Version-", + "vulnerabilities", + ] + ) + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + package_type_by_github_ecosystem = { + # "MAVEN": "maven", + # "NUGET": "nuget", + # "COMPOSER": "composer", + # "PIP": "pypi", + # "RUBYGEMS": "gem", + "NPM": "npm", + # "RUST": "cargo", + # "GO": "golang", + } + + def advisories_count(self): + advisory_query = """ + query{ + securityVulnerabilities(first: 0, ecosystem: %s) { + totalCount + } + } + """ + advisory_counts = 0 + for ecosystem in self.package_type_by_github_ecosystem.keys(): + graphql_query = {"query": advisory_query % (ecosystem)} + response = utils.fetch_github_graphql_query(graphql_query) + 
advisory_counts += get_item(response, "data", "securityVulnerabilities", "totalCount") + return advisory_counts + + def collect_advisories(self) -> Iterable[AdvisoryData]: + + # TODO: We will try to gather more info from GH API + # Check https://github.com/nexB/vulnerablecode/issues/1039#issuecomment-1366458885 + # Check https://github.com/nexB/vulnerablecode/issues/645 + # set of all possible values of first '%s' = {'MAVEN','COMPOSER', 'NUGET', 'RUBYGEMS', 'PYPI', 'NPM', 'RUST'} + # second '%s' is interesting, it will have the value '' for the first request, + advisory_query = """ + query{ + securityVulnerabilities(first: 100, ecosystem: %s, %s) { + edges { + node { + advisory { + identifiers { + type + value + } + summary + references { + url + } + severity + cwes(first: 10){ + nodes { + cweId + } + } + publishedAt + } + firstPatchedVersion{ + identifier + } + package { + name + } + vulnerableVersionRange + } + } + pageInfo { + hasNextPage + endCursor + } + } + } + """ + for ecosystem, package_type in self.package_type_by_github_ecosystem.items(): + end_cursor_exp = "" + while True: + graphql_query = {"query": advisory_query % (ecosystem, end_cursor_exp)} + response = utils.fetch_github_graphql_query(graphql_query) + + page_info = get_item(response, "data", "securityVulnerabilities", "pageInfo") + end_cursor = get_item(page_info, "endCursor") + if end_cursor: + end_cursor = f'"{end_cursor}"' + end_cursor_exp = f"after: {end_cursor}" + + yield from process_response(response, package_type=package_type) + + if not get_item(page_info, "hasNextPage"): + break + + +def get_purl(pkg_type: str, github_name: str, logger: Callable = None) -> Optional[PackageURL]: + """ + Return a PackageURL by splitting the `github_name` using the `pkg_type` + convention. Return None and log an error if we can not split or it is an + unknown package type. 
def get_purl(pkg_type: str, github_name: str, logger: Callable = None) -> Optional["PackageURL"]:
    """
    Return a PackageURL by splitting the `github_name` using the `pkg_type`
    convention. Return None and log an error if we can not split or it is an
    unknown package type.

    For example::
    >>> expected = PackageURL(type='maven', namespace='org.apache.commons', name='commons-lang3')
    >>> assert get_purl("maven", "org.apache.commons:commons-lang3") == expected

    >>> expected = PackageURL(type="composer", namespace="foo", name="bar")
    >>> assert get_purl("composer", "foo/bar") == expected
    """
    if pkg_type == "maven":
        # Maven names follow the "namespace:name" convention.
        if ":" not in github_name:
            if logger:
                logger(f"get_purl: Invalid maven package name {github_name}", level=logging.ERROR)
            return
        ns, _, name = github_name.partition(":")
        return PackageURL(type=pkg_type, namespace=ns, name=name)

    if pkg_type in ("composer", "npm"):
        # "vendor/name" convention; npm packages may be unscoped (no slash).
        if "/" not in github_name:
            return PackageURL(type=pkg_type, name=github_name)
        vendor, _, name = github_name.partition("/")
        return PackageURL(type=pkg_type, namespace=vendor, name=name)

    # NOTE: "npm" removed from this tuple: it was unreachable, being fully
    # handled by the composer/npm branch above.
    if pkg_type in ("nuget", "pypi", "gem", "golang", "cargo"):
        return PackageURL(type=pkg_type, name=github_name)

    if logger:
        logger(f"get_purl: Unknown package type {pkg_type}", level=logging.ERROR)


def process_response(
    resp: dict, package_type: str, logger: Callable = None
) -> Iterable["AdvisoryData"]:
    """
    Yield AdvisoryData objects built from a GitHub GraphQL ``resp`` mapping
    for the given ``package_type`` (a Package URL type string).

    Malformed nodes are logged through ``logger`` (if provided) and skipped.
    """
    vulnerabilities = get_item(resp, "data", "securityVulnerabilities", "edges") or []
    if not vulnerabilities:
        if logger:
            logger(
                f"No vulnerabilities found for package_type: {package_type!r} in response: {resp!r}",
                level=logging.ERROR,
            )
        return

    for vulnerability in vulnerabilities:
        aliases = []
        affected_packages = []
        github_advisory = get_item(vulnerability, "node")
        if not github_advisory:
            if logger:
                logger(f"No node found in {vulnerability!r}", level=logging.ERROR)
            continue

        advisory = get_item(github_advisory, "advisory")
        if not advisory:
            if logger:
                logger(f"No advisory found in {github_advisory!r}", level=logging.ERROR)
            continue

        summary = get_item(advisory, "summary") or ""

        references = get_item(advisory, "references") or []
        if references:
            urls = (ref["url"] for ref in references)
            references = [Reference.from_url(u) for u in urls]

        date_published = get_item(advisory, "publishedAt")
        if date_published:
            date_published = dateparser.parse(date_published)

        name = get_item(github_advisory, "package", "name")
        if name:
            purl = get_purl(pkg_type=package_type, github_name=name, logger=logger)
            if purl:
                affected_range = get_item(github_advisory, "vulnerableVersionRange")
                fixed_version = get_item(github_advisory, "firstPatchedVersion", "identifier")
                if affected_range:
                    try:
                        affected_range = build_range_from_github_advisory_constraint(
                            package_type, affected_range
                        )
                    except Exception as e:
                        if logger:
                            logger(
                                f"Could not parse affected range {affected_range!r} {e!r} \n {traceback_format_exc()}",
                                level=logging.ERROR,
                            )
                        affected_range = None
                if fixed_version:
                    try:
                        fixed_version = RANGE_CLASS_BY_SCHEMES[package_type].version_class(
                            fixed_version
                        )
                    except Exception as e:
                        if logger:
                            logger(
                                f"Invalid fixed version {fixed_version!r} {e!r} \n {traceback_format_exc()}",
                                level=logging.ERROR,
                            )
                        fixed_version = None
                if affected_range or fixed_version:
                    affected_packages.append(
                        AffectedPackage(
                            package=purl,
                            affected_version_range=affected_range,
                            fixed_version=fixed_version,
                        )
                    )

        identifiers = get_item(advisory, "identifiers") or []
        ghsa_id = ""
        severities = []
        for identifier in identifiers:
            value = identifier["value"]
            identifier_type = identifier["type"]
            aliases.append(value)
            # attach the GHSA with severity score
            if identifier_type == "GHSA":
                # Each Node has only one GHSA, hence exit after attaching
                # score to this GHSA
                ghsa_id = value
                for ref in references:
                    if ref.reference_id == value:
                        severity = get_item(advisory, "severity")
                        if severity:
                            severities = [
                                VulnerabilitySeverity(
                                    system=severity_systems.CVSS31_QUALITY,
                                    value=severity,
                                    url=ref.url,
                                )
                            ]
            elif identifier_type == "CVE":
                pass
            else:
                if logger:
                    logger(
                        f"Unknown identifier type {identifier_type!r} and value {value!r}",
                        level=logging.ERROR,
                    )

        weaknesses = get_cwes_from_github_advisory(advisory, logger)

        aliases = sorted(dedupe(aliases))
        # Fall back to the first alias when there is no GHSA id; skip the
        # node entirely if there is no identifier at all (previously this
        # raised IndexError on aliases[0]).
        if not ghsa_id and not aliases:
            if logger:
                logger(
                    f"Skipping advisory without any identifier: {github_advisory!r}",
                    level=logging.ERROR,
                )
            continue
        advisory_id = ghsa_id or aliases[0]
        aliases.remove(advisory_id)

        yield AdvisoryData(
            # Bug fix: the computed fallback was previously discarded and
            # ghsa_id (possibly empty) was yielded instead.
            advisory_id=advisory_id,
            aliases=aliases,
            summary=summary,
            references_v2=references,
            severities=severities,
            affected_packages=affected_packages,
            date_published=date_published,
            weaknesses=weaknesses,
            # NOTE(review): this URL is only meaningful when ghsa_id is set —
            # kept as-is to preserve existing behavior; confirm intent.
            url=f"https://github.com/advisories/{ghsa_id}",
        )


def get_cwes_from_github_advisory(advisory, logger=None) -> List[int]:
    """
    Return the list of CWE ids (e.g. [522]) found in ``advisory``.

    Extract the cwe list (e.g. [{'cweId': 'CWE-522'}]), strip the "CWE-"
    prefix, convert to int and keep only ids present in the CWE database.
    """
    weaknesses = []
    db = Database()
    cwe_list = get_item(advisory, "cwes", "nodes") or []
    for cwe_item in cwe_list:
        cwe_string = get_item(cwe_item, "cweId")
        if cwe_string:
            cwe_id = get_cwe_id(cwe_string)
            try:
                # Raises when the id is not in the CWE database.
                db.get(cwe_id)
                weaknesses.append(cwe_id)
            except Exception as e:
                if logger:
                    logger(f"Invalid CWE id {e!r} \n {traceback_format_exc()}", level=logging.ERROR)
    return weaknesses
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
import traceback
from pathlib import Path
from typing import Iterable
from typing import List
from typing import Tuple

import pytz
import saneyaml
from dateutil import parser as dateparser
from fetchcode.vcs import fetch_via_vcs
from packageurl import PackageURL
from univers.version_range import RANGE_CLASS_BY_SCHEMES
from univers.version_range import VersionRange
from univers.version_range import from_gitlab_native
from univers.versions import Version

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Reference
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
from vulnerabilities.utils import build_description
from vulnerabilities.utils import get_advisory_url
from vulnerabilities.utils import get_cwe_id


class GitLabImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    GitLab Importer Pipeline

    Collect advisories from the GitLab Advisory Database (Open Source Edition).
    """

    pipeline_id = "gitlab_importer_v2"
    spdx_license_expression = "MIT"
    license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE"
    repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/"
    unfurl_version_ranges = True

    # Set by clone(). Defaulting to None makes clean_downloads()/on_failure()
    # safe when the pipeline fails before the clone step ran (previously an
    # AttributeError).
    vcs_response = None

    @classmethod
    def steps(cls):
        return (
            cls.clone,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    purl_type_by_gitlab_scheme = {
        "conan": "conan",
        "gem": "gem",
        # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742
        # "go": "golang",
        "maven": "maven",
        "npm": "npm",
        "nuget": "nuget",
        "packagist": "composer",
        "pypi": "pypi",
    }

    gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()}

    def clone(self):
        """Clone the GitLab advisory repository locally."""
        self.log(f"Cloning `{self.repo_url}`")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def advisories_count(self):
        """Return the number of YAML advisory files in the cloned checkout."""
        root = Path(self.vcs_response.dest_dir)
        return sum(1 for _ in root.rglob("*.yml"))

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """Yield an AdvisoryData for each parsable YAML advisory file."""
        base_path = Path(self.vcs_response.dest_dir)

        for file_path in base_path.rglob("*.yml"):
            # YAML files directly under the checkout root are repo metadata,
            # not advisories.
            if file_path.parent == base_path:
                continue

            gitlab_type, _, _ = parse_advisory_path(
                base_path=base_path,
                file_path=file_path,
            )

            if gitlab_type not in self.purl_type_by_gitlab_scheme:
                self.log(
                    f"Unknown package type {gitlab_type!r} in {file_path!r}",
                    level=logging.ERROR,
                )
                continue

            advisory = parse_gitlab_advisory(
                file=file_path,
                base_path=base_path,
                gitlab_scheme_by_purl_type=self.gitlab_scheme_by_purl_type,
                purl_type_by_gitlab_scheme=self.purl_type_by_gitlab_scheme,
                logger=self.log,
            )

            if not advisory:
                self.log(
                    f"Failed to parse advisory from {file_path!r}",
                    level=logging.ERROR,
                )
                continue

            yield advisory

    def clean_downloads(self):
        """Delete the cloned repository, if any."""
        if self.vcs_response:
            self.log("Removing cloned repository")
            self.vcs_response.delete()

    def on_failure(self):
        # Always release the checkout on failure.
        self.clean_downloads()
def parse_advisory_path(base_path: Path, file_path: Path) -> Tuple[str, str, str]:
    """
    Parse a gitlab advisory file and return a 3-tuple of:
    (gitlab_type, package_slug, vulnerability_id)

    For example::

    >>> base_path = Path("/tmp/tmpi1klhpmd/checkout")
    >>> file_path=Path("/tmp/tmpi1klhpmd/checkout/pypi/gradio/CVE-2021-43831.yml")
    >>> parse_advisory_path(base_path=base_path, file_path=file_path)
    ('pypi', 'gradio', 'CVE-2021-43831')

    >>> file_path=Path("/tmp/tmpi1klhpmd/checkout/nuget/github.com/beego/beego/v2/nuget/CVE-2021-43831.yml")
    >>> parse_advisory_path(base_path=base_path, file_path=file_path)
    ('nuget', 'github.com/beego/beego/v2/nuget', 'CVE-2021-43831')

    >>> file_path = Path("/tmp/tmpi1klhpmd/checkout/npm/@express/beego/beego/v2/CVE-2021-43831.yml")
    >>> parse_advisory_path(base_path=base_path, file_path=file_path)
    ('npm', '@express/beego/beego/v2', 'CVE-2021-43831')
    """
    relative_path_segments = file_path.relative_to(base_path).parts
    gitlab_type = relative_path_segments[0]
    vuln_id = file_path.stem
    package_slug = "/".join(relative_path_segments[1:-1])

    return gitlab_type, package_slug, vuln_id


def get_purl(package_slug, purl_type_by_gitlab_scheme, logger):
    """
    Return a PackageURL object built from ``package_slug``, or None (with an
    error logged) when the slug is empty or its scheme is unknown.
    """
    parts = [p for p in package_slug.strip("/").split("/") if p]
    # Robustness fix: an empty slug or a scheme missing from the mapping
    # previously raised IndexError/KeyError; callers already handle a None
    # return, so log and bail out instead.
    gitlab_scheme = parts[0] if parts else ""
    purl_type = purl_type_by_gitlab_scheme.get(gitlab_scheme)
    if not purl_type:
        logger(
            f"get_purl: unknown gitlab scheme in package_slug: {package_slug!r}",
            level=logging.ERROR,
        )
        return
    if gitlab_scheme == "go":
        # Go module paths keep their slashes as part of the name.
        name = "/".join(parts[1:])
        return PackageURL(type=purl_type, namespace=None, name=name)
    # if package slug is of the form:
    # "nuget/NuGet.Core"
    if len(parts) == 2:
        name = parts[1]
        return PackageURL(type=purl_type, name=name)
    # if package slug is of the form:
    # "nuget/github.com/beego/beego/v2/nuget"
    if len(parts) >= 3:
        name = parts[-1]
        namespace = "/".join(parts[1:-1])
        return PackageURL(type=purl_type, namespace=namespace, name=name)
    logger(f"get_purl: package_slug can not be parsed: {package_slug!r}", level=logging.ERROR)
    return
def extract_affected_packages(
    affected_version_range: VersionRange,
    fixed_versions: List[Version],
    purl: PackageURL,
) -> Iterable[AffectedPackage]:
    """
    Yield one AffectedPackage per fixed version, all sharing the same
    ``affected_version_range`` and ``purl``.

    GitLab advisory data provides a list of fixed_versions and a single
    affected_version_range; we cannot determine which fixed version closes
    which sub-range, so every fixed version is stored with the full range and
    the mapping is resolved later by the improver.
    """
    for fixed_version in fixed_versions:
        yield AffectedPackage(
            package=purl,
            fixed_version=fixed_version,
            affected_version_range=affected_version_range,
        )


def parse_gitlab_advisory(
    file, base_path, gitlab_scheme_by_purl_type, purl_type_by_gitlab_scheme, logger
):
    """
    Parse a Gitlab advisory file and return an AdvisoryData or None.
    These files are YAML. There is a JSON schema documented at
    https://gitlab.com/gitlab-org/advisories-community/-/blob/main/ci/schema/schema.json

    Sample YAML file:
    ---
    identifier: "GMS-2018-26"
    package_slug: "packagist/amphp/http"
    title: "Incorrect header injection check"
    description: "amphp/http isn't properly protected against HTTP header injection."
    pubdate: "2018-03-15"
    affected_range: "<1.0.1"
    fixed_versions:
    - "v1.0.1"
    urls:
    - "https://github.com/amphp/http/pull/4"
    cwe_ids:
    - "CWE-1035"
    - "CWE-937"
    identifiers:
    - "GMS-2018-26"
    """
    with open(file) as f:
        gitlab_advisory = saneyaml.load(f)
    if not isinstance(gitlab_advisory, dict):
        logger(
            f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}",
            level=logging.ERROR,
        )
        return

    # refer to schema here https://gitlab.com/gitlab-org/advisories-community/-/blob/main/ci/schema/schema.json
    # "or []" guards: missing keys previously crashed on `in None`/iterating None.
    aliases = gitlab_advisory.get("identifiers") or []
    advisory_id = gitlab_advisory.get("identifier")
    if advisory_id in aliases:
        aliases.remove(advisory_id)
    summary = build_description(gitlab_advisory.get("title"), gitlab_advisory.get("description"))
    urls = gitlab_advisory.get("urls") or []
    references = [Reference.from_url(u) for u in urls]

    cwe_ids = gitlab_advisory.get("cwe_ids") or []
    cwe_list = list(map(get_cwe_id, cwe_ids))

    # pubdate is required by the schema, but guard anyway instead of crashing
    # in dateparser.parse(None).
    pubdate = gitlab_advisory.get("pubdate")
    date_published = None
    if pubdate:
        date_published = dateparser.parse(pubdate).replace(tzinfo=pytz.UTC)
    package_slug = gitlab_advisory.get("package_slug")
    advisory_url = get_advisory_url(
        file=file,
        base_path=base_path,
        url="https://gitlab.com/gitlab-org/advisories-community/-/blob/main/",
    )
    purl: PackageURL = get_purl(
        package_slug=package_slug,
        purl_type_by_gitlab_scheme=purl_type_by_gitlab_scheme,
        logger=logger,
    )
    if not purl:
        logger(
            f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}", level=logging.ERROR
        )
        # Consistency fix: use the same advisory_id/references_v2 fields as
        # the full return below (this path previously used `references=`).
        return AdvisoryData(
            advisory_id=advisory_id,
            aliases=aliases,
            summary=summary,
            references_v2=references,
            date_published=date_published,
            url=advisory_url,
        )
    affected_version_range = None
    fixed_versions = gitlab_advisory.get("fixed_versions") or []
    affected_range = gitlab_advisory.get("affected_range")
    gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist", "conan"])
    vrc: VersionRange = RANGE_CLASS_BY_SCHEMES[purl.type]
    gitlab_scheme = gitlab_scheme_by_purl_type[purl.type]
    try:
        if affected_range:
            if gitlab_scheme in gitlab_native_schemes:
                # These schemes use gitlab's own range notation.
                affected_version_range = from_gitlab_native(
                    gitlab_scheme=gitlab_scheme, string=affected_range
                )
            else:
                affected_version_range = vrc.from_native(affected_range)
    except Exception as e:
        logger(
            f"parse_yaml_file: affected_range is not parsable: {affected_range!r} for: {purl!s} error: {e!r}\n {traceback.format_exc()}",
            level=logging.ERROR,
        )

    parsed_fixed_versions = []
    for fixed_version in fixed_versions:
        try:
            fixed_version = vrc.version_class(fixed_version)
            parsed_fixed_versions.append(fixed_version)
        except Exception as e:
            logger(
                f"parse_yaml_file: fixed_version is not parsable: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}",
                level=logging.ERROR,
            )

    if parsed_fixed_versions:
        affected_packages = list(
            extract_affected_packages(
                affected_version_range=affected_version_range,
                fixed_versions=parsed_fixed_versions,
                purl=purl,
            )
        )
    else:
        if not affected_version_range:
            affected_packages = []
        else:
            affected_packages = [
                AffectedPackage(
                    package=purl,
                    affected_version_range=affected_version_range,
                )
            ]
    return AdvisoryData(
        advisory_id=advisory_id,
        aliases=aliases,
        summary=summary,
        references_v2=references,
        date_published=date_published,
        affected_packages=affected_packages,
        weaknesses=cwe_list,
        url=advisory_url,
    )
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

# Author: Navonil Das (@NavonilDas)

from pathlib import Path
from typing import Iterable
from typing import Optional

import pytz
from dateutil.parser import parse
from fetchcode.vcs import fetch_via_vcs
from packageurl import PackageURL
from univers.version_range import NpmVersionRange

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Reference
from vulnerabilities.importer import VulnerabilitySeverity
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
from vulnerabilities.severity_systems import CVSSV2
from vulnerabilities.severity_systems import CVSSV3
from vulnerabilities.utils import build_description
from vulnerabilities.utils import load_json


class NpmImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Node.js Security Working Group importer pipeline

    Import advisories from nodejs security working group including node proper
    advisories and npm advisories.
    """

    pipeline_id = "nodejs_security_wg"
    spdx_license_expression = "MIT"
    license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md"
    repo_url = "git+https://github.com/nodejs/security-wg"
    unfurl_version_ranges = True

    # Set by clone(); None so clean_downloads()/on_failure() do not raise
    # AttributeError when the pipeline fails before cloning.
    vcs_response = None

    @classmethod
    def steps(cls):
        return (
            cls.clone,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def clone(self):
        """Clone the security-wg repository locally."""
        self.log(f"Cloning `{self.repo_url}`")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def advisories_count(self):
        """Return the number of JSON advisory files under vuln/npm."""
        vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm"
        return sum(1 for _ in vuln_directory.glob("*.json"))

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """Yield AdvisoryData for each advisory file under vuln/npm."""
        vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm"

        for advisory in vuln_directory.glob("*.json"):
            yield self.to_advisory_data(advisory)

    def to_advisory_data(self, file: Path) -> Optional[AdvisoryData]:
        """
        Return an AdvisoryData built from the JSON advisory in ``file``, or
        None for the index file or an advisory without an id.
        (Annotation fixed: this returns a single advisory, not an iterable.)
        """
        if file.name == "index.json":
            self.log(f"Skipping {file.name} file")
            return
        data = load_json(file)
        vuln_id = data.get("id")
        description = data.get("overview") or ""
        summary = data.get("title") or ""
        # TODO: Take care of description
        date_published = None
        if isinstance(data.get("created_at"), str):
            date_published = parse(data.get("created_at")).replace(tzinfo=pytz.UTC)
        references = []
        cvss_vector = data.get("cvss_vector")
        cvss_score = data.get("cvss_score")
        advisory_json_url = f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{vuln_id}.json"
        severities = []
        # TODO: "CVSS:3.1/" vectors, if present in the data, are currently
        # dropped — confirm against the security-wg dataset.
        if cvss_vector and cvss_vector.startswith("CVSS:3.0/"):
            severities.append(
                VulnerabilitySeverity(
                    system=CVSSV3,
                    value=cvss_score,
                    # Fix: keep the vector instead of discarding it.
                    scoring_elements=cvss_vector,
                    url=advisory_json_url,
                )
            )
        if cvss_vector and cvss_vector.startswith("CVSS:2.0/"):
            severities.append(
                VulnerabilitySeverity(
                    system=CVSSV2,
                    value=cvss_score,
                    scoring_elements=cvss_vector,
                    url=advisory_json_url,
                )
            )
        if not vuln_id:
            self.log(f"Advisory ID not found in {file}")
            return

        advisory_reference = Reference(
            url=advisory_json_url,
            reference_id=vuln_id,
        )

        for ref in data.get("references") or []:
            references.append(
                Reference(
                    url=ref,
                )
            )

        if advisory_reference not in references:
            references.append(advisory_reference)

        package_name = data.get("module_name")
        affected_packages = []
        if package_name:
            affected_packages.append(self.get_affected_package(data, package_name))
        advisory_aliases = data.get("cves") or []

        return AdvisoryData(
            advisory_id=f"npm-{vuln_id}",
            aliases=advisory_aliases,
            summary=build_description(summary=summary, description=description),
            date_published=date_published,
            affected_packages=affected_packages,
            references_v2=references,
            severities=severities,
            url=advisory_json_url,
        )

    def get_affected_package(self, data, package_name):
        """
        Return an AffectedPackage for ``package_name`` built from the
        vulnerable/patched version ranges in ``data``.
        """
        affected_version_range = None
        unaffected_version_range = None
        fixed_version = None

        vulnerable_range = data.get("vulnerable_versions") or ""
        patched_range = data.get("patched_versions") or ""

        # https://github.com/nodejs/security-wg/blob/cfaa51cc5c83f01eea61b69658f7bc76a77c5979/vuln/npm/213.json#L14
        if vulnerable_range == "<=99.999.99999":
            vulnerable_range = "*"
        if vulnerable_range:
            affected_version_range = NpmVersionRange.from_native(vulnerable_range)

        # https://github.com/nodejs/security-wg/blob/cfaa51cc5c83f01eea61b69658f7bc76a77c5979/vuln/npm/213.json#L15
        if patched_range == "<0.0.0":
            patched_range = None
        if patched_range:
            unaffected_version_range = NpmVersionRange.from_native(patched_range)

        # We only store single fixed versions and not a range of fixed versions
        # If there is a single constraint in the unaffected_version_range
        # having comparator as ">=" then we store that as the fixed version
        if unaffected_version_range and len(unaffected_version_range.constraints) == 1:
            constraint = unaffected_version_range.constraints[0]
            if constraint.comparator == ">=":
                fixed_version = constraint.version

        return AffectedPackage(
            package=PackageURL(
                type="npm",
                name=package_name,
            ),
            affected_version_range=affected_version_range,
            fixed_version=fixed_version,
        )

    def clean_downloads(self):
        """Delete the cloned repository, if any."""
        if self.vcs_response:
            self.log("Removing cloned repository")
            self.vcs_response.delete()

    def on_failure(self):
        # Always release the checkout on failure.
        self.clean_downloads()
+ """ + + pipeline_id = "nvd_importer_v2" + # See https://github.com/nexB/vulnerablecode/issues/665 for follow up + spdx_license_expression = ( + "LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou" + ) + license_url = "https://nvd.nist.gov/general/FAQ-Sections/General-FAQs#faqLink7" + notice = """ + See https://nvd.nist.gov/general/FAQ-Sections/General-FAQs#faqLink7 + All NVD data is freely available from our data feeds + (https://nvd.nist.gov/vuln/data-feeds). There are no fees, licensing + restrictions, or even a requirement to register. All NIST publications are + available in the public domain according to Title 17 of the United States + Code. Acknowledgment of the NVD when using our information is appreciated. + In addition, please email nvd@nist.gov to let us know how the information is + being used + + See also https://cve.mitre.org/about/termsofuse.html + Terms of Use + LICENSE + [...] + CVE Usage: MITRE hereby grants you a perpetual, worldwide, non-exclusive, no- + charge, royalty-free, irrevocable copyright license to reproduce, prepare + derivative works of, publicly display, publicly perform, sublicense, and + distribute Common Vulnerabilities and Exposures (CVE®). Any copy you make for + such purposes is authorized provided that you reproduce MITRE's copyright + designation and this license in any such copy. DISCLAIMERS + + ALL DOCUMENTS AND THE INFORMATION CONTAINED THEREIN PROVIDED BY MITRE ARE + PROVIDED ON AN "AS IS" BASIS AND THE CONTRIBUTOR, THE ORGANIZATION HE/SHE + REPRESENTS OR IS SPONSORED BY (IF ANY), THE MITRE CORPORATION, ITS BOARD OF + TRUSTEES, OFFICERS, AGENTS, AND EMPLOYEES, DISCLAIM ALL WARRANTIES, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION THEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
+ """ + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + def advisories_count(self): + url = "https://services.nvd.nist.gov/rest/json/cves/2.0?resultsPerPage=1" + + advisory_count = 0 + try: + response = requests.get(url) + response.raise_for_status() + data = response.json() + except requests.HTTPError as http_err: + self.log( + f"HTTP error occurred: {http_err} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + return advisory_count + + advisory_count = data.get("totalResults", 0) + return advisory_count + + def collect_advisories(self) -> Iterable[AdvisoryData]: + for _year, cve_data in fetch_cve_data_1_1(logger=self.log): + yield from to_advisories(cve_data=cve_data) + + +# Isolating network calls for simplicity of testing +def fetch(url, logger=None): + if logger: + logger(f"Fetching `{url}`") + gz_file = requests.get(url) + data = gzip.decompress(gz_file.content) + try: + data = data.decode("utf-8") + except UnicodeDecodeError: + logger(f"Failed to decode data from {url}") + return {} + return json.loads(data) + + +def fetch_cve_data_1_1(starting_year=2002, logger=None): + """ + Yield tuples of (year, lists of CVE mappings) from the NVD, one for each + year since ``starting_year`` defaulting to 2002. + """ + current_year = date.today().year + # NVD json feeds start from 2002. + for year in range(starting_year, current_year + 1): + download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz" + yield year, fetch(url=download_url, logger=logger) + + +def to_advisories(cve_data): + """ + Yield AdvisoryData objects from a CVE json feed. 
+ """ + for cve_item in CveItem.from_cve_data(cve_data=cve_data): + if cve_item.is_related_to_hardware or not cve_item.cve_id: + continue + yield cve_item.to_advisory() + + +@attr.attributes +class CveItem: + cve_item = attr.attrib(default=attr.Factory(dict), type=dict) + + @classmethod + def to_advisories(cls, cve_data, skip_hardware=True): + """ + Yield AdvisoryData objects from ``cve_data`` data for CVE JSON 1.1feed. + Skip hardware + """ + for cve_item in CveItem.from_cve_data(cve_data=cve_data, skip_hardware=skip_hardware): + yield cve_item.to_advisory() + + @classmethod + def from_cve_data(cls, cve_data, skip_hardware=True): + """ + Yield CVE items mapping from a cve_data list of CVE mappings from the NVD. + """ + for cve_item in cve_data.get("CVE_Items") or []: + if not cve_item: + continue + if not isinstance(cve_item, dict): + raise ValueError(f"cve_item: {cve_item!r} is not a mapping") + cve_item = cls(cve_item=cve_item) + if skip_hardware and cve_item.is_related_to_hardware: + continue + yield cve_item + + @property + def cve_id(self): + return self.cve_item["cve"]["CVE_data_meta"]["ID"] + + @property + def summary(self): + """ + Return a descriptive summary. + """ + # In 99% of cases len(cve_item['cve']['description']['description_data']) == 1 , so + # this usually returns cve_item['cve']['description']['description_data'][0]['value'] + # In the remaining 1% cases this returns the longest summary. + # FIXME: we should retun the full description WITH the summry as the first line instead + summaries = [] + for desc in get_item(self.cve_item, "cve", "description", "description_data") or []: + if desc.get("value"): + summaries.append(desc["value"]) + return max(summaries, key=len) if summaries else None + + @property + def cpes(self): + """ + Return a list of unique CPE strings for this CVE. 
+ """ + # FIXME: we completely ignore the configurations here + cpes = [] + for node in get_item(self.cve_item, "configurations", "nodes") or []: + for cpe_data in node.get("cpe_match") or []: + cpe23_uri = cpe_data.get("cpe23Uri") + if cpe23_uri and cpe23_uri not in cpes: + cpes.append(cpe23_uri) + return cpes + + @property + def severities(self): + """ + Return a list of VulnerabilitySeverity for this CVE. + """ + severities = [] + impact = self.cve_item.get("impact") or {} + base_metric_v4 = impact.get("baseMetricV4") or {} + if base_metric_v4: + cvss_v4 = base_metric_v4.get("cvssV4") or {} + vs = VulnerabilitySeverity( + system=severity_systems.CVSSV4, + value=str(cvss_v4.get("baseScore") or ""), + scoring_elements=str(cvss_v4.get("vectorString") or ""), + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + ) + severities.append(vs) + + base_metric_v3 = impact.get("baseMetricV3") or {} + if base_metric_v3: + cvss_v3 = get_item(base_metric_v3, "cvssV3") + version = cvss_v3.get("version") + system = None + if version == "3.1": + system = severity_systems.CVSSV31 + else: + system = severity_systems.CVSSV3 + vs = VulnerabilitySeverity( + system=system, + value=str(cvss_v3.get("baseScore") or ""), + scoring_elements=str(cvss_v3.get("vectorString") or ""), + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + ) + severities.append(vs) + + base_metric_v2 = impact.get("baseMetricV2") or {} + if base_metric_v2: + cvss_v2 = base_metric_v2.get("cvssV2") or {} + vs = VulnerabilitySeverity( + system=severity_systems.CVSSV2, + value=str(cvss_v2.get("baseScore") or ""), + scoring_elements=str(cvss_v2.get("vectorString") or ""), + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + ) + severities.append(vs) + + return severities + + @property + def reference_urls(self): + """ + Return a list unique of reference URLs. 
+ """ + # FIXME: we should also collect additional data from the references such as tags and ids + + urls = [] + for reference in get_item(self.cve_item, "cve", "references", "reference_data") or []: + ref_url = reference.get("url") + if ref_url and ref_url.startswith(("http", "ftp")) and ref_url not in urls: + urls.append(ref_url) + return urls + + @property + def references(self): + """ + Return a list of AdvisoryReference. + """ + # FIXME: we should also collect additional data from the references such as tags and ids + references = [] + + # we track each CPE as a reference for now + for cpe in self.cpes: + cpe_url = f"https://nvd.nist.gov/vuln/search/results?adv_search=true&isCpeNameSearch=true&query={cpe}" + references.append(Reference(reference_id=cpe, url=cpe_url)) + + # FIXME: we also add the CVE proper as a reference, but is this correct? + references.append( + Reference( + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + reference_id=self.cve_id, + ) + ) + + # clean to remove dupes for the CVE id proper + ref_urls = [ + ru + for ru in self.reference_urls + if ru != f"https://nvd.nist.gov/vuln/detail/{self.cve_id}" + ] + references.extend([Reference(url=url) for url in ref_urls]) + + return references + + @property + def is_related_to_hardware(self): + """ + Return True if this CVE item is for hardware (as opposed to software). 
+ """ + return any(is_related_to_hardware(cpe) for cpe in self.cpes) + + @property + def weaknesses(self): + """ + Return a list of CWE IDs like: [119, 189] + """ + weaknesses = [] + for weaknesses_item in ( + get_item(self.cve_item, "cve", "problemtype", "problemtype_data") or [] + ): + weaknesses_description = weaknesses_item.get("description") or [] + for weaknesses_value in weaknesses_description: + cwe_id = ( + weaknesses_value.get("value") if weaknesses_value.get("lang") == "en" else None + ) + if cwe_id in ["NVD-CWE-Other", "NVD-CWE-noinfo"] or not cwe_id: + continue # Skip Invalid CWE + weaknesses.append(get_cwe_id(cwe_id)) + return weaknesses + + def to_advisory(self): + """ + Return an AdvisoryData object from this CVE item + """ + return AdvisoryData( + advisory_id=self.cve_id, + aliases=[], + summary=self.summary, + references_v2=self.references, + date_published=dateparser.parse(self.cve_item.get("publishedDate")), + weaknesses=self.weaknesses, + severities=self.severities, + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + ) + + +def is_related_to_hardware(cpe): + """ + Return True if the ``cpe`` is related to hardware. + """ + cpe_comps = cpe.split(":") + # CPE follow the format cpe:cpe_version:product_type:vendor:product + return len(cpe_comps) > 2 and cpe_comps[2] == "h" diff --git a/vulnerabilities/pipelines/v2_importers/postgresql_importer.py b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py new file mode 100644 index 000000000..2f5a49439 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py @@ -0,0 +1,163 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import urllib.parse as urlparse +from typing import Iterable + +import requests +from bs4 import BeautifulSoup +from packageurl import PackageURL +from univers.version_range import GenericVersionRange +from univers.versions import GenericVersion + +from vulnerabilities import severity_systems +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 + + +class PostgreSQLImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + PostgreSQL Importer Pipeline + + This pipeline imports security advisories from the PostgreSQL project. + """ + + pipeline_id = "postgresql_importer_v2" + license_url = "https://www.postgresql.org/about/licence/" + spdx_license_expression = "PostgreSQL" + base_url = "https://www.postgresql.org/support/security/" + + links = set() + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + def advisories_count(self) -> int: + if not self.links: + self.collect_links() + return len(self.links) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + if not self.links: + self.collect_links() + + for url in self.links: + data = requests.get(url).content + yield from self.to_advisories(data) + + def collect_links(self): + known_urls = {self.base_url} + visited_urls = set() + + while True: + unvisited_urls = known_urls - visited_urls + for url in unvisited_urls: + data = requests.get(url).content + visited_urls.add(url) + known_urls.update(self.find_advisory_urls(data)) + if known_urls == visited_urls: + break + self.links = known_urls + + def to_advisories(self, data): + advisories = [] + soup = BeautifulSoup(data, features="lxml") + tables = soup.select("table") + + if not tables: + return advisories + + table = tables[0] + + for row in table.select("tbody tr"): + ref_col, 
affected_col, fixed_col, severity_score_col, desc_col = row.select("td") + summary = desc_col.text + pkg_qualifiers = {"os": "windows"} if "windows" in summary.lower() else {} + + affected_packages = [] + affected_version_list = [v.strip() for v in affected_col.text.split(",") if v.strip()] + fixed_version_list = [v.strip() for v in fixed_col.text.split(",") if v.strip()] + + if fixed_version_list: + for fixed_version in fixed_version_list: + affected_packages.append( + AffectedPackage( + package=PackageURL( + name="postgresql", type="generic", qualifiers=pkg_qualifiers + ), + affected_version_range=GenericVersionRange.from_versions( + affected_version_list + ) + if affected_version_list + else None, + fixed_version=GenericVersion(fixed_version), + ) + ) + elif affected_version_list: + affected_packages.append( + AffectedPackage( + package=PackageURL( + name="postgresql", type="generic", qualifiers=pkg_qualifiers + ), + affected_version_range=GenericVersionRange.from_versions( + affected_version_list + ), + ) + ) + + cve_id = "" + try: + cve_id = ref_col.select(".nobr")[0].text + except IndexError: + pass + + references = [] + vector_link_tag = severity_score_col.find("a") + for a_tag in ref_col.select("a"): + link = a_tag.attrs["href"] + if link.startswith("/"): + link = urlparse.urljoin("https://www.postgresql.org/", link) + severities = [] + if "support/security/CVE" in link and vector_link_tag: + parsed_link = urlparse.urlparse(vector_link_tag["href"]) + cvss3_vector = urlparse.parse_qs(parsed_link.query).get("vector", [""])[0] + cvss3_base_score = vector_link_tag.text + severities.append( + VulnerabilitySeverity( + system=severity_systems.CVSSV3, + value=cvss3_base_score, + scoring_elements=cvss3_vector, + ) + ) + references.append(Reference(url=link, severities=severities)) + + if cve_id: + advisories.append( + AdvisoryData( + advisory_id=cve_id, + aliases=[], + summary=summary, + references_v2=references, + affected_packages=affected_packages, + 
url=f"https://www.postgresql.org/support/security/{cve_id}", + ) + ) + + return advisories + + def find_advisory_urls(self, page_data): + soup = BeautifulSoup(page_data, features="lxml") + return { + urlparse.urljoin("https://www.postgresql.org/", a_tag.attrs["href"]) + for a_tag in soup.select("h3+ p a") + } diff --git a/vulnerabilities/pipelines/v2_importers/pypa_importer.py b/vulnerabilities/pipelines/v2_importers/pypa_importer.py new file mode 100644 index 000000000..7463cc4bd --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/pypa_importer.py @@ -0,0 +1,74 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from pathlib import Path +from typing import Iterable + +import saneyaml +from fetchcode.vcs import fetch_via_vcs + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.utils import get_advisory_url + + +class PyPaImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + Pypa Importer Pipeline + + Collect advisories from PyPA GitHub repository.""" + + pipeline_id = "pypa_importer_v2" + spdx_license_expression = "CC-BY-4.0" + license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" + repo_url = "git+https://github.com/pypa/advisory-database" + unfurl_version_ranges = True + + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.clean_downloads, + ) + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self): + vulns_directory = 
Path(self.vcs_response.dest_dir) / "vulns" + return sum(1 for _ in vulns_directory.rglob("*.yaml")) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + from vulnerabilities.importers.osv import parse_advisory_data_v2 + + base_directory = Path(self.vcs_response.dest_dir) + vulns_directory = base_directory / "vulns" + + for advisory in vulns_directory.rglob("*.yaml"): + advisory_url = get_advisory_url( + file=advisory, + base_path=base_directory, + url="https://github.com/pypa/advisory-database/blob/main/", + ) + advisory_dict = saneyaml.load(advisory.read_text()) + yield parse_advisory_data_v2( + raw_data=advisory_dict, + supported_ecosystems=["pypi"], + advisory_url=advisory_url, + ) + + def clean_downloads(self): + if self.vcs_response: + self.log(f"Removing cloned repository") + self.vcs_response.delete() + + def on_failure(self): + self.clean_downloads() diff --git a/vulnerabilities/pipelines/v2_importers/pysec_importer.py b/vulnerabilities/pipelines/v2_importers/pysec_importer.py new file mode 100644 index 000000000..e67f41a28 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/pysec_importer.py @@ -0,0 +1,67 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# +import json +import logging +from io import BytesIO +from typing import Iterable +from zipfile import ZipFile + +import requests + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 + + +class PyPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + PyPI Importer Pipeline + + Collect advisories from PyPI.""" + + pipeline_id = "pysec_importer_v2" + license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" + url = "https://osv-vulnerabilities.storage.googleapis.com/PyPI/all.zip" + spdx_license_expression = "CC-BY-4.0" + unfurl_version_ranges = True + + @classmethod + def steps(cls): + return ( + cls.fetch_zip, + cls.collect_and_store_advisories, + ) + + def fetch_zip(self): + self.log(f"Fetching `{self.url}`") + self.advisory_zip = requests.get(self.url).content + + def advisories_count(self) -> int: + with ZipFile(BytesIO(self.advisory_zip)) as zip: + advisory_count = sum(1 for file in zip.namelist() if file.startswith("PYSEC-")) + return advisory_count + + def collect_advisories(self) -> Iterable[AdvisoryData]: + """Yield AdvisoryData using a zipped data dump of OSV data""" + from vulnerabilities.importers.osv import parse_advisory_data_v2 + + with ZipFile(BytesIO(self.advisory_zip)) as zip_file: + for file_name in zip_file.namelist(): + if not file_name.startswith("PYSEC-"): + self.log( + f"Unsupported PyPI advisory data file: {file_name}", + level=logging.ERROR, + ) + continue + with zip_file.open(file_name) as f: + vul_info = json.load(f) + yield parse_advisory_data_v2( + raw_data=vul_info, + supported_ecosystems=["pypi"], + advisory_url=self.url, + ) diff --git a/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py new file mode 100644 index 000000000..b2ddfd3cd --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py @@ -0,0 +1,318 @@ 
+import json +import logging +import re +from pathlib import Path +from typing import Iterable + +import dateparser +from fetchcode.vcs import fetch_via_vcs + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.models import VulnerabilityReference +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.severity_systems import SCORING_SYSTEMS +from vulnerabilities.utils import get_advisory_url +from vulnerabilities.utils import get_cwe_id +from vulnerabilities.utils import get_reference_id + +logger = logging.getLogger(__name__) + + +class VulnrichImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + Vulnrichment Importer Pipeline + + This pipeline imports security advisories from Vulnrichment project. + """ + + pipeline_id = "vulnrichment_importer_v2" + spdx_license_expression = "CC0-1.0" + license_url = "https://github.com/cisagov/vulnrichment/blob/develop/LICENSE" + repo_url = "git+https://github.com/cisagov/vulnrichment.git" + + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.clean_downloads, + ) + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self): + vuln_directory = Path(self.vcs_response.dest_dir) + return sum(1 for _ in vuln_directory.glob("*.json")) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + base_path = Path(self.vcs_response.dest_dir) + for file_path in base_path.glob("**/**/*.json"): + if not file_path.name.startswith("CVE-"): + continue + with open(file_path) as f: + raw_data = json.load(f) + advisory_url = get_advisory_url( + file=file_path, + base_path=base_path, + url="https://github.com/cisagov/vulnrichment/blob/develop/", + ) + yield self.parse_cve_advisory(raw_data, 
advisory_url) + + def parse_cve_advisory(self, raw_data, advisory_url): + cve_metadata = raw_data.get("cveMetadata", {}) + cve_id = cve_metadata.get("cveId") + state = cve_metadata.get("state") + + date_published = cve_metadata.get("datePublished") + if date_published: + date_published = dateparser.parse(date_published) + + # Extract containers + containers = raw_data.get("containers", {}) + cna_data = containers.get("cna", {}) + adp_data = containers.get("adp", {}) + + # Extract descriptions + summary = "" + description_list = cna_data.get("descriptions", []) + for description_dict in description_list: + if not description_dict.get("lang") in ["en", "en-US"]: + continue + summary = description_dict.get("value") + + # Extract metrics + severities = [] + metrics = cna_data.get("metrics", []) + [ + adp_metrics for data in adp_data for adp_metrics in data.get("metrics", []) + ] + + vulnrichment_scoring_system = { + "cvssV4_0": SCORING_SYSTEMS["cvssv4"], + "cvssV3_1": SCORING_SYSTEMS["cvssv3.1"], + "cvssV3_0": SCORING_SYSTEMS["cvssv3"], + "cvssV2_0": SCORING_SYSTEMS["cvssv2"], + "other": { + "ssvc": SCORING_SYSTEMS["ssvc"], + }, # ignore kev + } + + for metric in metrics: + for metric_type, metric_value in metric.items(): + if metric_type not in vulnrichment_scoring_system: + continue + + if metric_type == "other": + other_types = metric_value.get("type") + if other_types == "ssvc": + content = metric_value.get("content", {}) + vector_string, decision = ssvc_calculator(content) + scoring_system = vulnrichment_scoring_system[metric_type][other_types] + severity = VulnerabilitySeverity( + system=scoring_system, value=decision, scoring_elements=vector_string + ) + severities.append(severity) + # ignore kev + else: + vector_string = metric_value.get("vectorString") + base_score = metric_value.get("baseScore") + scoring_system = vulnrichment_scoring_system[metric_type] + severity = VulnerabilitySeverity( + system=scoring_system, value=base_score, 
scoring_elements=vector_string + ) + severities.append(severity) + + # Extract references cpes and ignore affected products + cpes = set() + for affected_product in cna_data.get("affected", []): + if type(affected_product) != dict: + continue + cpes.update(affected_product.get("cpes") or []) + + references = [] + for ref in cna_data.get("references", []): + # https://github.com/CVEProject/cve-schema/blob/main/schema/tags/reference-tags.json + # We removed all unwanted reference types and set the default reference type to 'OTHER'. + ref_type = VulnerabilityReference.OTHER + vul_ref_types = { + "exploit": VulnerabilityReference.EXPLOIT, + "issue-tracking": VulnerabilityReference.BUG, + "mailing-list": VulnerabilityReference.MAILING_LIST, + "third-party-advisory": VulnerabilityReference.ADVISORY, + "vendor-advisory": VulnerabilityReference.ADVISORY, + "vdb-entry": VulnerabilityReference.ADVISORY, + } + + for tag_type in ref.get("tags", []): + if tag_type in vul_ref_types: + ref_type = vul_ref_types.get(tag_type) + + url = ref.get("url") + reference = Reference( + reference_id=get_reference_id(url), + url=url, + reference_type=ref_type, + ) + + references.append(reference) + + cpes_ref = [ + Reference( + reference_id=cpe, + reference_type=VulnerabilityReference.OTHER, + url=f"https://nvd.nist.gov/vuln/search/results?adv_search=true&isCpeNameSearch=true&query={cpe}", + ) + for cpe in sorted(list(cpes)) + ] + references.extend(cpes_ref) + + weaknesses = set() + for problem_type in cna_data.get("problemTypes", []): + descriptions = problem_type.get("descriptions", []) + for description in descriptions: + cwe_id = description.get("cweId") + if cwe_id: + weaknesses.add(get_cwe_id(cwe_id)) + + description_text = description.get("description") + if description_text: + pattern = r"CWE-(\d+)" + match = re.search(pattern, description_text) + if match: + weaknesses.add(int(match.group(1))) + + return AdvisoryData( + advisory_id=cve_id, + aliases=[], + 
summary=summary, + references_v2=references, + date_published=date_published, + weaknesses=sorted(weaknesses), + url=advisory_url, + severities=severities, + ) + + def clean_downloads(self): + if self.vcs_response: + self.log("Removing cloned repository") + self.vcs_response.delete() + + def on_failure(self): + self.clean_downloads() + + +def ssvc_calculator(ssvc_data): + """ + Return the ssvc vector and the decision value + """ + options = ssvc_data.get("options", []) + timestamp = ssvc_data.get("timestamp") + + # Extract the options into a dictionary + options_dict = {k: v.lower() for option in options for k, v in option.items()} + + # We copied the table value from this link. + # https://www.cisa.gov/sites/default/files/publications/cisa-ssvc-guide%20508c.pdf + + # Determining Mission and Well-Being Impact Value + mission_well_being_table = { + # (Mission Prevalence, Public Well-being Impact) : "Mission & Well-being" + ("minimal", "minimal"): "low", + ("minimal", "material"): "medium", + ("minimal", "irreversible"): "high", + ("support", "minimal"): "medium", + ("support", "material"): "medium", + ("support", "irreversible"): "high", + ("essential", "minimal"): "high", + ("essential", "material"): "high", + ("essential", "irreversible"): "high", + } + + if "Mission Prevalence" not in options_dict: + options_dict["Mission Prevalence"] = "minimal" + + if "Public Well-being Impact" not in options_dict: + options_dict["Public Well-being Impact"] = "material" + + options_dict["Mission & Well-being"] = mission_well_being_table[ + (options_dict["Mission Prevalence"], options_dict["Public Well-being Impact"]) + ] + + decision_key = ( + options_dict.get("Exploitation"), + options_dict.get("Automatable"), + options_dict.get("Technical Impact"), + options_dict.get("Mission & Well-being"), + ) + + decision_points = { + "Exploitation": {"E": {"none": "N", "poc": "P", "active": "A"}}, + "Automatable": {"A": {"no": "N", "yes": "Y"}}, + "Technical Impact": {"T": {"partial": 
"P", "total": "T"}}, + "Public Well-being Impact": {"B": {"minimal": "M", "material": "A", "irreversible": "I"}}, + "Mission Prevalence": {"P": {"minimal": "M", "support": "S", "essential": "E"}}, + "Mission & Well-being": {"M": {"low": "L", "medium": "M", "high": "H"}}, + } + + # Create the SSVC vector + ssvc_vector = "SSVCv2/" + for key, value_map in options_dict.items(): + options_key = decision_points.get(key) + for lhs, rhs_map in options_key.items(): + ssvc_vector += f"{lhs}:{rhs_map.get(value_map)}/" + + # "Decision": {"D": {"Track": "T", "Track*": "R", "Attend": "A", "Act": "C"}}, + decision_values = {"Track": "T", "Track*": "R", "Attend": "A", "Act": "C"} + + decision_lookup = { + ("none", "no", "partial", "low"): "Track", + ("none", "no", "partial", "medium"): "Track", + ("none", "no", "partial", "high"): "Track", + ("none", "no", "total", "low"): "Track", + ("none", "no", "total", "medium"): "Track", + ("none", "no", "total", "high"): "Track*", + ("none", "yes", "partial", "low"): "Track", + ("none", "yes", "partial", "medium"): "Track", + ("none", "yes", "partial", "high"): "Attend", + ("none", "yes", "total", "low"): "Track", + ("none", "yes", "total", "medium"): "Track", + ("none", "yes", "total", "high"): "Attend", + ("poc", "no", "partial", "low"): "Track", + ("poc", "no", "partial", "medium"): "Track", + ("poc", "no", "partial", "high"): "Track*", + ("poc", "no", "total", "low"): "Track", + ("poc", "no", "total", "medium"): "Track*", + ("poc", "no", "total", "high"): "Attend", + ("poc", "yes", "partial", "low"): "Track", + ("poc", "yes", "partial", "medium"): "Track", + ("poc", "yes", "partial", "high"): "Attend", + ("poc", "yes", "total", "low"): "Track", + ("poc", "yes", "total", "medium"): "Track*", + ("poc", "yes", "total", "high"): "Attend", + ("active", "no", "partial", "low"): "Track", + ("active", "no", "partial", "medium"): "Track", + ("active", "no", "partial", "high"): "Attend", + ("active", "no", "total", "low"): "Track", + ("active", 
"no", "total", "medium"): "Attend", + ("active", "no", "total", "high"): "Act", + ("active", "yes", "partial", "low"): "Attend", + ("active", "yes", "partial", "medium"): "Attend", + ("active", "yes", "partial", "high"): "Act", + ("active", "yes", "total", "low"): "Attend", + ("active", "yes", "total", "medium"): "Act", + ("active", "yes", "total", "high"): "Act", + } + + decision = decision_lookup.get(decision_key, "") + + if decision: + ssvc_vector += f"D:{decision_values.get(decision)}/" + + if timestamp: + timestamp_formatted = dateparser.parse(timestamp).strftime("%Y-%m-%dT%H:%M:%SZ") + + ssvc_vector += f"{timestamp_formatted}/" + return ssvc_vector, decision diff --git a/vulnerabilities/pipelines/v2_improvers/collect_commits.py b/vulnerabilities/pipelines/v2_improvers/collect_commits.py new file mode 100644 index 000000000..32fb1ce79 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/collect_commits.py @@ -0,0 +1,252 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import re + +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import CodeFixV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +def is_vcs_url_already_processed(commit_id): + """ + Check if a VCS URL exists in a CodeFix entry. + """ + return CodeFixV2.objects.filter(commits__contains=[commit_id]).exists() + + +class CollectFixCommitsPipeline(VulnerableCodePipeline): + """ + Improver pipeline to scout References and create CodeFix entries. 
+ """ + + pipeline_id = "collect_fix_commits_v2" + license_expression = None + + @classmethod + def steps(cls): + return (cls.collect_and_store_fix_commits,) + + def collect_and_store_fix_commits(self): + affected_advisories = ( + AdvisoryV2.objects.filter(affecting_packages__isnull=False) + .prefetch_related("affecting_packages") + .distinct() + ) + + self.log(f"Processing {affected_advisories.count():,d} references to collect fix commits.") + + created_fix_count = 0 + progress = LoopProgress(total_iterations=affected_advisories.count(), logger=self.log) + + for adv in progress.iter(affected_advisories.paginated(per_page=500)): + for reference in adv.references.all(): + if not "/commit/" in reference.url: + continue + if not is_vcs_url(reference.url): + continue + + vcs_url = normalize_vcs_url(repo_url=reference.url) + + if not vcs_url: + continue + + # Skip if already processed + if is_vcs_url_already_processed(commit_id=vcs_url): + self.log( + f"Skipping already processed reference: {reference.url} with VCS URL {vcs_url}" + ) + continue + # check if vcs_url has commit + for package in adv.affecting_packages.all(): + code_fix, created = CodeFixV2.objects.get_or_create( + commits=[vcs_url], + advisory=adv, + affected_package=package, + ) + + if created: + created_fix_count += 1 + self.log( + f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" + ) + + self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") + + +PLAIN_URLS = ( + "https://", + "http://", +) + +VCS_URLS = ( + "git://", + "git+git://", + "git+https://", + "git+http://", + "hg://", + "hg+http://", + "hg+https://", + "svn://", + "svn+https://", + "svn+http://", +) + + +# TODO: This function was borrowed from scancode-toolkit. We need to create a shared library for that. +def normalize_vcs_url(repo_url, vcs_tool=None): + """ + Return a normalized vcs_url version control URL given some `repo_url` and an + optional `vcs_tool` hint (such as 'git', 'hg', etc.) 
+ + Return None if repo_url is not recognized as a VCS URL. + + Handles shortcuts for GitHub, GitHub gist, Bitbucket, or GitLab repositories + and more using the same approach as npm install: + + See https://docs.npmjs.com/files/package.json#repository + or https://getcomposer.org/doc/05-repositories.md + + This is done here in npm: + https://github.com/npm/npm/blob/d3c858ce4cfb3aee515bb299eb034fe1b5e44344/node_modules/hosted-git-info/git-host-info.js + + These should be resolved: + npm/npm + gist:11081aaa281 + bitbucket:example/repo + gitlab:another/repo + expressjs/serve-static + git://github.com/angular/di.js.git + git://github.com/hapijs/boom + git@github.com:balderdashy/waterline-criteria.git + http://github.com/ariya/esprima.git + http://github.com/isaacs/nopt + https://github.com/chaijs/chai + https://github.com/christkv/kerberos.git + https://gitlab.com/foo/private.git + git@gitlab.com:foo/private.git + """ + if not repo_url or not isinstance(repo_url, str): + return + + repo_url = repo_url.strip() + if not repo_url: + return + + # TODO: If we match http and https, we may should add more check in + # case if the url is not a repo one. For example, check the domain + # name in the url... + if repo_url.startswith(VCS_URLS + PLAIN_URLS): + return repo_url + + if repo_url.startswith("git@"): + tool, _, right = repo_url.partition("@") + if ":" in repo_url: + host, _, repo = right.partition(":") + else: + # git@github.com/Filirom1/npm2aur.git + host, _, repo = right.partition("/") + + if any(r in host for r in ("bitbucket", "gitlab", "github")): + scheme = "https" + else: + scheme = "git" + + return f"{scheme}://{host}/{repo}" + + # FIXME: where these URL schemes come from?? 
+    if repo_url.startswith(("bitbucket:", "gitlab:", "github:", "gist:")):
+        repo = repo_url.split(":")[1]
+        hoster_urls = {
+            "bitbucket": f"https://bitbucket.org/{repo}",
+            "github": f"https://github.com/{repo}",
+            "gitlab": f"https://gitlab.com/{repo}",
+            "gist": f"https://gist.github.com/{repo}",
+        }
+        hoster, _, repo = repo_url.partition(":")
+        return hoster_urls[hoster]  # values are f-strings, already interpolated; "% locals()" was a no-op that breaks on "%" in repo names
+
+    if len(repo_url.split("/")) == 2:
+        # implicit github, but that's only on NPM?
+        return f"https://github.com/{repo_url}"
+    return repo_url
+
+
+def is_vcs_url(repo_url):
+    """
+    Check if a given URL or string matches a valid VCS (Version Control System) URL.
+
+    Supports:
+    - Standard VCS URL protocols (git, http, https, ssh)
+    - Shortcut syntax (e.g., github:user/repo, gitlab:group/repo)
+    - GitHub shortcut (e.g., user/repo)
+
+    Args:
+        repo_url (str): The repository URL or shortcut to validate.
+
+    Returns:
+        bool: True if the string is a valid VCS URL, False otherwise.
+ + Examples: + >>> is_vcs_url("git://github.com/angular/di.js.git") + True + >>> is_vcs_url("github:user/repo") + True + >>> is_vcs_url("user/repo") + True + >>> is_vcs_url("https://github.com/user/repo.git") + True + >>> is_vcs_url("git@github.com:user/repo.git") + True + >>> is_vcs_url("http://github.com/isaacs/nopt") + True + >>> is_vcs_url("https://gitlab.com/foo/private.git") + True + >>> is_vcs_url("git@gitlab.com:foo/private.git") + True + >>> is_vcs_url("bitbucket:example/repo") + True + >>> is_vcs_url("gist:11081aaa281") + True + >>> is_vcs_url("ftp://example.com/not-a-repo") + False + >>> is_vcs_url("random-string") + False + >>> is_vcs_url("https://example.com/not-a-repo") + False + """ + if not repo_url or not isinstance(repo_url, str): + return False + + repo_url = repo_url.strip() + if not repo_url: + return False + + # Define valid VCS domains + vcs_domains = r"(github\.com|gitlab\.com|bitbucket\.org|gist\.github\.com)" + + # 1. Match URLs with standard protocols pointing to VCS domains + if re.match(rf"^(git|ssh|http|https)://{vcs_domains}/[\w\-.]+/[\w\-.]+", repo_url): + return True + + # 2. Match SSH URLs (e.g., git@github.com:user/repo.git) + if re.match(rf"^git@{vcs_domains}:[\w\-.]+/[\w\-.]+(\.git)?$", repo_url): + return True + + # 3. Match shortcut syntax (e.g., github:user/repo) + if re.match(r"^(github|gitlab|bitbucket|gist):[\w\-./]+$", repo_url): + return True + + # 4. Match implicit GitHub shortcut (e.g., user/repo) + if re.match(r"^[\w\-]+/[\w\-]+$", repo_url): + return True + + return False diff --git a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py new file mode 100644 index 000000000..55608f0d1 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py @@ -0,0 +1,143 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. 
+# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.risk import compute_package_risk_v2 +from vulnerabilities.risk import compute_vulnerability_risk_factors + + +class ComputePackageRiskPipeline(VulnerableCodePipeline): + """ + Compute risk score for packages. + + See https://github.com/aboutcode-org/vulnerablecode/issues/1543 + """ + + pipeline_id = "compute_package_risk_v2" + license_expression = None + + @classmethod + def steps(cls): + return ( + cls.compute_and_store_vulnerability_risk_score, + cls.compute_and_store_package_risk_score, + ) + + def compute_and_store_vulnerability_risk_score(self): + affected_advisories = ( + AdvisoryV2.objects.filter(affecting_packages__isnull=False) + .prefetch_related( + "references", + "severities", + "exploits", + ) + .distinct() + ) + + self.log( + f"Calculating risk for {affected_advisories.count():,d} vulnerability with a affected packages records" + ) + + progress = LoopProgress(total_iterations=affected_advisories.count(), logger=self.log) + + updatables = [] + updated_vulnerability_count = 0 + batch_size = 5000 + + for advisory in progress.iter(affected_advisories.paginated(per_page=batch_size)): + severities = advisory.severities.all() + references = advisory.references.all() + exploits = advisory.exploits.all() + + weighted_severity, exploitability = compute_vulnerability_risk_factors( + references=references, + severities=severities, + exploits=exploits, + ) + advisory.weighted_severity = weighted_severity + 
advisory.exploitability = exploitability
+            self.log(
+                f"Computed risk for {advisory.advisory_id} with weighted_severity={weighted_severity} and exploitability={exploitability}"
+            )
+            updatables.append(advisory)
+
+            if len(updatables) >= batch_size:
+                updated_vulnerability_count += bulk_update(
+                    model=AdvisoryV2,
+                    items=updatables,
+                    fields=["weighted_severity", "exploitability"],
+                    logger=self.log,
+                )
+
+        updated_vulnerability_count += bulk_update(
+            model=AdvisoryV2,
+            items=updatables,
+            fields=["weighted_severity", "exploitability"],
+            logger=self.log,
+        )
+
+        self.log(
+            f"Successfully added risk score for {updated_vulnerability_count:,d} vulnerability"
+        )
+
+    def compute_and_store_package_risk_score(self):
+        affected_packages = (
+            PackageV2.objects.filter(affected_by_advisories__isnull=False)
+        ).distinct()
+
+        self.log(f"Calculating risk for {affected_packages.count():,d} affected package records")
+
+        progress = LoopProgress(
+            total_iterations=affected_packages.count(),
+            logger=self.log,
+            progress_step=5,
+        )
+
+        updatables = []
+        updated_package_count = 0
+        batch_size = 10000
+
+        for package in progress.iter(affected_packages.paginated(per_page=batch_size)):
+            risk_score = compute_package_risk_v2(package)
+
+            if not risk_score:
+                continue
+
+            package.risk_score = risk_score
+            updatables.append(package)
+
+            if len(updatables) >= batch_size:
+                updated_package_count += bulk_update(
+                    model=PackageV2,
+                    items=updatables,
+                    fields=["risk_score"],
+                    logger=self.log,
+                )
+        updated_package_count += bulk_update(
+            model=PackageV2,
+            items=updatables,
+            fields=["risk_score"],
+            logger=self.log,
+        )
+        self.log(f"Successfully added risk score for {updated_package_count:,d} package")
+
+
+def bulk_update(model, items, fields, logger):
+    item_count = 0
+    if items:
+        try:
+            model.objects.bulk_update(objs=items, fields=fields)
+            item_count += len(items)
+        except Exception as e:
+            logger(f"Error updating {model.__name__}: {e}")
+        items.clear()
+    return item_count
diff 
--git a/vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py b/vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py new file mode 100644 index 000000000..dd10a1695 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py @@ -0,0 +1,93 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from itertools import groupby + +from aboutcode.pipeline import LoopProgress +from django.db import transaction +from univers.version_range import RANGE_CLASS_BY_SCHEMES +from univers.versions import Version + +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class ComputeVersionRankPipeline(VulnerableCodePipeline): + """ + A pipeline to compute and assign version ranks for all packages. + """ + + pipeline_id = "compute_version_rank_v2" + license_expression = None + + @classmethod + def steps(cls): + return (cls.compute_and_store_version_rank,) + + def compute_and_store_version_rank(self): + """ + Compute and assign version ranks to all packages. 
+ """ + groups = PackageV2.objects.only("type", "namespace", "name").order_by( + "type", "namespace", "name" + ) + + def key(package): + return package.type, package.namespace, package.name + + groups = groupby(groups, key=key) + + groups = [(list(x), list(y)) for x, y in groups] + + total_groups = len(groups) + self.log(f"Calculating `version_rank` for {total_groups:,d} groups of packages.") + + progress = LoopProgress( + total_iterations=total_groups, + logger=self.log, + progress_step=5, + ) + + for group, packages in progress.iter(groups): + type, namespace, name = group + if type not in RANGE_CLASS_BY_SCHEMES: + continue + self.update_version_rank_for_group(packages) + + self.log("Successfully populated `version_rank` for all packages.") + + @transaction.atomic + def update_version_rank_for_group(self, packages): + """ + Update the `version_rank` for all packages in a specific group. + """ + + # Sort the packages by version + sorted_packages = self.sort_packages_by_version(packages) + + # Assign version ranks + updates = [] + for rank, package in enumerate(sorted_packages, start=1): + package.version_rank = rank + updates.append(package) + + # Bulk update to save the ranks + PackageV2.objects.bulk_update(updates, fields=["version_rank"]) + + def sort_packages_by_version(self, packages): + """ + Sort packages by version using `version_class`. + """ + + if not packages: + return [] + version_class = RANGE_CLASS_BY_SCHEMES.get(packages[0].type).version_class + if not version_class: + version_class = Version + return sorted(packages, key=lambda p: version_class(p.version)) diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py new file mode 100644 index 000000000..c306502d8 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py @@ -0,0 +1,169 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. 
+# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import csv +import io +import logging +from traceback import format_exc as traceback_format_exc + +import requests +from aboutcode.pipeline import LoopProgress +from dateutil import parser as dateparser +from django.db import DataError + +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryExploit +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class ExploitDBImproverPipeline(VulnerableCodePipeline): + """ + ExploitDB Improver Pipeline: Fetch ExploitDB data, iterate over it to find the vulnerability with + the specified alias, and create or update the ref and ref-type accordingly. 
+ """ + + pipeline_id = "enhance_with_exploitdb_v2" + spdx_license_expression = "GPL-2.0" + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_exploit, + ) + + def fetch_exploits(self): + exploit_db_url = ( + "https://gitlab.com/exploit-database/exploitdb/-/raw/main/files_exploits.csv" + ) + self.log(f"Fetching {exploit_db_url}") + + try: + response = requests.get(exploit_db_url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the Exploit-DB Exploits: {exploit_db_url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + + self.exploit_data = io.StringIO(response.text) + + def add_exploit(self): + + csvreader = csv.DictReader(self.exploit_data) + + raw_data = list(csvreader) + fetched_exploit_count = len(raw_data) + + vulnerability_exploit_count = 0 + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + + for row in progress.iter(raw_data): + vulnerability_exploit_count += add_vulnerability_exploit(row, self.log) + + self.log(f"Successfully added {vulnerability_exploit_count:,d} exploit-db advisory exploit") + + +def add_vulnerability_exploit(row, logger): + advisories = set() + + aliases = row["codes"].split(";") if row["codes"] else [] + + if not aliases: + return 0 + + for raw_alias in aliases: + try: + if alias := AdvisoryAlias.objects.get(alias=raw_alias): + for adv in alias.advisories.all(): + advisories.add(adv) + else: + advs = AdvisoryV2.objects.filter(advisory_id=raw_alias) + for adv in advs: + advisories.add(adv) + except AdvisoryAlias.DoesNotExist: + continue + + if not advisories: + logger(f"No advisory found for aliases {aliases}") + return 0 + + date_added = parse_date(row["date_added"]) + source_date_published = parse_date(row["date_published"]) + source_date_updated = parse_date(row["date_updated"]) + + 
for advisory in advisories: + add_exploit_references(row["codes"], row["source_url"], row["file"], advisory, logger) + try: + AdvisoryExploit.objects.update_or_create( + advisory=advisory, + data_source="Exploit-DB", + defaults={ + "date_added": date_added, + "description": row["description"], + "known_ransomware_campaign_use": row["verified"], + "source_date_published": source_date_published, + "exploit_type": row["type"], + "platform": row["platform"], + "source_date_updated": source_date_updated, + "source_url": row["source_url"], + }, + ) + except DataError as e: + logger( + f"Failed to Create the Vulnerability Exploit-DB with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + return 1 + + +def add_exploit_references(ref_id, direct_url, path, adv, logger): + url_map = { + "file_url": f"https://gitlab.com/exploit-database/exploitdb/-/blob/main/{path}", + "direct_url": direct_url, + } + + for key, url in url_map.items(): + if url: + try: + ref, created = AdvisoryReference.objects.update_or_create( + url=url, + defaults={ + "reference_id": ref_id, + "reference_type": AdvisoryReference.EXPLOIT, + }, + ) + + if created: + ref.advisories.add(adv) + ref.save() + logger(f"Created {ref} for {adv} with {key}={url}") + + except DataError as e: + logger( + f"Failed to Create the Vulnerability Reference For Exploit-DB with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + + +def parse_date(date_string): + if date_string: + try: + date_obj = dateparser.parse(date_string).date() + return date_obj.strftime("%Y-%m-%d") + except (ValueError, TypeError, Exception) as e: + logging.error( + f"Error while parsing ExploitDB date '{date_string}' with error {e!r}:\n{traceback_format_exc()}" + ) + return diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py new file mode 100644 index 000000000..486d79232 --- /dev/null +++ 
b/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py @@ -0,0 +1,103 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +from traceback import format_exc as traceback_format_exc + +import requests +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryExploit +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class VulnerabilityKevPipeline(VulnerableCodePipeline): + """ + Known Exploited Vulnerabilities Pipeline: Retrieve KEV data, iterate through it to identify vulnerabilities + by their associated aliases, and create or update the corresponding Exploit instances. 
+ """ + + pipeline_id = "enhance_with_kev_v2" + license_expression = None + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_exploits, + ) + + def fetch_exploits(self): + kev_url = "https://raw.githubusercontent.com/aboutcode-org/aboutcode-mirror-kev/refs/heads/main/known_exploited_vulnerabilities.json" + self.log(f"Fetching {kev_url}") + + try: + response = requests.get(kev_url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the KEV Exploits: {kev_url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + self.kev_data = response.json() + + def add_exploits(self): + fetched_exploit_count = self.kev_data.get("count") + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + + vulnerability_exploit_count = 0 + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + + for record in progress.iter(self.kev_data.get("vulnerabilities", [])): + vulnerability_exploit_count += add_vulnerability_exploit( + kev_vul=record, + logger=self.log, + ) + + self.log(f"Successfully added {vulnerability_exploit_count:,d} kev exploit") + + +def add_vulnerability_exploit(kev_vul, logger): + cve_id = kev_vul.get("cveID") + + if not cve_id: + return 0 + + advisories = set() + try: + if alias := AdvisoryAlias.objects.get(alias=cve_id): + for adv in alias.advisories.all(): + advisories.add(adv) + else: + advs = AdvisoryV2.objects.filter(advisory_id=cve_id) + for adv in advs: + advisories.add(adv) + except AdvisoryAlias.DoesNotExist: + logger(f"No advisory found for aliases {cve_id}") + return 0 + + for advisory in advisories: + AdvisoryExploit.objects.update_or_create( + advisory=advisory, + data_source="KEV", + defaults={ + "description": kev_vul["shortDescription"], + "date_added": kev_vul["dateAdded"], + "required_action": kev_vul["requiredAction"], + "due_date": kev_vul["dueDate"], + "notes": 
kev_vul["notes"], + "known_ransomware_campaign_use": True + if kev_vul["knownRansomwareCampaignUse"] == "Known" + else False, + }, + ) + return 1 diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py new file mode 100644 index 000000000..fbfea5150 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py @@ -0,0 +1,126 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +from traceback import format_exc as traceback_format_exc + +import requests +import saneyaml +from aboutcode.pipeline import LoopProgress +from dateutil import parser as dateparser + +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryExploit +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class MetasploitImproverPipeline(VulnerableCodePipeline): + """ + Metasploit Exploits Pipeline: Retrieve Metasploit data, iterate through it to identify vulnerabilities + by their associated aliases, and create or update the corresponding Exploit instances. 
+ """ + + pipeline_id = "enhance_with_metasploit_v2" + spdx_license_expression = "BSD-3-clause" + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_advisory_exploits, + ) + + def fetch_exploits(self): + url = "https://raw.githubusercontent.com/rapid7/metasploit-framework/master/db/modules_metadata_base.json" + self.log(f"Fetching {url}") + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the Metasploit Exploits: {url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + + self.metasploit_data = response.json() + + def add_advisory_exploits(self): + fetched_exploit_count = len(self.metasploit_data) + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + + vulnerability_exploit_count = 0 + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + for _, record in progress.iter(self.metasploit_data.items()): + vulnerability_exploit_count += add_advisory_exploit( + record=record, + logger=self.log, + ) + self.log(f"Successfully added {vulnerability_exploit_count:,d} vulnerability exploit") + + +def add_advisory_exploit(record, logger): + advisories = set() + references = record.get("references", []) + + interesting_references = [ + ref for ref in references if not ref.startswith("OSVDB") and not ref.startswith("URL-") + ] + + if not interesting_references: + return 0 + + for ref in interesting_references: + try: + if alias := AdvisoryAlias.objects.get(alias=ref): + for adv in alias.advisories.all(): + advisories.add(adv) + else: + advs = AdvisoryV2.objects.filter(advisory_id=ref) + for adv in advs: + advisories.add(adv) + except AdvisoryAlias.DoesNotExist: + continue + + if not advisories: + logger(f"No advisories found for aliases {interesting_references}") + return 0 + + description = record.get("description", "") + notes = record.get("notes", 
{}) + platform = record.get("platform") + + source_url = "" + if path := record.get("path"): + source_url = f"https://github.com/rapid7/metasploit-framework/tree/master{path}" + source_date_published = None + + if disclosure_date := record.get("disclosure_date"): + try: + source_date_published = dateparser.parse(disclosure_date).date() + except ValueError as e: + logger( + f"Error while parsing date {disclosure_date} with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + + for advisory in advisories: + AdvisoryExploit.objects.update_or_create( + advisory=advisory, + data_source="Metasploit", + defaults={ + "description": description, + "notes": saneyaml.dump(notes), + "source_date_published": source_date_published, + "platform": platform, + "source_url": source_url, + }, + ) + return 1 diff --git a/vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py b/vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py new file mode 100644 index 000000000..8a4825df4 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py @@ -0,0 +1,104 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import logging +from itertools import groupby +from traceback import format_exc as traceback_format_exc + +from aboutcode.pipeline import LoopProgress +from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS +from fetchcode.package_versions import versions +from packageurl import PackageURL + +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class FlagGhostPackagePipeline(VulnerableCodePipeline): + """Detect and flag packages that do not exist upstream.""" + + pipeline_id = "flag_ghost_packages_v2" + + @classmethod + def steps(cls): + return (cls.flag_ghost_packages,) + + def flag_ghost_packages(self): + detect_and_flag_ghost_packages(logger=self.log) + + +def detect_and_flag_ghost_packages(logger=None): + """Check if packages are available upstream. If not, mark them as ghost package.""" + interesting_packages_qs = ( + PackageV2.objects.order_by("type", "namespace", "name") + .filter(type__in=FETCHCODE_SUPPORTED_ECOSYSTEMS) + .filter(qualifiers="") + .filter(subpath="") + ) + + distinct_packages_count = ( + interesting_packages_qs.values("type", "namespace", "name") + .distinct("type", "namespace", "name") + .count() + ) + + grouped_packages = groupby( + interesting_packages_qs.paginated(), + key=lambda pkg: (pkg.type, pkg.namespace, pkg.name), + ) + + ghost_package_count = 0 + progress = LoopProgress(total_iterations=distinct_packages_count, logger=logger) + for type_namespace_name, packages in progress.iter(grouped_packages): + ghost_package_count += flag_ghost_packages( + base_purl=PackageURL(*type_namespace_name), + packages=packages, + logger=logger, + ) + + if logger: + logger(f"Successfully flagged {ghost_package_count:,d} ghost Packages") + + +def flag_ghost_packages(base_purl, packages, logger=None): + """ + Check if `packages` are available upstream. + If not, update `is_ghost` to `True`. + Return the number of packages flagged as ghost. 
+ """ + known_versions = get_versions(purl=base_purl, logger=logger) + # Skip if encounter error while fetching known versions + if known_versions is None: + return 0 + + ghost_packages = 0 + for pkg in packages: + pkg.is_ghost = False + if pkg.version.lstrip("vV") not in known_versions: + pkg.is_ghost = True + ghost_packages += 1 + + if logger: + logger(f"Flagging ghost package {pkg.purl!s}", level=logging.DEBUG) + pkg.save() + + return ghost_packages + + +def get_versions(purl, logger=None): + """Return set of known versions for the given purl.""" + try: + return {v.value.lstrip("vV") for v in versions(str(purl))} + except Exception as e: + if logger: + logger( + f"Error while fetching known versions for {purl!s}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + return diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index 46f8b1ed3..d5d88fbfd 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -22,6 +22,11 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisorySeverity +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import AdvisoryWeakness from vulnerabilities.models import AffectedByPackageRelatedVulnerability from vulnerabilities.models import Alias from vulnerabilities.models import FixingPackageRelatedVulnerability @@ -38,6 +43,61 @@ def get_or_create_aliases(aliases: List) -> QuerySet: return Alias.objects.filter(alias__in=aliases) +from django.db.models import Q + + +def get_or_create_advisory_aliases(aliases: List[str]) -> List[AdvisoryAlias]: + existing = AdvisoryAlias.objects.filter(alias__in=aliases) + existing_aliases = {a.alias for a in existing} + + to_create = [AdvisoryAlias(alias=alias) for alias 
in aliases if alias not in existing_aliases] + AdvisoryAlias.objects.bulk_create(to_create, ignore_conflicts=True) + + return list(AdvisoryAlias.objects.filter(alias__in=aliases)) + + +def get_or_create_advisory_references(references: List) -> List[AdvisoryReference]: + reference_urls = [ref.url for ref in references] + existing = AdvisoryReference.objects.filter(url__in=reference_urls) + existing_urls = {r.url for r in existing} + + to_create = [ + AdvisoryReference(reference_id=ref.reference_id, url=ref.url) + for ref in references + if ref.url not in existing_urls + ] + AdvisoryReference.objects.bulk_create(to_create, ignore_conflicts=True) + + return list(AdvisoryReference.objects.filter(url__in=reference_urls)) + + +def get_or_create_advisory_severities(severities: List) -> QuerySet: + severity_objs = [] + for severity in severities: + published_at = str(severity.published_at) if severity.published_at else None + sev, _ = AdvisorySeverity.objects.get_or_create( + scoring_system=severity.system.identifier, + value=severity.value, + scoring_elements=severity.scoring_elements, + defaults={ + "published_at": published_at, + }, + url=severity.url, + ) + severity_objs.append(sev) + return AdvisorySeverity.objects.filter(id__in=[severity.id for severity in severity_objs]) + + +def get_or_create_advisory_weaknesses(weaknesses: List[str]) -> List[AdvisoryWeakness]: + existing = AdvisoryWeakness.objects.filter(cwe_id__in=weaknesses) + existing_ids = {w.cwe_id for w in existing} + + to_create = [AdvisoryWeakness(cwe_id=w) for w in weaknesses if w not in existing_ids] + AdvisoryWeakness.objects.bulk_create(to_create, ignore_conflicts=True) + + return list(AdvisoryWeakness.objects.filter(cwe_id__in=weaknesses)) + + def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = None): from vulnerabilities.utils import compute_content_id @@ -76,6 +136,64 @@ def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = return advisory_obj 
+def insert_advisory_v2( + advisory: AdvisoryData, + pipeline_id: str, + get_advisory_packages: Callable, + logger: Callable = None, +): + from vulnerabilities.utils import compute_content_id + + advisory_obj = None + aliases = get_or_create_advisory_aliases(aliases=advisory.aliases) + references = get_or_create_advisory_references(references=advisory.references_v2) + severities = get_or_create_advisory_severities(severities=advisory.severities) + weaknesses = get_or_create_advisory_weaknesses(weaknesses=advisory.weaknesses) + content_id = compute_content_id(advisory_data=advisory) + affecting_packages, fixed_by_packages = get_advisory_packages(advisory_data=advisory) + try: + default_data = { + "datasource_id": pipeline_id, + "advisory_id": advisory.advisory_id, + "avid": f"{pipeline_id}/{advisory.advisory_id}", + "summary": advisory.summary, + "date_published": advisory.date_published, + "date_collected": datetime.now(timezone.utc), + } + + advisory_obj, _ = AdvisoryV2.objects.get_or_create( + unique_content_id=content_id, + url=advisory.url, + defaults=default_data, + ) + related_fields = { + "aliases": aliases, + "references": references, + "severities": severities, + "weaknesses": weaknesses, + "fixed_by_packages": fixed_by_packages, + "affecting_packages": affecting_packages, + } + + for field_name, values in related_fields.items(): + if values: + getattr(advisory_obj, field_name).add(*values) + + except Advisory.MultipleObjectsReturned: + logger.error( + f"Multiple Advisories returned: unique_content_id: {content_id}, url: {advisory.url}, advisory: {advisory!r}" + ) + raise + except Exception as e: + if logger: + logger( + f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + + return advisory_obj + + @transaction.atomic def import_advisory( advisory: Advisory, diff --git a/vulnerabilities/risk.py b/vulnerabilities/risk.py index a4508a03f..56f19171e 100644 --- 
a/vulnerabilities/risk.py +++ b/vulnerabilities/risk.py @@ -36,6 +36,8 @@ def get_weighted_severity(severities): score_list = [] for severity in severities: + if not severity.url: + continue parsed_url = urlparse(severity.url) severity_source = parsed_url.netloc.replace("www.", "", 1) weight = WEIGHT_CONFIG.get(severity_source, DEFAULT_WEIGHT) @@ -112,3 +114,19 @@ def compute_package_risk(package): return return round(max(result), 1) + + +def compute_package_risk_v2(package): + """ + Calculate the risk for a package by iterating over all vulnerabilities that affects this package + and determining the associated risk. + """ + result = [] + for advisory in package.affected_by_advisories.all(): + if risk := advisory.risk_score: + result.append(float(risk)) + + if not result: + return + + return round(max(result), 1) diff --git a/vulnerabilities/templates/advisory_detail.html b/vulnerabilities/templates/advisory_detail.html new file mode 100644 index 000000000..8a386d4ec --- /dev/null +++ b/vulnerabilities/templates/advisory_detail.html @@ -0,0 +1,614 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} +{% load static %} +{% load show_cvss %} +{% load url_filters %} + +{% block title %} +VulnerableCode Advisory Details - {{ advisory.advisory_id }} +{% endblock %} + +{% block content %} + +{% if advisory %} +
+
+
+
+ Advisory details: + + {{advisory.datasource_id}} / {{ advisory.advisory_id }} + +
+
+ + +
+
+
+ + + + + + + + + + + + + + + {% if severity_score_range %} + + + + {% endif %} + + + + + + + + + + + + + + + + + + + + + + + + + +
Advisory ID {{ advisory.datasource_id }}/{{ advisory.advisory_id }}
Aliases + {% for alias in aliases %} + {% if alias.url %} + {{ alias }} + {% else %} + {{ alias }} + {% endif %} +
+ {% endfor %} +
Summary{{ advisory.summary }} +
Severity score range{{ severity_score_range }} +
Status{{ status }}
+ Exploitability + {{ advisory.exploitability }} +
Weighted Severity + {{ advisory.weighted_severity }} +
Risk + {{ advisory.risk_score }} +
Affected and Fixed Packages + + Package Details + +
+
+ Weaknesses ({{ weaknesses|length }}) +
+
+ + {% for weakness in weaknesses %} + + + + + + {% empty %} + + + + {% endfor %} +
CWE-{{ weakness.cwe_id }} + + {{ weakness.name }} + +
+ There are no known CWE. +
+
+
+
+ + +
+ + + + + + + {% for severity in severities %} + + + + + + {% empty %} + + + + {% endfor %} +
System Score Found at
{{ severity.scoring_system }}{{ severity.value }} + {{ severity.url }} +
+ There are no known severity scores. +
+
+ +
+ + + + + + + + + {% for ref in references %} + + {% if ref.reference_id %} + + {% else %} + + {% endif %} + + {% if ref.reference_type %} + + {% else %} + + {% endif %} + + + + {% empty %} + + + + {% endfor %} +
Reference id Reference type URL
{{ ref.reference_id }}{{ ref.get_reference_type_display }}{{ ref.url }}
+ There are no known references. +
+
+ +
+ {% for exploit in advisory.exploits.all %} + + + + + + + + {% if exploit.date_added %} + + + + + {% endif %} + {% if exploit.description %} + + + + + {% endif %} + {% if exploit.required_action %} + + + + + {% endif %} + {% if exploit.due_date %} + + + + + {% endif %} + {% if exploit.notes %} + + + + + {% endif %} + {% if exploit.known_ransomware_campaign_use is not None %} + + + + + {% endif %} + {% if exploit.source_date_published %} + + + + + {% endif %} + {% if exploit.exploit_type %} + + + + + {% endif %} + {% if exploit.platform %} + + + + + {% endif %} + {% if exploit.source_date_updated %} + + + + + {% endif %} + + {% if exploit.source_url %} + + + + + {% endif %} + +
Data source {{ exploit.data_source }}
+ + Date added + + {{ exploit.date_added }}
+ + Description + + {{ exploit.description }}
+ + Required action + + {{ exploit.required_action }}
+ + Due date + + {{ exploit.due_date }}
+ + Note + +
{{ exploit.notes }}
+ + Ransomware campaign use + + {{ exploit.known_ransomware_campaign_use|yesno:"Known,Unknown" }}
+ + Source publication date + + {{ exploit.source_date_published }}
+ + Exploit type + + {{ exploit.exploit_type }}
+ + Platform + + {{ exploit.platform }}
+ + Source update date + + {{ exploit.source_date_updated }}
+ + Source URL + + {{ exploit.source_url }}
+ {% empty %} + + + No exploits are available. + + + {% endfor %} +
+ +
+ {% for severity_vector in severity_vectors %} + {% if severity_vector.vector.version == '2.0' %} + Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} + + + + + + + + + + + + + + + + + + + +
Exploitability (E)Access Vector (AV)Access Complexity (AC)Authentication (Au)Confidentiality Impact (C)Integrity Impact (I)Availability Impact (A)
{{ severity_vector.vector.exploitability|cvss_printer:"high,functional,unproven,proof_of_concept,not_defined" }}{{ severity_vector.vector.accessVector|cvss_printer:"local,adjacent_network,network" }}{{ severity_vector.vector.accessComplexity|cvss_printer:"high,medium,low" }}{{ severity_vector.vector.authentication|cvss_printer:"multiple,single,none" }}{{ severity_vector.vector.confidentialityImpact|cvss_printer:"none,partial,complete" }}{{ severity_vector.vector.integrityImpact|cvss_printer:"none,partial,complete" }}{{ severity_vector.vector.availabilityImpact|cvss_printer:"none,partial,complete" }}
+ {% elif severity_vector.vector.version == '3.1' or severity_vector.vector.version == '3.0'%} + Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} + + + + + + + + + + + + + + + + + + + + + +
Attack Vector (AV)Attack Complexity (AC)Privileges Required (PR)User Interaction (UI)Scope (S)Confidentiality Impact (C)Integrity Impact (I)Availability Impact (A)
{{ severity_vector.vector.attackVector|cvss_printer:"network,adjacent_network,local,physical"}}{{ severity_vector.vector.attackComplexity|cvss_printer:"low,high" }}{{ severity_vector.vector.privilegesRequired|cvss_printer:"none,low,high" }}{{ severity_vector.vector.userInteraction|cvss_printer:"none,required"}}{{ severity_vector.vector.scope|cvss_printer:"unchanged,changed" }}{{ severity_vector.vector.confidentialityImpact|cvss_printer:"high,low,none" }}{{ severity_vector.vector.integrityImpact|cvss_printer:"high,low,none" }}{{ severity_vector.vector.availabilityImpact|cvss_printer:"high,low,none" }}
+ {% elif severity_vector.vector.version == '4' %} + Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Attack Vector (AV)Attack Complexity (AC)Attack Requirements (AT)Privileges Required (PR)User Interaction (UI)Vulnerable System Impact Confidentiality (VC)Vulnerable System Impact Integrity (VI)Vulnerable System Impact Availability (VA)Subsequent System Impact Confidentiality (SC)Subsequent System Impact Integrity (SI)Subsequent System Impact Availability (SA)
{{ severity_vector.vector.attackVector|cvss_printer:"network,adjacent,local,physical"}}{{ severity_vector.vector.attackComplexity|cvss_printer:"low,high" }}{{ severity_vector.vector.attackRequirement|cvss_printer:"none,present" }}{{ severity_vector.vector.privilegesRequired|cvss_printer:"none,low,high" }}{{ severity_vector.vector.userInteraction|cvss_printer:"none,passive,active"}}{{ severity_vector.vector.vulnerableSystemImpactConfidentiality|cvss_printer:"high,low,none" }}{{ severity_vector.vector.vulnerableSystemImpactIntegrity|cvss_printer:"high,low,none" }}{{ severity_vector.vector.vulnerableSystemImpactAvailability|cvss_printer:"high,low,none" }}{{ severity_vector.vector.subsequentSystemImpactConfidentiality|cvss_printer:"high,low,none" }}{{ severity_vector.vector.subsequentSystemImpactIntegrity|cvss_printer:"high,low,none" }}{{ severity_vector.vector.subsequentSystemImpactAvailability|cvss_printer:"high,low,none" }}
+ {% elif severity_vector.vector.version == 'ssvc' %} +
+ Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} +
+ {% endif %} + {% empty %} + + + There are no known vectors. + + + {% endfor %} +
+ + +
+ {% if epss_data %} +
+ Exploit Prediction Scoring System (EPSS) +
+ + + + + + + + + + + {% if epss_data.published_at %} + + + + + {% endif %} + +
+ + Percentile + + {{ epss_data.percentile }}
+ + EPSS Score + + {{ epss_data.score }}
+ + Published At + + {{ epss_data.published_at }}
+ {% else %} +

No EPSS data available for this advisory.

+ {% endif %} +
+ + +
+
+
+{% endif %} + + + + + +{% endblock %} \ No newline at end of file diff --git a/vulnerabilities/templates/advisory_package_details.html b/vulnerabilities/templates/advisory_package_details.html new file mode 100644 index 000000000..0f4c71044 --- /dev/null +++ b/vulnerabilities/templates/advisory_package_details.html @@ -0,0 +1,88 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} +{% load static %} +{% load show_cvss %} +{% load url_filters %} + +{% block title %} +VulnerableCode Advisory Package Details - {{ advisory.advisory_id }} +{% endblock %} + +{% block content %} + +{% if advisory %} +
+
+
+
+ Vulnerable and Fixing Package details for Advisory: + + {{ advisory.advisory_id }} + +
+
+
+ + + + + + + + + {% for package in affected_packages %} + + + + + {% empty %} + + + + {% endfor %} + +
AffectedFixed by
+ {{ package.purl }} + + + {% for match in all_affected_fixed_by_matches %} + {% if match.affected_package == package %} + {% if match.matched_fixed_by_packages|length > 0 %} + {% for pkg in match.matched_fixed_by_packages %} + {{ pkg }} +
+ {% endfor %} + {% else %} + There are no reported fixed by versions. + {% endif %} + {% endif %} + {% endfor %} + +
+ This advisory is not known to affect any packages. + 
+
+
+
+{% endif %} + + + + + +{% endblock %} \ No newline at end of file diff --git a/vulnerabilities/templates/index_v2.html b/vulnerabilities/templates/index_v2.html new file mode 100644 index 000000000..962b5f79f --- /dev/null +++ b/vulnerabilities/templates/index_v2.html @@ -0,0 +1,33 @@ +{% extends "base.html" %} +{% load widget_tweaks %} + +{% block title %} +VulnerableCode Home +{% endblock %} + +{% block content %} +
+
+
+
+ {% include "package_search_box_v2.html" %} +
+
+
+

+ VulnerableCode aggregates software + vulnerabilities from multiple public advisory sources + and presents their details along with their affected + packages and fixed-by packages identified by + Package URLs (PURLs). +

+

+ What's new in this Release: + + Check out latest updates here! + +

+
+
+
+{% endblock %} \ No newline at end of file diff --git a/vulnerabilities/templates/package_details_v2.html b/vulnerabilities/templates/package_details_v2.html new file mode 100644 index 000000000..54cb8ffed --- /dev/null +++ b/vulnerabilities/templates/package_details_v2.html @@ -0,0 +1,365 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} +{% load static %} +{% load url_filters %} + +{% block title %} +VulnerableCode Package Details - {{ package.purl }} +{% endblock %} + +{% block content %} +
+ {% include "package_search_box_v2.html"%} +
+ +{% if package %} +
+
+
+
+ Package details: + {{ package.purl }} + +
+
+ +
+ +
+ +
+
+
+ {% if affected_by_advisories|length != 0 %} +
+ {% else %} +
+ {% endif %} + + + + + + + {% if package.is_ghost %} + + + + + {% endif %} + +
+ + purl + + + {{ fixed_package_details.purl.to_string }} +
+ Tags + + + Ghost + +
+
+ {% if affected_by_advisories|length != 0 %} + +
+ + + + + + + + + + + + + + + +
+ Next non-vulnerable version + + {% if fixed_package_details.next_non_vulnerable.version %} + {{ fixed_package_details.next_non_vulnerable.version }} + {% else %} + None. + {% endif %} +
+ Latest non-vulnerable version + + {% if fixed_package_details.latest_non_vulnerable.version %} + {{ fixed_package_details.latest_non_vulnerable.version }} + {% else %} + None. + {% endif %} +
+ Risk score + + {{package.risk_score}} +
+
+ + {% endif %} + +
+
+ Vulnerabilities affecting this package ({{ affected_by_advisories|length }}) +
+ + + + + + + + + + + + + + {% for advisory in affected_by_advisories %} + + + + + + + + {% empty %} + + + + {% endfor %} + +
AdvisorySourceDate PublishedSummaryFixed in package version
+ + {{advisory.avid }} + +
+ {% if advisory.alias|length != 0 %} + Aliases: + {% endif %} +
+ {% for alias in advisory.alias %} + {% if alias.url %} + {{ alias }} +
+ {% else %} + {{ alias }} +
+ {% endif %} + {% endfor %} +
+ {{advisory.url}} + + {{advisory.date_published}} + + {{ advisory.summary }} + + {% if package.purl == fixed_package_details.purl.to_string %} + {% for key, value in fixed_package_details.items %} + {% if key == "advisories" %} + {% for vuln in value %} + {% if vuln.advisory.advisory_id == advisory.advisory_id %} + {% if vuln.fixed_by_package_details is None %} + There are no reported fixed by versions. + {% else %} + {% for fixed_pkg in vuln.fixed_by_package_details %} +
+ {% if fixed_pkg.fixed_by_purl_advisories|length == 0 %} + {{ fixed_pkg.fixed_by_purl.version }} +
+ Subject of 0 other advisories. + {% else %} + {{ fixed_pkg.fixed_by_purl.version }} + {% if fixed_pkg.fixed_by_purl_advisories|length != 1 %} +
+ Subject of {{ fixed_pkg.fixed_by_purl_advisories|length }} other + advisories. {% else %} + 
+ Subject of {{ fixed_pkg.fixed_by_purl_advisories|length }} other + advisory. + {% endif %} + + + {% endif %} +
+ {% endfor %} + {% endif %} + {% endif %} + {% endfor %} + {% endif %} + {% endfor %} + {% endif %} +
+ This package is not known to be the subject of any advisories. + 
+
+ +
+
+ Vulnerabilities fixed by this package ({{ fixing_advisories|length }}) +
+ + + + + + + + + + + + + {% for advisory in fixing_advisories %} + + + + + + + + {% empty %} + + + + {% endfor %} + +
AdvisorySourceDate PublishedSummaryAliases
+ + {{advisory.avid }} + + + {{advisory.url}} + + {{advisory.date_published}} + + {{ advisory.summary }} + + {% for alias in advisory.alias %} + {% if alias.url %} + {{ alias }} +
+ {% else %} + {{ alias }} +
+ {% endif %} + {% endfor %} +
+ This package is not known to fix any advisories. +
+ +
+
+
+
+ + +
+
+
+
+ +{% endif %} +{% endblock %} diff --git a/vulnerabilities/templates/package_search_box_v2.html b/vulnerabilities/templates/package_search_box_v2.html new file mode 100644 index 000000000..e78d400e6 --- /dev/null +++ b/vulnerabilities/templates/package_search_box_v2.html @@ -0,0 +1,48 @@ +{% load widget_tweaks %} +
+
+ Search for packages + +
+
+
+
+
+
+ {{ package_search_form.search|add_class:"input" }} +
+
+ +
+
+
+
+
+
diff --git a/vulnerabilities/templates/packages_v2.html b/vulnerabilities/templates/packages_v2.html new file mode 100644 index 000000000..fe2b05abe --- /dev/null +++ b/vulnerabilities/templates/packages_v2.html @@ -0,0 +1,84 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} + +{% block title %} +VulnerableCode Package Search +{% endblock %} + +{% block content %} +
+ {% include "package_search_box_v2.html" %} +
+ +{% if search %} +
+
+
+
+ {{ page_obj.paginator.count|intcomma }} results +
+ {% if is_paginated %} + {% include 'includes/pagination.html' with page_obj=page_obj %} + {% endif %} +
+
+
+ +
+
+ + + + + + + + + + {% for package in page_obj %} + + + + + + {% empty %} + + + + {% endfor %} + +
+ + Package URL + + + + Affected by vulnerabilities + + + + Fixing vulnerabilities + +
+ {{ package.purl }} + {{ package.vulnerability_count }}{{ package.patched_vulnerability_count }}
+ No Package found. +
+
+ + {% if is_paginated %} + {% include 'includes/pagination.html' with page_obj=page_obj %} + {% endif %} + +
+{% endif %} +{% endblock %} diff --git a/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py new file mode 100644 index 000000000..94454c473 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py @@ -0,0 +1,161 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import pytest +import requests + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import ApacheHTTPDImporterPipeline +from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import fetch_links +from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import get_weaknesses + + +# Dummy responses +class DummyResponseContent: + def __init__(self, content_bytes): + self.content = content_bytes + + +class DummyResponseJSON: + def __init__(self, json_data): + self._json = json_data + + def json(self): + return self._json + + +# Tests for fetch_links +@pytest.fixture(autouse=True) +def no_requests(monkeypatch): + # Ensure other tests don't hit real HTTP + monkeypatch.setattr( + requests, + "get", + lambda url: (_ for _ in ()).throw(AssertionError(f"Unexpected HTTP GET call to {url}")), + ) + + +def test_fetch_links_filters_and_resolves(monkeypatch): + html = """ + + A1 + A2 + TXT + + """ + base_url = "https://example.com/base/" + # Monkeypatch HTTP GET for HTML + def fake_get(url): + assert url == base_url + return DummyResponseContent(html.encode("utf-8")) + + monkeypatch.setattr(requests, "get", fake_get) + links = fetch_links(base_url) + assert 
len(links) == 2 + assert links == [ + "https://example.com/base/advisory1.json", + "https://example.com/json/advisory2.json", + ] + + +# Tests for get_weaknesses +def test_get_weaknesses_with_cna_structure(): + mock_data = { + "containers": {"cna": {"problemTypes": [{"descriptions": [{"cweId": "CWE-125"}]}]}} + } + result = get_weaknesses(mock_data) + assert result == [125] + + +def test_get_weaknesses_with_data_meta_structure(): + mock_data = { + "CVE_data_meta": {"ID": "CVE-2020-0001"}, + "problemtype": { + "problemtype_data": [ + {"description": [{"value": "CWE-190 Integer Overflow"}]}, + {"description": [{"value": "CWE-200 Some Issue"}]}, + ] + }, + } + result = get_weaknesses(mock_data) + assert set(result) == {190, 200} + + +# Tests for ApacheHTTPDImporterPipeline +class DummyPipeline(ApacheHTTPDImporterPipeline): + # Expose protected methods for testing + pass + + +@pytest.fixture +def pipeline(monkeypatch): + pipe = DummyPipeline() + # Prevent real HTTP in fetch_links + monkeypatch.setattr( + "vulnerabilities.pipelines.v2_importers.apache_httpd_importer.fetch_links", + lambda url: ["u1", "u2"], + ) + return pipe + + +def test_advisories_count(monkeypatch, pipeline): + # Should use mocked links + count = pipeline.advisories_count() + assert count == 2 + + +def test_collect_advisories_and_to_advisory(monkeypatch, pipeline): + # Prepare two dummy JSONs + sample1 = { + "CVE_data_meta": {"ID": "CVE-1"}, + "description": {"description_data": [{"lang": "eng", "value": "Test desc"}]}, + "impact": [{"other": "5.0"}], + "affects": {"vendor": {"vendor_data": []}}, + "timeline": [], + } + sample2 = { + "cveMetadata": {"cveId": "CVE-2"}, + "description": {"description_data": [{"lang": "eng", "value": "Other desc"}]}, + "impact": [{"other": "7.5"}], + "affects": {"vendor": {"vendor_data": []}}, + "timeline": [], + } + # Monkeypatch requests.get to return JSON + def fake_get(u): + if u == "u1": + return DummyResponseJSON(sample1) + elif u == "u2": + return 
DummyResponseJSON(sample2) + else: + raise AssertionError(f"Unexpected URL {u}") + + monkeypatch.setattr(requests, "get", fake_get) + advisories = list(pipeline.collect_advisories()) + assert len(advisories) == 2 + # Validate first advisory + adv1 = advisories[0] + assert isinstance(adv1, AdvisoryData) + assert adv1.advisory_id == "CVE-1" + assert adv1.summary == "Test desc" + assert adv1.severities and adv1.severities[0].value == "5.0" + assert adv1.url.endswith("CVE-1.json") + # Validate second advisory + adv2 = advisories[1] + assert adv2.advisory_id == "CVE-2" + assert adv2.summary == "Other desc" + assert adv2.severities[0].value == "7.5" + + +# Test version range conversion error +def test_to_version_ranges_unknown_comparator(pipeline): + # version_data with bad comparator + versions_data = [{"version_value": "1.0.0", "version_affected": "<>"}] + fixed_versions = [] + with pytest.raises(ValueError): + pipeline.to_version_ranges(versions_data, fixed_versions) diff --git a/vulnerabilities/tests/pipelines/test_collect_commits_v2.py b/vulnerabilities/tests/pipelines/test_collect_commits_v2.py new file mode 100644 index 000000000..dddec9084 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_collect_commits_v2.py @@ -0,0 +1,131 @@ +from datetime import datetime +from unittest.mock import patch + +import pytest + +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import CodeFixV2 +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines.v2_improvers.collect_commits import CollectFixCommitsPipeline +from vulnerabilities.pipelines.v2_improvers.collect_commits import is_vcs_url +from vulnerabilities.pipelines.v2_improvers.collect_commits import is_vcs_url_already_processed +from vulnerabilities.pipelines.v2_improvers.collect_commits import normalize_vcs_url + + +@pytest.mark.parametrize( + "url,expected", + [ + ("git://github.com/angular/di.js.git", 
True), + ("github:user/repo", True), + ("user/repo", True), + ("https://github.com/user/repo.git", True), + ("git@github.com:user/repo.git", True), + ("ftp://example.com/not-a-repo", False), + ("random-string", False), + ("https://example.com/not-a-repo", False), + ], +) +def test_is_vcs_url(url, expected): + assert is_vcs_url(url) is expected + + +@pytest.mark.parametrize( + "url,normalized", + [ + ("git@github.com:user/repo.git", "https://github.com/user/repo.git"), + ("github:user/repo", "https://github.com/user/repo"), + ("bitbucket:example/repo", "https://bitbucket.org/example/repo"), + ("user/repo", "https://github.com/user/repo"), + ("https://gitlab.com/foo/bar.git", "https://gitlab.com/foo/bar.git"), + ], +) +def test_normalize_vcs_url(url, normalized): + assert normalize_vcs_url(url) == normalized + + +@pytest.mark.django_db +def test_is_vcs_url_already_processed_true(): + advisory = AdvisoryV2.objects.create( + advisory_id="CVE-2025-9999", + datasource_id="test-ds", + avid="test-ds/CVE-2025-9999", + url="https://example.com/advisory/CVE-2025-9999", + unique_content_id="11111", + date_collected=datetime.now(), + ) + package = PackageV2.objects.create( + type="bar", + name="foo", + version="1.0", + ) + advisory.affecting_packages.add(package) + advisory.save() + CodeFixV2.objects.create( + commits=["https://github.com/user/repo/commit/abc123"], + advisory=advisory, + affected_package=package, + ) + assert is_vcs_url_already_processed("https://github.com/user/repo/commit/abc123") is True + + +@pytest.mark.django_db +def test_collect_fix_commits_pipeline_creates_entry(): + advisory = AdvisoryV2.objects.create( + advisory_id="CVE-2025-1000", + datasource_id="test-ds", + avid="test-ds/CVE-2025-1000", + url="https://example.com/advisory/CVE-2025-1000", + unique_content_id="11111", + date_collected=datetime.now(), + ) + package = 
PackageV2.objects.create( + type="foo", + name="testpkg", + version="1.0", + ) + reference = AdvisoryReference.objects.create( + url="https://github.com/test/testpkg/commit/abc123" + ) + advisory.affecting_packages.add(package) + advisory.references.add(reference) + advisory.save() + + pipeline = CollectFixCommitsPipeline() + pipeline.collect_and_store_fix_commits() + + codefixes = CodeFixV2.objects.all() + assert codefixes.count() == 1 + fix = codefixes.first() + assert "abc123" in fix.commits[0] + assert fix.advisory == advisory + assert fix.affected_package == package + + +@pytest.mark.django_db +def test_collect_fix_commits_pipeline_skips_non_commit_urls(): + advisory = AdvisoryV2.objects.create( + advisory_id="CVE-2025-2000", + datasource_id="test-ds", + avid="test-ds/CVE-2025-2000", + url="https://example.com/advisory/CVE-2025-2000", + unique_content_id="11111", + date_collected=datetime.now(), + ) + package = PackageV2.objects.create( + type="pypi", + name="otherpkg", + version="2.0", + ) + + advisory.affecting_packages.add(package) + + reference = AdvisoryReference.objects.create(url="https://github.com/test/testpkg/issues/12") + + advisory.references.add(reference) + advisory.save() + + pipeline = CollectFixCommitsPipeline() + pipeline.collect_and_store_fix_commits() + + assert CodeFixV2.objects.count() == 0 diff --git a/vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py b/vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py new file mode 100644 index 000000000..4dbfb222a --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py @@ -0,0 +1,69 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. 
+# See https://aboutcode.org for more information about nexB OSS projects. +# +from datetime import datetime +from decimal import Decimal + +import pytest + +from vulnerabilities.models import AdvisorySeverity +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import AdvisoryWeakness +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines.v2_improvers.compute_package_risk import ComputePackageRiskPipeline +from vulnerabilities.severity_systems import CVSSV3 +from vulnerabilities.severity_systems import GENERIC + + +@pytest.mark.django_db +def test_simple_risk_pipeline(): + pkg = PackageV2.objects.create(type="pypi", name="foo", version="2.3.0") + assert PackageV2.objects.count() == 1 + + adv = AdvisoryV2( + advisory_id="VCID-Existing", + summary="vulnerability description here", + datasource_id="ds", + avid="ds/VCID-Existing", + unique_content_id="ajkef", + url="https://test.com", + date_collected=datetime.now(), + ) + adv.save() + + severity1 = AdvisorySeverity.objects.create( + url="https://nvd.nist.gov/vuln/detail/CVE-xxxx-xxx1", + scoring_system=CVSSV3.identifier, + scoring_elements="CVSS:3.0/AV:P/AC:H/PR:H/UI:R/S:C/C:H/I:H/A:N/E:H/RL:O/RC:R/CR:H/MAC:H/MC:L", + value="6.5", + ) + + severity2 = AdvisorySeverity.objects.create( + url="https://nvd.nist.gov/vuln/detail/CVE-xxxx-xxx1", + scoring_system=GENERIC.identifier, + value="MODERATE", # 6.9 + ) + adv.severities.add(severity1) + adv.severities.add(severity2) + + weaknesses = AdvisoryWeakness.objects.create(cwe_id=119) + adv.weaknesses.add(weaknesses) + + adv.affecting_packages.add(pkg) + adv.save() + + improver = ComputePackageRiskPipeline() + improver.execute() + + assert pkg.risk_score is None + + improver = ComputePackageRiskPipeline() + improver.execute() + + pkg = PackageV2.objects.get(type="pypi", name="foo", version="2.3.0") + assert pkg.risk_score == Decimal("3.1") diff --git a/vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py 
b/vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py new file mode 100644 index 000000000..eb8d3aebd --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py @@ -0,0 +1,70 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from unittest.mock import patch + +import pytest +from univers.versions import Version + +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines.v2_improvers.computer_package_version_rank import ( + ComputeVersionRankPipeline, +) + + +@pytest.mark.django_db +class TestComputeVersionRankPipeline: + @pytest.fixture + def pipeline(self): + return ComputeVersionRankPipeline() + + @pytest.fixture + def packages(self, db): + package_type = "pypi" + namespace = "test_namespace" + name = "test_package" + PackageV2.objects.create(type=package_type, namespace=namespace, name=name, version="1.0.0") + PackageV2.objects.create(type=package_type, namespace=namespace, name=name, version="1.1.0") + PackageV2.objects.create(type=package_type, namespace=namespace, name=name, version="0.9.0") + return PackageV2.objects.filter(type=package_type, namespace=namespace, name=name) + + def test_compute_and_store_version_rank(self, pipeline, packages): + with patch.object(pipeline, "log") as mock_log: + pipeline.compute_and_store_version_rank() + assert mock_log.call_count > 0 + for package in packages: + assert package.version_rank is not None + + def test_update_version_rank_for_group(self, pipeline, packages): + with patch.object(PackageV2.objects, "bulk_update") as mock_bulk_update: + pipeline.update_version_rank_for_group(packages) + 
mock_bulk_update.assert_called_once() + updated_packages = mock_bulk_update.call_args[0][0] + assert len(updated_packages) == len(packages) + for idx, package in enumerate(sorted(packages, key=lambda p: Version(p.version))): + assert updated_packages[idx].version_rank == idx + + def test_sort_packages_by_version(self, pipeline, packages): + sorted_packages = pipeline.sort_packages_by_version(packages) + versions = [p.version for p in sorted_packages] + assert versions == sorted(versions, key=Version) + + def test_sort_packages_by_version_empty(self, pipeline): + assert pipeline.sort_packages_by_version([]) == [] + + def test_sort_packages_by_version_invalid_scheme(self, pipeline, packages): + for package in packages: + package.type = "invalid" + assert pipeline.sort_packages_by_version(packages) == [] + + def test_compute_and_store_version_rank_invalid_scheme(self, pipeline): + PackageV2.objects.create(type="invalid", namespace="test", name="package", version="1.0.0") + with patch.object(pipeline, "log") as mock_log: + pipeline.compute_and_store_version_rank() + mock_log.assert_any_call("Successfully populated `version_rank` for all packages.") diff --git a/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py b/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py new file mode 100644 index 000000000..96359ca3c --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py @@ -0,0 +1,108 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import shutil +from pathlib import Path +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.elixir_security_importer import ( + ElixirSecurityImporterPipeline, +) + + +@pytest.fixture +def mock_vcs_response(tmp_path): + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + packages_dir = repo_dir / "packages" / "some_package" + packages_dir.mkdir(parents=True) + + advisory_file = packages_dir / "CVE-2022-9999.yml" + advisory_file.write_text( + """ + cve: "2022-9999" + package: "plug" + description: "Cross-site scripting vulnerability in plug < 1.11.1" + patched_versions: + - ">= 1.11.1" + unaffected_versions: + - "< 1.0.0" + disclosure_date: "2022-12-01" + link: "https://github.com/plug/plug/security/advisories/GHSA-xxxx-yyyy" + """ + ) + + mock = MagicMock() + mock.dest_dir = str(repo_dir) + mock.delete = MagicMock() + return mock + + +@pytest.fixture +def mock_fetch_via_vcs(mock_vcs_response): + with patch( + "vulnerabilities.pipelines.v2_importers.elixir_security_importer.fetch_via_vcs" + ) as mock: + mock.return_value = mock_vcs_response + yield mock + + +def test_advisories_count(mock_fetch_via_vcs, mock_vcs_response): + importer = ElixirSecurityImporterPipeline() + importer.clone() + count = importer.advisories_count() + assert count == 1 + + +def test_collect_advisories(mock_fetch_via_vcs, mock_vcs_response): + importer = ElixirSecurityImporterPipeline() + importer.clone() + advisories = list(importer.collect_advisories()) + + assert len(advisories) == 1 + + advisory: AdvisoryData = advisories[0] + assert advisory.advisory_id == "CVE-2022-9999" + assert advisory.summary.startswith("Cross-site scripting vulnerability") + assert advisory.affected_packages[0].package.name == "plug" + assert advisory.affected_packages[0].package.type == "hex" + assert ( + advisory.references_v2[0].url + == 
"https://github.com/plug/plug/security/advisories/GHSA-xxxx-yyyy" + ) + assert advisory.date_published.isoformat().startswith("2022-12-01") + + +def test_collect_advisories_skips_invalid_cve(mock_fetch_via_vcs, tmp_path): + repo_dir = tmp_path / "repo" + packages_dir = repo_dir / "packages" + + if packages_dir.exists(): + shutil.rmtree(packages_dir) + packages_dir.mkdir(parents=True, exist_ok=True) + + advisory_file = packages_dir / "bad_advisory.yml" + advisory_file.write_text("cve: BAD-ID\npackage: x\n") + + mock_response = MagicMock() + mock_response.dest_dir = str(repo_dir) + mock_response.delete = MagicMock() + + with patch( + "vulnerabilities.pipelines.v2_importers.elixir_security_importer.fetch_via_vcs" + ) as mock: + mock.return_value = mock_response + importer = ElixirSecurityImporterPipeline() + importer.clone() + advisories = list(importer.collect_advisories()) + assert len(advisories) == 0 diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py b/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py new file mode 100644 index 000000000..865356158 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py @@ -0,0 +1,56 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import os +from datetime import datetime +from unittest import mock +from unittest.mock import Mock + +import pytest + +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryExploit +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.pipelines.v2_improvers.enhance_with_exploitdb import ExploitDBImproverPipeline + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +TEST_DATA = os.path.join(BASE_DIR, "../test_data", "exploitdb_improver/files_exploits.csv") + + +@pytest.mark.django_db +@mock.patch("requests.get") +def test_exploit_db_improver(mock_get): + mock_response = Mock(status_code=200) + with open(TEST_DATA, "r") as f: + mock_response.text = f.read() + mock_get.return_value = mock_response + + improver = ExploitDBImproverPipeline() + + # Run the improver when there is no matching aliases + improver.execute() + + assert AdvisoryExploit.objects.count() == 0 + + adv1 = AdvisoryV2.objects.create( + advisory_id="VCIO-123-2002", + datasource_id="ds", + avid="ds/VCIO-123-2002", + unique_content_id="i3giu", + url="https://test.com", + date_collected=datetime.now(), + ) + + alias = AdvisoryAlias.objects.create(alias="CVE-2009-3699") + + adv1.aliases.add(alias) + + # Run Exploit-DB Improver again when there are matching aliases. + improver.execute() + assert AdvisoryExploit.objects.count() == 1 diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py b/vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py new file mode 100644 index 000000000..bd58fa5fd --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py @@ -0,0 +1,57 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. 
+# See https://aboutcode.org for more information about nexB OSS projects. +# + +import os +from datetime import datetime +from unittest import mock +from unittest.mock import Mock + +import pytest + +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryExploit +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.pipelines.v2_improvers.enhance_with_kev import VulnerabilityKevPipeline +from vulnerabilities.utils import load_json + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +TEST_DATA = os.path.join(BASE_DIR, "../test_data", "kev_data.json") + + +@pytest.mark.django_db +@mock.patch("requests.get") +def test_kev_improver(mock_get): + mock_response = Mock(status_code=200) + mock_response.json.return_value = load_json(TEST_DATA) + mock_get.return_value = mock_response + + improver = VulnerabilityKevPipeline() + + # Run the improver when there is no matching aliases + improver.execute() + + assert AdvisoryExploit.objects.count() == 0 + + adv1 = AdvisoryV2.objects.create( + advisory_id="VCIO-123-2002", + datasource_id="ds", + avid="ds/VCIO-123-2002", + unique_content_id="i3giu", + url="https://test.com", + date_collected=datetime.now(), + ) + adv1.save() + + alias = AdvisoryAlias.objects.create(alias="CVE-2021-38647") + + adv1.aliases.add(alias) + + # Run Kev Improver again when there are matching aliases. + improver.execute() + assert AdvisoryExploit.objects.count() == 1 diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py b/vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py new file mode 100644 index 000000000..c20437145 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py @@ -0,0 +1,56 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. 
+# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import os +from datetime import datetime +from unittest import mock +from unittest.mock import Mock + +import pytest + +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryExploit +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.pipelines.v2_improvers.enhance_with_metasploit import ( + MetasploitImproverPipeline, +) +from vulnerabilities.utils import load_json + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +TEST_DATA = os.path.join(BASE_DIR, "../test_data", "metasploit_improver/modules_metadata_base.json") + + +@pytest.mark.django_db +@mock.patch("requests.get") +def test_metasploit_improver(mock_get): + mock_response = Mock(status_code=200) + mock_response.json.return_value = load_json(TEST_DATA) + mock_get.return_value = mock_response + + improver = MetasploitImproverPipeline() + + # Run the improver when there is no matching aliases + improver.execute() + assert AdvisoryExploit.objects.count() == 0 + + adv1 = AdvisoryV2.objects.create( + advisory_id="VCIO-123-2002", + datasource_id="ds", + avid="ds/VCIO-123-2002", + unique_content_id="i3giu", + url="https://test.com", + date_collected=datetime.now(), + ) + alias = AdvisoryAlias.objects.create(alias="CVE-2007-4387") + + adv1.aliases.add(alias) + + # Run metasploit Improver again when there are matching aliases. + improver.execute() + assert AdvisoryExploit.objects.count() == 1 diff --git a/vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py b/vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py new file mode 100644 index 000000000..d082fdc3a --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py @@ -0,0 +1,111 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. 
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from unittest.mock import patch

import pytest
from packageurl import PackageURL

from vulnerabilities.models import PackageV2
from vulnerabilities.pipelines.v2_improvers.flag_ghost_packages import (
    detect_and_flag_ghost_packages,
)
from vulnerabilities.pipelines.v2_improvers.flag_ghost_packages import flag_ghost_packages

# Single patch target used by every test below.
GET_VERSIONS = "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions"


@pytest.mark.django_db
def test_flag_ghost_package_marked_correctly():
    """A version that does not exist upstream gets its package flagged as ghost."""
    package = PackageV2.objects.create(
        type="pypi",
        namespace=None,
        name="requests",
        version="999.999.999",
    )

    with patch(GET_VERSIONS) as get_versions:
        get_versions.return_value = {"2.25.1", "2.26.0"}

        base_purl = PackageURL(type="pypi", namespace=None, name="requests")
        flagged = flag_ghost_packages(base_purl, [package])

        package.refresh_from_db()
        assert flagged == 1
        assert package.is_ghost is True


@pytest.mark.django_db
def test_flag_non_ghost_package_not_marked():
    """A version known upstream must not be flagged."""
    package = PackageV2.objects.create(
        type="pypi",
        namespace=None,
        name="requests",
        version="2.26.0",
    )

    with patch(GET_VERSIONS) as get_versions:
        get_versions.return_value = {"2.25.1", "2.26.0"}

        base_purl = PackageURL(type="pypi", namespace=None, name="requests")
        flagged = flag_ghost_packages(base_purl, [package])

        package.refresh_from_db()
        assert flagged == 0
        assert package.is_ghost is False


@pytest.mark.django_db
def test_flag_ghost_packages_gracefully_handles_version_fetch_failure():
    """When the upstream version fetch fails (None), nothing is flagged."""
    package = PackageV2.objects.create(
        type="pypi",
        namespace=None,
        name="some-lib",
        version="1.0.0",
    )

    with patch(GET_VERSIONS) as get_versions:
        get_versions.return_value = None

        base_purl = PackageURL(type="pypi", namespace=None, name="some-lib")
        flagged = flag_ghost_packages(base_purl, [package])

        package.refresh_from_db()
        assert flagged == 0
        assert package.is_ghost is False


@pytest.mark.django_db
def test_detect_and_flag_ghost_packages(monkeypatch):
    """End-to-end run: ghost packages are flagged, real ones left alone."""
    ghost = PackageV2.objects.create(type="pypi", name="fakepkg", version="9.9.9")
    real = PackageV2.objects.create(type="pypi", name="realpkg", version="1.0.0")

    def fake_versions(purl, logger=None):
        if purl.name == "realpkg":
            return {"1.0.0"}
        if purl.name == "fakepkg":
            return {"0.1.0", "0.2.0"}
        return set()

    monkeypatch.setattr(GET_VERSIONS, fake_versions)

    detect_and_flag_ghost_packages()

    ghost.refresh_from_db()
    real.refresh_from_db()

    assert ghost.is_ghost is True
    assert real.is_ghost is False
#

from unittest.mock import patch

import pytest
from packageurl import PackageURL

from vulnerabilities.pipelines.v2_importers.github_importer import GitHubAPIImporterPipeline
from vulnerabilities.pipelines.v2_importers.github_importer import get_cwes_from_github_advisory
from vulnerabilities.pipelines.v2_importers.github_importer import get_purl
from vulnerabilities.utils import get_item


@pytest.fixture
def mock_fetch():
    """Patch the GitHub GraphQL query helper so no network call is made."""
    with patch(
        "vulnerabilities.pipelines.v2_importers.github_importer.utils.fetch_github_graphql_query"
    ) as mock:
        yield mock


def test_advisories_count(mock_fetch):
    # Mock the GraphQL query response for advisory count
    mock_fetch.return_value = {"data": {"securityVulnerabilities": {"totalCount": 10}}}

    pipeline = GitHubAPIImporterPipeline()
    count = pipeline.advisories_count()

    # Assert that the count is correct
    assert count == 10


def test_collect_advisories(mock_fetch):
    """A single GraphQL advisory node is parsed into one advisory object."""
    advisory_data = {
        "data": {
            "securityVulnerabilities": {
                "edges": [
                    {
                        "node": {
                            "advisory": {
                                "identifiers": [{"type": "GHSA", "value": "GHSA-1234-ABCD"}],
                                "summary": "Sample advisory description",
                                "references": [
                                    {"url": "https://github.com/advisories/GHSA-1234-ABCD"}
                                ],
                                "severity": "HIGH",
                                "cwes": {"nodes": [{"cweId": "CWE-123"}]},
                                "publishedAt": "2023-01-01T00:00:00Z",
                            },
                            "firstPatchedVersion": {"identifier": "1.2.3"},
                            "package": {"name": "example-package"},
                            "vulnerableVersionRange": ">=1.0.0,<=1.2.0",
                        }
                    }
                ],
                "pageInfo": {"hasNextPage": False, "endCursor": None},
            }
        }
    }

    # Mock the response from GitHub GraphQL query
    mock_fetch.return_value = advisory_data

    pipeline = GitHubAPIImporterPipeline()
    advisories = list(pipeline.collect_advisories())

    # Check if advisories were correctly parsed
    assert len(advisories) == 1
    advisory = advisories[0]

    # Validate advisory fields
    assert advisory.advisory_id == "GHSA-1234-ABCD"
    assert advisory.summary == "Sample advisory description"
    assert advisory.url == "https://github.com/advisories/GHSA-1234-ABCD"
    assert len(advisory.references_v2) == 1
    assert advisory.references_v2[0].reference_id == "GHSA-1234-ABCD"
    assert advisory.severities[0].value == "HIGH"

    # Validate affected package and version range
    affected_package = advisory.affected_packages[0]
    assert isinstance(affected_package.package, PackageURL)
    assert affected_package.package.name == "example-package"

    # Check CWE extraction
    assert advisory.weaknesses == [123]


def test_get_purl(mock_fetch):
    # Test for package URL generation
    result = get_purl("cargo", "example/package-name")

    # Validate that the correct PackageURL is generated
    assert isinstance(result, PackageURL)
    assert result.type == "cargo"
    # PEP 8 / E711: compare against None with `is`, not `==`.
    assert result.namespace is None
    assert result.name == "example/package-name"


def test_process_response(mock_fetch):
    """collect_advisories handles a second, differently-shaped payload."""
    advisory_data = {
        "data": {
            "securityVulnerabilities": {
                "edges": [
                    {
                        "node": {
                            "advisory": {
                                "identifiers": [{"type": "GHSA", "value": "GHSA-5678-EFGH"}],
                                "summary": "Another advisory",
                                "references": [
                                    {"url": "https://github.com/advisories/GHSA-5678-EFGH"}
                                ],
                                "severity": "MEDIUM",
                                "cwes": {"nodes": [{"cweId": "CWE-200"}]},
                                "publishedAt": "2023-02-01T00:00:00Z",
                            },
                            "firstPatchedVersion": {"identifier": "2.0.0"},
                            "package": {"name": "another-package"},
                            "vulnerableVersionRange": ">=2.0.0,<=3.0.0",
                        }
                    }
                ],
                "pageInfo": {"hasNextPage": False, "endCursor": None},
            }
        }
    }

    # Mock the response from GitHub GraphQL query
    mock_fetch.return_value = advisory_data

    # Process the mock response
    result = list(GitHubAPIImporterPipeline().collect_advisories())

    # Check the results
    assert len(result) == 1
    advisory = result[0]

    # Validate the advisory data
    assert advisory.advisory_id == "GHSA-5678-EFGH"
    assert advisory.summary == "Another advisory"
    assert advisory.url == "https://github.com/advisories/GHSA-5678-EFGH"

    # Check CWE extraction
    assert advisory.weaknesses == [200]


def test_get_cwes_from_github_advisory(mock_fetch):
    # Mock CWEs extraction from GitHub advisory
    advisory_data = {"cwes": {"nodes": [{"cweId": "CWE-522"}]}}

    cwes = get_cwes_from_github_advisory(advisory_data)

    # Validate the CWE ID extraction
    assert cwes == [522]


def test_invalid_package_type_in_get_purl(mock_fetch):
    # An unknown package type yields no PackageURL.
    result = get_purl("invalidpkg", "example/package-name")

    assert result is None
#

from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from vulnerabilities.importer import AdvisoryData


@pytest.fixture
def mock_vcs_response(tmp_path):
    """A fake fetchcode VCS response pointing at a temporary checkout."""
    response = MagicMock()
    response.dest_dir = str(tmp_path)
    response.delete = MagicMock()
    return response


@pytest.fixture
def mock_fetch_via_vcs(mock_vcs_response):
    """Patch fetch_via_vcs as seen from the gitlab importer module."""
    with patch("vulnerabilities.pipelines.v2_importers.gitlab_importer.fetch_via_vcs") as mocked:
        mocked.return_value = mock_vcs_response
        yield mocked


@pytest.fixture
def mock_gitlab_yaml(tmp_path):
    """Build a minimal GitLab advisory tree containing one YAML advisory."""
    advisory_dir = tmp_path / "pypi" / "package_name"
    advisory_dir.mkdir(parents=True)

    # NOTE(review): the original fixture's exact indentation inside the
    # triple-quoted string is not recoverable; this flush-left form parses
    # to the same YAML mapping.
    (advisory_dir / "CVE-2022-0001.yml").write_text(
        """\
identifier: "CVE-2022-0001"
package_slug: "pypi/package_name"
title: "Example vulnerability"
description: "Example description"
pubdate: "2022-06-15"
affected_range: "<2.0.0"
fixed_versions:
  - "2.0.0"
urls:
  - "https://example.com/advisory"
cwe_ids:
  - "CWE-79"
identifiers:
  - "CVE-2022-0001"
"""
    )
    return tmp_path


def test_clone(mock_fetch_via_vcs, mock_vcs_response):
    from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline

    pipeline = GitLabImporterPipeline()
    pipeline.clone()

    mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url)
    assert pipeline.vcs_response == mock_vcs_response


def test_advisories_count(mock_gitlab_yaml, mock_vcs_response, mock_fetch_via_vcs):
    from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline

    mock_vcs_response.dest_dir = str(mock_gitlab_yaml)

    pipeline = GitLabImporterPipeline()
    pipeline.clone()
    mock_fetch_via_vcs.assert_called_once()

    assert pipeline.advisories_count() == 1


def test_collect_advisories(mock_gitlab_yaml, mock_vcs_response, mock_fetch_via_vcs):
    from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline

    mock_vcs_response.dest_dir = str(mock_gitlab_yaml)

    pipeline = GitLabImporterPipeline()
    pipeline.clone()

    collected = list(pipeline.collect_advisories())
    assert len(collected) == 1

    advisory = collected[0]
    assert isinstance(advisory, AdvisoryData)
    assert advisory.advisory_id == "CVE-2022-0001"
    assert advisory.summary == "Example vulnerability\nExample description"
    assert advisory.references_v2[0].url == "https://example.com/advisory"
    assert advisory.affected_packages[0].package.name == "package-name"
    assert advisory.affected_packages[0].fixed_version
    assert advisory.weaknesses[0] == 79


def test_clean_downloads(mock_vcs_response):
    from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline

    pipeline = GitLabImporterPipeline()
    pipeline.vcs_response = mock_vcs_response

    pipeline.clean_downloads()
    mock_vcs_response.delete.assert_called_once()


def test_on_failure(mock_vcs_response):
    from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline

    pipeline = GitLabImporterPipeline()
    pipeline.vcs_response = mock_vcs_response

    # Failure handling must route through clean_downloads().
    with patch.object(pipeline, "clean_downloads") as mock_clean:
        pipeline.on_failure()
        mock_clean.assert_called_once()


def test_collect_advisories_with_invalid_yaml(
    mock_gitlab_yaml, mock_vcs_response, mock_fetch_via_vcs
):
    from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline

    # Drop an unparseable YAML file next to the valid advisory.
    invalid_file = Path(mock_gitlab_yaml) / "pypi" / "package_name" / "invalid.yml"
    invalid_file.write_text(":::invalid_yaml")

    mock_vcs_response.dest_dir = str(mock_gitlab_yaml)

    pipeline = GitLabImporterPipeline()
    pipeline.clone()

    # Invalid YAML is skipped, not raised; only the valid advisory remains.
    collected = list(pipeline.collect_advisories())
    assert len(collected) == 1


def test_advisories_count_empty(mock_vcs_response, mock_fetch_via_vcs, tmp_path):
    from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline

    mock_vcs_response.dest_dir = str(tmp_path)

    pipeline = GitLabImporterPipeline()
    pipeline.clone()
    mock_fetch_via_vcs.assert_called_once()

    assert pipeline.advisories_count() == 0
#

import json
from types import SimpleNamespace

import pytz
from packageurl import PackageURL
from univers.version_range import NpmVersionRange
from univers.versions import SemverVersion

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.pipelines.v2_importers.npm_importer import NpmImporterPipeline
from vulnerabilities.severity_systems import CVSSV2
from vulnerabilities.severity_systems import CVSSV3


def test_clone(monkeypatch):
    import vulnerabilities.pipelines.v2_importers.npm_importer as npm_mod

    dummy = SimpleNamespace(dest_dir="dummy", delete=lambda: None)
    # Patch the name in the npm_importer module, not fetchcode.vcs
    monkeypatch.setattr(npm_mod, "fetch_via_vcs", lambda url: dummy)

    p = NpmImporterPipeline()
    p.clone()

    assert p.vcs_response is dummy


def test_clean_downloads_and_on_failure():
    """Both clean_downloads() and on_failure() must delete the checkout."""
    called = {}

    def delete():
        called["deleted"] = True

    dummy = SimpleNamespace(dest_dir="dummy", delete=delete)
    p = NpmImporterPipeline()
    p.vcs_response = dummy

    p.clean_downloads()
    assert called.get("deleted", False)

    called.clear()
    p.on_failure()
    assert called.get("deleted", False)


def test_advisories_count_and_collect(tmp_path):
    base = tmp_path
    vuln_dir = base / "vuln" / "npm"
    vuln_dir.mkdir(parents=True)
    (vuln_dir / "index.json").write_text("{}")
    (vuln_dir / "001.json").write_text(json.dumps({"id": "001"}))

    p = NpmImporterPipeline()
    p.vcs_response = SimpleNamespace(dest_dir=str(base), delete=lambda: None)

    assert p.advisories_count() == 2

    advisories = list(p.collect_advisories())
    # Should yield None for index.json and one AdvisoryData
    real = [a for a in advisories if isinstance(a, AdvisoryData)]
    assert len(real) == 1
    assert real[0].advisory_id == "npm-001"


def test_to_advisory_data_skips_index(tmp_path):
    p = NpmImporterPipeline()
    file = tmp_path / "index.json"
    file.write_text("{}")
    assert p.to_advisory_data(file) is None


def test_to_advisory_data_full(tmp_path):
    """A fully-populated npm advisory JSON maps onto every AdvisoryData field."""
    data = {
        "id": "123",
        "overview": "desc",
        "title": "ti",
        "created_at": "2021-01-01T00:00:00Z",
        "cvss_vector": "CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
        "cvss_score": "9.8",
        "references": ["http://ref1"],
        "module_name": "mypkg",
        "vulnerable_versions": "<=1.2.3",
        "patched_versions": ">=1.2.4",
        "cves": ["CVE-123", "CVE-124"],
    }
    file = tmp_path / "123.json"
    file.write_text(json.dumps(data))

    p = NpmImporterPipeline()
    adv = p.to_advisory_data(file)

    assert isinstance(adv, AdvisoryData)
    assert adv.advisory_id == "npm-123"
    assert "ti" in adv.summary and "desc" in adv.summary
    assert adv.date_published.tzinfo == pytz.UTC
    assert len(adv.severities) == 1 and adv.severities[0].system == CVSSV3

    urls = [r.url for r in adv.references_v2]
    assert "http://ref1" in urls
    # Plain string literal: there is nothing to interpolate (was a pointless f-string).
    assert "https://github.com/nodejs/security-wg/blob/main/vuln/npm/123.json" in urls

    pkg = adv.affected_packages[0]
    assert pkg.package == PackageURL(type="npm", name="mypkg")
    assert isinstance(pkg.affected_version_range, NpmVersionRange)
    assert pkg.fixed_version == SemverVersion("1.2.4")
    assert set(adv.aliases) == {"CVE-123", "CVE-124"}


def test_to_advisory_data_cvss_v2(tmp_path):
    # A CVSS v2 vector must be attributed to the CVSSV2 scoring system.
    data = {"id": "124", "cvss_vector": "CVSS:2.0/AV:N/AC:L/Au:N/C:P/I:P/A:P", "cvss_score": "5.5"}
    file = tmp_path / "124.json"
    file.write_text(json.dumps(data))

    p = NpmImporterPipeline()
    adv = p.to_advisory_data(file)

    assert len(adv.severities) == 1 and adv.severities[0].system == CVSSV2


def test_get_affected_package_special_and_standard():
    p = NpmImporterPipeline()

    # Sentinel "unfixable" range: no fixed version is derived.
    pkg = p.get_affected_package(
        {"vulnerable_versions": "<=99.999.99999", "patched_versions": "<0.0.0"}, "pkg"
    )
    assert isinstance(pkg.affected_version_range, NpmVersionRange)
    assert pkg.fixed_version is None

    # Standard range: the lower bound of patched_versions becomes the fix.
    data2 = {"vulnerable_versions": "<=2.0.0", "patched_versions": ">=2.0.1"}
    pkg2 = p.get_affected_package(data2, "pkg2")
    assert isinstance(pkg2.affected_version_range, NpmVersionRange)
    assert pkg2.fixed_version == SemverVersion("2.0.1")
assert isinstance(pkg2.affected_version_range, NpmVersionRange) + assert pkg2.fixed_version == SemverVersion("2.0.1") diff --git a/vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py b/vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py new file mode 100644 index 000000000..da077f3ed --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py @@ -0,0 +1,154 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest +from univers.versions import SemverVersion + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.postgresql_importer import PostgreSQLImporterPipeline + +HTML_PAGE_WITH_LINKS = """ + + +

Security Advisory

+

Advisory 1

+

Another Advisory

+

Advisory 2

+ + +""" + +HTML_ADVISORY = """ + + + + + + + + + + + + +
+ CVE-2022-1234
+ Announcement
+
10.0, 10.110.29.8Description of the issue
+ + +""" + + +@pytest.fixture +def importer(): + return PostgreSQLImporterPipeline() + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_collect_links(mock_get, importer): + mock_get.return_value.content = HTML_PAGE_WITH_LINKS.encode("utf-8") + + importer.collect_links() + + assert len(importer.links) == 3 # base + 2 new + assert any("advisory1.html" in link for link in importer.links) + assert any("advisory2.html" in link for link in importer.links) + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_advisories_count(mock_get, importer): + mock_get.return_value.content = HTML_PAGE_WITH_LINKS.encode("utf-8") + + count = importer.advisories_count() + assert count >= 3 + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_collect_advisories(mock_get, importer): + importer.links = { + "https://www.postgresql.org/support/security/advisory1.html", + "https://www.postgresql.org/support/security/advisory2.html", + } + + mock_get.return_value.content = HTML_ADVISORY.encode("utf-8") + + advisories = list(importer.collect_advisories()) + + assert len(advisories) == 2 + advisory = advisories[0] + assert isinstance(advisory, AdvisoryData) + assert advisory.advisory_id == "CVE-2022-1234" + assert "Description of the issue" in advisory.summary + assert len(advisory.references_v2) > 0 + assert advisory.affected_packages[0].package.name == "postgresql" + assert str(advisory.affected_packages[0].fixed_version) == "10.2" + assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("10.0.0")) + assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("10.1.0")) + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_collect_advisories_with_no_fixed_version(mock_get, importer): + no_fix_html = """ + + + + + + + + + + + + +
+ CVE-2023-5678
+ Announcement
+
9.5, 9.6Unpatched issue
+ + + """ + + def side_effect(url, *args, **kwargs): + if "advisory" not in url: + return MagicMock(content=HTML_PAGE_WITH_LINKS.encode("utf-8")) + return MagicMock(content=no_fix_html.encode("utf-8")) + + mock_get.side_effect = side_effect + + advisories = list(importer.collect_advisories()) + + assert len(advisories) == 2 + advisory = advisories[0] + assert advisory.advisory_id == "CVE-2023-5678" + assert advisory.affected_packages[0].fixed_version is None + assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("9.5")) + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_cvss_parsing(mock_get, importer): + mock_get.side_effect = lambda url, *args, **kwargs: MagicMock( + content=HTML_ADVISORY.encode("utf-8") + ) + + importer.links = {"https://www.postgresql.org/support/security/advisory1.html"} + + advisories = list(importer.collect_advisories()) + + assert len(advisories) == 1 + reference = advisories[0].references_v2[0] + + severity = reference.severities[0] + assert severity.system.identifier == "cvssv3" + assert severity.value == "9.8" + assert "AV:N/AC:L/PR:N/UI:N" in severity.scoring_elements diff --git a/vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py new file mode 100644 index 000000000..20aa63387 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py @@ -0,0 +1,173 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest +import saneyaml + +from vulnerabilities.importer import AdvisoryData + + +@pytest.fixture +def mock_vcs_response(): + # Mock the vcs_response from fetch_via_vcs + mock_response = MagicMock() + mock_response.dest_dir = "/mock/repo" + mock_response.delete = MagicMock() + return mock_response + + +@pytest.fixture +def mock_fetch_via_vcs(mock_vcs_response): + with patch("vulnerabilities.pipelines.v2_importers.pypa_importer.fetch_via_vcs") as mock: + mock.return_value = mock_vcs_response + yield mock + + +@pytest.fixture +def mock_pathlib(tmp_path): + # Mock the Path structure to simulate the `vulns` directory and advisory files + vulns_dir = tmp_path / "vulns" + vulns_dir.mkdir() + + advisory_file = vulns_dir / "CVE-2021-1234.yaml" + advisory_file.write_text( + """ + id: CVE-2021-1234 + summary: Sample PyPI vulnerability + references: + - https://pypi.org/advisory/CVE-2021-1234 + """ + ) + return vulns_dir + + +def test_clone(mock_fetch_via_vcs, mock_vcs_response): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Test the `clone` method to ensure it calls `fetch_via_vcs` + pipeline = PyPaImporterPipeline() + pipeline.clone() + + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + assert pipeline.vcs_response == mock_vcs_response + + +def test_advisories_count(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Mock `vcs_response.dest_dir` to point to the temporary directory + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + pipeline = PyPaImporterPipeline() + + # Call clone() to set the vcs_response attribute + pipeline.clone() + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + + 
count = pipeline.advisories_count() + + # Check that the count matches the number of YAML files in the `vulns` directory + assert count == 1 + + +def test_collect_advisories(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Mock `vcs_response.dest_dir` to point to the temporary directory + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + # Mock `parse_advisory_data` to return an AdvisoryData object + with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse: + mock_parse.return_value = AdvisoryData( + advisory_id="CVE-2021-1234", + summary="Sample PyPI vulnerability", + references_v2=[{"url": "https://pypi.org/advisory/CVE-2021-1234"}], + affected_packages=[], + weaknesses=[], + url="https://pypi.org/advisory/CVE-2021-1234", + ) + + pipeline = PyPaImporterPipeline() + pipeline.clone() + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + advisories = list(pipeline.collect_advisories()) + + # Ensure that advisories are parsed correctly + assert len(advisories) == 1 + advisory = advisories[0] + assert advisory.advisory_id == "CVE-2021-1234" + assert advisory.summary == "Sample PyPI vulnerability" + assert advisory.url == "https://pypi.org/advisory/CVE-2021-1234" + + +def test_clean_downloads(mock_vcs_response): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Test the `clean_downloads` method to ensure the repository is deleted + pipeline = PyPaImporterPipeline() + pipeline.vcs_response = mock_vcs_response + + pipeline.clean_downloads() + + mock_vcs_response.delete.assert_called_once() + + +def test_on_failure(mock_vcs_response): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import 
PyPaImporterPipeline + + # Test the `on_failure` method to ensure `clean_downloads` is called on failure + pipeline = PyPaImporterPipeline() + pipeline.vcs_response = mock_vcs_response + + with patch.object(pipeline, "clean_downloads") as mock_clean: + pipeline.on_failure() + + mock_clean.assert_called_once() + + +def test_collect_advisories_with_invalid_yaml(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Create an invalid YAML file + invalid_file = mock_pathlib / "invalid_file.yaml" + invalid_file.write_text("invalid_yaml") + + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse: + # Mock parse_advisory_data to raise an error on invalid YAML + mock_parse.side_effect = saneyaml.YAMLError("Invalid YAML") + + pipeline = PyPaImporterPipeline() + pipeline.clone() + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + with pytest.raises(saneyaml.YAMLError): + list(pipeline.collect_advisories()) + + +def test_advisories_count_empty(mock_vcs_response, mock_fetch_via_vcs): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Mock an empty 'vulns' directory + mock_vcs_response.dest_dir = "/mock/empty_repo" + pipeline = PyPaImporterPipeline() + pipeline.clone() + # Test that advisories_count returns 0 for an empty directory + count = pipeline.advisories_count() + assert count == 0 diff --git a/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py b/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py new file mode 100644 index 000000000..33c716889 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py @@ -0,0 +1,137 @@ +import json +from io import BytesIO +from unittest.mock import patch 
+from zipfile import ZipFile + +import pytest + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.pysec_importer import ( + PyPIImporterPipeline, # Path to the PyPI Importer +) + + +@pytest.fixture +def mock_zip_data(): + # Create mock zip data for testing + zip_buffer = BytesIO() + with ZipFile(zip_buffer, mode="w") as zip_file: + # Create a sample advisory file inside the zip + advisory_data = { + "advisory_id": "PYSEC-1234", + "summary": "Sample PyPI advisory", + "references": [{"url": "https://pypi.org/advisory/PYSEC-1234"}], + "package": {"name": "example-package"}, + "affected_versions": ">=1.0.0,<=2.0.0", + } + # Save the sample advisory as a JSON file + with zip_file.open("PYSEC-1234.json", "w") as f: + f.write(json.dumps(advisory_data).encode("utf-8")) + zip_buffer.seek(0) + return zip_buffer + + +@pytest.fixture +def mock_requests_get(): + with patch("requests.get") as mock: + yield mock + + +def test_fetch_zip(mock_requests_get, mock_zip_data): + # Mock the `requests.get` to return the mock zip data + mock_requests_get.return_value.content = mock_zip_data.read() + + pipeline = PyPIImporterPipeline() + + # Call the `fetch_zip` method + pipeline.fetch_zip() + + # Reset the position of mock_zip_data to 0 before comparing + mock_zip_data.seek(0) + + # Verify that the zip file content is correctly assigned + assert pipeline.advisory_zip == mock_zip_data.read() + + +def test_advisories_count(mock_requests_get, mock_zip_data): + # Mock the `requests.get` to return the mock zip data + mock_requests_get.return_value.content = mock_zip_data.read() + + pipeline = PyPIImporterPipeline() + + # Fetch the zip data + pipeline.fetch_zip() + + # Test advisories count + count = pipeline.advisories_count() + + # Verify that it correctly counts the number of advisory files starting with 'PYSEC-' + assert count == 1 + + +def test_collect_advisories(mock_requests_get, mock_zip_data): + # Mock the `requests.get` to return the mock 
zip data + mock_requests_get.return_value.content = mock_zip_data.read() + + pipeline = PyPIImporterPipeline() + + # Fetch the zip data + pipeline.fetch_zip() + + # Mock the `parse_advisory_data_v2` function to return a dummy AdvisoryData + with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse: + mock_parse.return_value = AdvisoryData( + advisory_id="PYSEC-1234", + summary="Sample PyPI advisory", + references_v2=[{"url": "https://pypi.org/advisory/PYSEC-1234"}], + affected_packages=[], + weaknesses=[], + url="https://pypi.org/advisory/PYSEC-1234", + ) + + # Call the `collect_advisories` method + advisories = list(pipeline.collect_advisories()) + + # Ensure we have 1 advisory + assert len(advisories) == 1 + + # Verify advisory data + advisory = advisories[0] + assert advisory.advisory_id == "PYSEC-1234" + assert advisory.summary == "Sample PyPI advisory" + assert advisory.url == "https://pypi.org/advisory/PYSEC-1234" + + +def test_collect_advisories_invalid_file(mock_requests_get, mock_zip_data): + # Create a mock zip with an invalid file name + zip_buffer = BytesIO() + with ZipFile(zip_buffer, mode="w") as zip_file: + zip_file.writestr("INVALID_FILE.txt", "Invalid content") + + zip_buffer.seek(0) + mock_requests_get.return_value.content = zip_buffer.read() + + pipeline = PyPIImporterPipeline() + + # Fetch the zip data + pipeline.fetch_zip() + + # Mock the `parse_advisory_data_v2` function + with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse: + mock_parse.return_value = AdvisoryData( + advisory_id="PYSEC-1234", + summary="Sample PyPI advisory", + references_v2=[{"url": "https://pypi.org/advisory/PYSEC-1234"}], + affected_packages=[], + weaknesses=[], + url="https://pypi.org/advisory/PYSEC-1234", + ) + + # Call the `collect_advisories` method and check the logging for invalid file + with patch( + "vulnerabilities.pipelines.VulnerableCodeBaseImporterPipelineV2.log" + ) as mock_log: + advisories = 
list(pipeline.collect_advisories()) + + # Ensure no advisories were yielded due to the invalid file + assert len(advisories) == 0 diff --git a/vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py b/vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py new file mode 100644 index 000000000..f995f0c1f --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py @@ -0,0 +1,180 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +from datetime import datetime +from datetime import timedelta +from unittest import mock + +import pytest +from packageurl import PackageURL + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import UnMergeablePackageError +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 + + +class DummyImporter(VulnerableCodeBaseImporterPipelineV2): + pipeline_id = "dummy" + log_messages = [] + + def log(self, message, level=logging.INFO): + self.log_messages.append((level, message)) + + def collect_advisories(self): + yield from self._advisories + + def advisories_count(self): + return len(self._advisories) + + +@pytest.fixture +def dummy_advisory(): + return AdvisoryData( + summary="Test advisory", + aliases=["CVE-2025-0001"], + references_v2=[], + severities=[], + weaknesses=[], + affected_packages=[], + advisory_id="ADV-123", + date_published=datetime.now() - timedelta(days=10), + url="https://example.com/advisory/1", + ) + + +@pytest.fixture +def 
dummy_importer(dummy_advisory): + importer = DummyImporter() + importer._advisories = [dummy_advisory] + return importer + + +@pytest.mark.django_db +def test_collect_and_store_advisories(dummy_importer): + dummy_importer.collect_and_store_advisories() + assert len(dummy_importer.log_messages) >= 2 + assert "Successfully collected" in dummy_importer.log_messages[-1][1] + assert AdvisoryV2.objects.count() == 1 + + +def test_get_advisory_packages_basic(dummy_importer): + purl = PackageURL("pypi", None, "dummy", "1.0.0") + affected_package = mock.Mock() + affected_package.package = purl + dummy_importer.unfurl_version_ranges = False + + with mock.patch( + "vulnerabilities.improvers.default.get_exact_purls", return_value=([purl], [purl]) + ): + with mock.patch.object( + PackageV2.objects, "get_or_create_from_purl", return_value=(mock.Mock(), True) + ) as mock_get: + dummy_importer.get_advisory_packages( + advisory_data=mock.Mock(affected_packages=[affected_package]) + ) + assert mock_get.call_count == 2 # one affected, one fixed + + +def test_get_published_package_versions_filters(dummy_importer): + purl = PackageURL("pypi", None, "example", None) + + dummy_versions = [ + mock.Mock(value="1.0.0", release_date=datetime.now() - timedelta(days=5)), + mock.Mock(value="2.0.0", release_date=datetime.now() + timedelta(days=5)), # future + ] + + with mock.patch( + "vulnerabilities.pipelines.package_versions.versions", return_value=dummy_versions + ): + versions = dummy_importer.get_published_package_versions(purl, until=datetime.now()) + assert "1.0.0" in versions + assert "2.0.0" not in versions + + +def test_get_published_package_versions_failure_logs(dummy_importer): + purl = PackageURL("pypi", None, "example", None) + with mock.patch( + "vulnerabilities.pipelines.package_versions.versions", side_effect=Exception("fail") + ): + versions = dummy_importer.get_published_package_versions(purl) + assert versions == [] + assert any("Failed to fetch versions" in msg for lvl, msg 
in dummy_importer.log_messages) + + +def test_expand_version_range_to_purls(dummy_importer): + purls = list( + dummy_importer.expand_verion_range_to_purls("npm", "lodash", "lodash", ["1.0.0", "1.1.0"]) + ) + assert all(isinstance(p, PackageURL) for p in purls) + assert purls[0].name == "lodash" + + +def test_resolve_package_versions(dummy_importer): + dummy_importer.ignorable_versions = [] + dummy_importer.expand_verion_range_to_purls = lambda *args, **kwargs: [ + PackageURL("npm", None, "a", "1.0.0") + ] + + with mock.patch( + "vulnerabilities.pipelines.resolve_version_range", return_value=(["1.0.0"], ["1.1.0"]) + ), mock.patch( + "vulnerabilities.pipelines.get_affected_packages_by_patched_package", + return_value={None: [PackageURL("npm", None, "a", "1.0.0")]}, + ), mock.patch( + "vulnerabilities.pipelines.nearest_patched_package", return_value=[] + ): + aff, fix = dummy_importer.resolve_package_versions( + affected_version_range=">=1.0.0", + pkg_type="npm", + pkg_namespace=None, + pkg_name="a", + valid_versions=["1.0.0", "1.1.0"], + ) + assert any(isinstance(p, PackageURL) for p in aff) + + +def test_get_impacted_packages_mergeable(dummy_importer): + ap = mock.Mock() + ap.package = PackageURL("npm", None, "abc", None) + dummy_importer.get_published_package_versions = lambda package_url, until: ["1.0.0", "1.1.0"] + dummy_importer.resolve_package_versions = lambda **kwargs: ( + [PackageURL("npm", None, "abc", "1.0.0")], + [PackageURL("npm", None, "abc", "1.1.0")], + ) + + with mock.patch( + "vulnerabilities.importer.AffectedPackage.merge", + return_value=(ap.package, [">=1.0.0"], ["1.1.0"]), + ): + aff, fix = dummy_importer.get_impacted_packages([ap], datetime.now()) + assert len(aff) == 1 and aff[0].version == "1.0.0" + assert len(fix) == 1 and fix[0].version == "1.1.0" + + +def test_get_impacted_packages_unmergeable(dummy_importer): + ap = mock.Mock() + ap.package = PackageURL("npm", None, "abc", None) + ap.affected_version_range = ">=1.0.0" + ap.fixed_version = 
None + + dummy_importer.get_published_package_versions = lambda package_url, until: ["1.0.0", "1.1.0"] + dummy_importer.resolve_package_versions = lambda **kwargs: ( + [PackageURL("npm", None, "abc", "1.0.0")], + [PackageURL("npm", None, "abc", "1.1.0")], + ) + + with mock.patch( + "vulnerabilities.importer.AffectedPackage.merge", side_effect=UnMergeablePackageError + ): + aff, fix = dummy_importer.get_impacted_packages([ap], datetime.utcnow()) + assert len(aff) == 1 + assert aff[0].version == "1.0.0" diff --git a/vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py b/vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py new file mode 100644 index 000000000..f926058c2 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py @@ -0,0 +1,205 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import json +from pathlib import Path +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines.v2_importers.vulnrichment_importer import VulnrichImporterPipeline + + +@pytest.fixture +def mock_vcs_response(): + # Mock the vcs_response from fetch_via_vcs + mock_response = MagicMock() + mock_response.dest_dir = "/mock/repo" + mock_response.delete = MagicMock() + return mock_response + + +@pytest.fixture +def mock_fetch_via_vcs(mock_vcs_response): + with patch( + "vulnerabilities.pipelines.v2_importers.vulnrichment_importer.fetch_via_vcs" + ) as mock: + mock.return_value = mock_vcs_response + yield mock + + +@pytest.fixture +def mock_pathlib(tmp_path): + # Create a mock filesystem with a 'vulns' directory and JSON files + vulns_dir = tmp_path / "vulns" + vulns_dir.mkdir() + + advisory_file = vulns_dir / "CVE-2021-1234.json" + advisory_file.write_text( + json.dumps( + { + "cveMetadata": { + "cveId": "CVE-2021-1234", + "state": "PUBLIC", + "datePublished": "2021-01-01", + }, + "containers": { + "cna": { + "descriptions": [{"lang": "en", "value": "Sample PyPI vulnerability"}], + "metrics": [ + { + "cvssV4_0": { + "baseScore": 7.5, + "vectorString": "AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H", + } + } + ], + "affected": [{"cpes": ["cpe:/a:example:package"]}], + "references": [{"url": "https://example.com", "tags": ["exploit"]}], + } + }, + } + ) + ) + return vulns_dir + + +def test_clone(mock_fetch_via_vcs, mock_vcs_response): + # Test the `clone` method to ensure the repository is cloned correctly + pipeline = VulnrichImporterPipeline() + pipeline.clone() + + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + assert pipeline.vcs_response == mock_vcs_response + + +def test_advisories_count(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + mock_vcs_response.dest_dir = 
str(mock_pathlib.parent) + + pipeline = VulnrichImporterPipeline() + pipeline.clone() + count = pipeline.advisories_count() + + assert count == 0 + + +def test_collect_advisories(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + # Mock `vcs_response.dest_dir` to point to the temporary directory + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + # Mock `parse_cve_advisory` to return an AdvisoryData object + with patch( + "vulnerabilities.pipelines.v2_importers.vulnrichment_importer.VulnrichImporterPipeline.parse_cve_advisory" + ) as mock_parse: + mock_parse.return_value = AdvisoryData( + advisory_id="CVE-2021-1234", + summary="Sample PyPI vulnerability", + references_v2=[{"url": "https://example.com"}], + affected_packages=[], + weaknesses=[], + url="https://example.com", + severities=[ + VulnerabilitySeverity( + system="cvssv4", + value=7.5, + scoring_elements="AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H", + ) + ], + ) + + pipeline = VulnrichImporterPipeline() + pipeline.clone() + advisories = list(pipeline.collect_advisories()) + + # Ensure that advisories are parsed correctly + assert len(advisories) == 1 + advisory = advisories[0] + assert advisory.advisory_id == "CVE-2021-1234" + assert advisory.summary == "Sample PyPI vulnerability" + assert advisory.url == "https://example.com" + + +def test_clean_downloads(mock_vcs_response, mock_fetch_via_vcs): + # Test the `clean_downloads` method to ensure the repository is deleted + pipeline = VulnrichImporterPipeline() + pipeline.clone() + pipeline.vcs_response = mock_vcs_response + + pipeline.clean_downloads() + + mock_vcs_response.delete.assert_called_once() + + +def test_on_failure(mock_vcs_response, mock_fetch_via_vcs): + pipeline = VulnrichImporterPipeline() + pipeline.clone() + pipeline.vcs_response = mock_vcs_response + + with patch.object(pipeline, "clean_downloads") as mock_clean: + pipeline.on_failure() + + mock_clean.assert_called_once() + + +def test_parse_cve_advisory(mock_pathlib, mock_vcs_response, 
mock_fetch_via_vcs): + from vulnerabilities.pipelines.v2_importers.vulnrichment_importer import ( + VulnrichImporterPipeline, + ) + + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + raw_data = { + "cveMetadata": {"cveId": "CVE-2021-1234", "state": "PUBLIC", "datePublished": "2021-01-01"}, + "containers": { + "cna": { + "descriptions": [{"lang": "en", "value": "Sample PyPI vulnerability"}], + "metrics": [ + { + "cvssV4_0": { + "baseScore": 7.5, + "vectorString": "AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H", + } + } + ], + "affected": [{"cpes": ["cpe:/a:example:package"]}], + "references": [{"url": "https://example.com", "tags": ["exploit"]}], + } + }, + } + advisory_url = "https://github.com/cisagov/vulnrichment/blob/develop/CVE-2021-1234.json" + + pipeline = VulnrichImporterPipeline() + pipeline.clone() + advisory = pipeline.parse_cve_advisory(raw_data, advisory_url) + + assert advisory.advisory_id == "CVE-2021-1234" + assert advisory.summary == "Sample PyPI vulnerability" + assert advisory.url == advisory_url + assert len(advisory.severities) == 1 + assert advisory.severities[0].value == 7.5 + + +def test_collect_advisories_with_invalid_json(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + invalid_file = mock_pathlib / "invalid_file.json" + invalid_file.write_text("invalid_json") + + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + with patch( + "vulnerabilities.pipelines.v2_importers.vulnrichment_importer.VulnrichImporterPipeline.parse_cve_advisory" + ) as mock_parse: + mock_parse.side_effect = json.JSONDecodeError("Invalid JSON", "", 0) + + pipeline = VulnrichImporterPipeline() + pipeline.clone() + with pytest.raises(json.JSONDecodeError): + list(pipeline.collect_advisories()) diff --git a/vulnerabilities/tests/pipes/test_advisory.py b/vulnerabilities/tests/pipes/test_advisory.py index ee29a4b8d..72c477455 100644 --- a/vulnerabilities/tests/pipes/test_advisory.py +++ b/vulnerabilities/tests/pipes/test_advisory.py @@ -9,6 +9,7 @@ 
from datetime import datetime +import pytest from django.core.exceptions import ValidationError from django.test import TestCase from django.utils import timezone @@ -19,6 +20,14 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Reference +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisorySeverity +from vulnerabilities.models import AdvisoryWeakness +from vulnerabilities.pipes.advisory import get_or_create_advisory_aliases +from vulnerabilities.pipes.advisory import get_or_create_advisory_references +from vulnerabilities.pipes.advisory import get_or_create_advisory_severities +from vulnerabilities.pipes.advisory import get_or_create_advisory_weaknesses from vulnerabilities.pipes.advisory import get_or_create_aliases from vulnerabilities.pipes.advisory import import_advisory from vulnerabilities.utils import compute_content_id @@ -134,3 +143,85 @@ def test_advisory_insert_no_duplicate_content_id(self): date_collected=date, created_by="test_pipeline", ) + + +@pytest.fixture +def advisory_aliases(): + return ["CVE-2021-12345", "GHSA-xyz"] + + +@pytest.fixture +def advisory_references(): + return [ + Reference(reference_id="REF-1", url="https://example.com/advisory/1"), + Reference(reference_id="REF-2", url="https://example.com/advisory/2"), + Reference(reference_id="", url="https://example.com/advisory/3"), + Reference(url="https://example.com/advisory/4"), + ] + + +@pytest.fixture +def advisory_severities(): + class Severity: + def __init__(self, system, value, scoring_elements, published_at=None, url=None): + self.system = system + self.value = value + self.scoring_elements = scoring_elements + self.published_at = published_at + self.url = url + + class System: + def __init__(self, identifier): + self.identifier = identifier + + return [ + Severity( + System("CVSSv3"), + "7.5", + 
"AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", + timezone.now(), + "https://cvss.example.com", + ), + ] + + +@pytest.fixture +def advisory_weaknesses(): + return [79, 89] + + +@pytest.mark.django_db +def test_get_or_create_advisory_aliases(advisory_aliases): + aliases = get_or_create_advisory_aliases(advisory_aliases) + assert len(aliases) == len(advisory_aliases) + for alias_obj in aliases: + assert isinstance(alias_obj, AdvisoryAlias) + assert alias_obj.alias in advisory_aliases + + +@pytest.mark.django_db +def test_get_or_create_advisory_references(advisory_references): + refs = get_or_create_advisory_references(advisory_references) + assert len(refs) == len(advisory_references) + for ref in refs: + assert isinstance(ref, AdvisoryReference) + assert ref.url in [r.url for r in advisory_references] + + +@pytest.mark.django_db +def test_get_or_create_advisory_severities(advisory_severities): + sevs = get_or_create_advisory_severities(advisory_severities) + assert len(sevs) == len(advisory_severities) + for sev in sevs: + assert isinstance(sev, AdvisorySeverity) + assert sev.scoring_system == advisory_severities[0].system.identifier + assert sev.value == advisory_severities[0].value + + +@pytest.mark.django_db +def test_get_or_create_advisory_weaknesses(advisory_weaknesses): + weaknesses = get_or_create_advisory_weaknesses(advisory_weaknesses) + assert len(weaknesses) == len(advisory_weaknesses) + for w in weaknesses: + assert isinstance(w, AdvisoryWeakness) + assert w.cwe_id in advisory_weaknesses diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 52104b556..3aec1f56c 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -39,7 +39,7 @@ from univers.version_range import NginxVersionRange from univers.version_range import VersionRange -from aboutcode.hashid import build_vcid # NOQA +from aboutcode.hashid import build_vcid logger = logging.getLogger(__name__) @@ -249,6 +249,11 @@ def fetch_github_graphql_query(graphql_query: dict): 
response = _get_gh_response(gh_token=gh_token, graphql_query=graphql_query) + if not response: + msg = "No response received from GitHub API." + logger.error(msg) + raise GraphQLError(msg) + message = response.get("message") if message and message == "Bad credentials": raise GitHubTokenError(f"Invalid GitHub token: {message}") @@ -266,7 +271,10 @@ def _get_gh_response(gh_token, graphql_query): """ endpoint = "https://api.github.com/graphql" headers = {"Authorization": f"bearer {gh_token}"} - return requests.post(endpoint, headers=headers, json=graphql_query).json() + try: + return requests.post(endpoint, headers=headers, json=graphql_query).json() + except Exception as e: + logger.error(f"Failed to fetch data from GitHub GraphQL API: {e}") def dedupe(original: List) -> List: @@ -287,9 +295,10 @@ def get_affected_packages_by_patched_package( """ affected_packages_by_patched_package = defaultdict(list) for package in affected_packages: - affected_packages_by_patched_package[package.patched_package].append( - package.vulnerable_package - ) + if package.vulnerable_package: + affected_packages_by_patched_package[package.patched_package].append( + package.vulnerable_package + ) return affected_packages_by_patched_package @@ -595,6 +604,7 @@ def compute_content_id(advisory_data): # Normalize fields from vulnerabilities.importer import AdvisoryData + from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.models import Advisory if isinstance(advisory_data, Advisory): @@ -610,20 +620,62 @@ def compute_content_id(advisory_data): normalized_data["url"] = advisory_data.url elif isinstance(advisory_data, AdvisoryData): - normalized_data = { - "aliases": normalize_list(advisory_data.aliases), - "summary": normalize_text(advisory_data.summary), - "affected_packages": [ - pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg - ], - "references": [ - ref.to_dict() for ref in normalize_list(advisory_data.references) if ref - 
], - "weaknesses": normalize_list(advisory_data.weaknesses), - } + if advisory_data.references_v2: + normalized_data = { + "aliases": normalize_list(advisory_data.aliases), + "summary": normalize_text(advisory_data.summary), + "affected_packages": [ + pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg + ], + "references": [ + ref.to_dict() for ref in normalize_list(advisory_data.references_v2) if ref + ], + "severities": [ + sev.to_dict() for sev in normalize_list(advisory_data.severities) if sev + ], + "weaknesses": normalize_list(advisory_data.weaknesses), + } + elif advisory_data.references or advisory_data.references == []: + normalized_data = { + "aliases": normalize_list(advisory_data.aliases), + "summary": normalize_text(advisory_data.summary), + "affected_packages": [ + pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg + ], + "references": [ + ref.to_dict() for ref in normalize_list(advisory_data.references) if ref + ], + "weaknesses": normalize_list(advisory_data.weaknesses), + } + normalized_data["url"] = advisory_data.url normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True) content_id = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest() return content_id + + +def create_registry(pipelines): + """ + Return a mapping of {pipeline ID: pipeline class} for a list of pipelines. 
+ """ + from vulnerabilities.pipelines import VulnerableCodePipeline + + registry = {} + for pipeline in pipelines: + if issubclass(pipeline, VulnerableCodePipeline): + key = pipeline.pipeline_id + else: + # For everything legacy use qualified_name + key = pipeline.qualified_name + + if not key: + raise Exception(f"Pipeline ID can not be empty: {pipeline!r}") + + if key in registry: + raise Exception(f"Duplicate pipeline found: {key}") + + registry[key] = pipeline + + return registry diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index e6fb95a94..71534f9fb 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -29,6 +29,7 @@ from vulnerabilities import models from vulnerabilities.forms import AdminLoginForm +from vulnerabilities.forms import AdvisorySearchForm from vulnerabilities.forms import ApiUserCreationForm from vulnerabilities.forms import PackageSearchForm from vulnerabilities.forms import PipelineSchedulePackageForm @@ -71,6 +72,34 @@ def get_queryset(self, query=None): ) +class PackageSearchV2(ListView): + model = models.PackageV2 + template_name = "packages_v2.html" + ordering = ["type", "namespace", "name", "version"] + paginate_by = PAGE_SIZE + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + request_query = self.request.GET + context["package_search_form"] = PackageSearchForm(request_query) + context["search"] = request_query.get("search") + return context + + def get_queryset(self, query=None): + """ + Return a Package queryset for the ``query``. + Make a best effort approach to find matching packages either based + on exact purl, partial purl or just name and namespace. 
+ """ + query = query or self.request.GET.get("search") or "" + return ( + self.model.objects.search(query) + .with_vulnerability_counts() + .prefetch_related() + .order_by("package_url") + ) + + class VulnerabilitySearch(ListView): model = models.Vulnerability template_name = "vulnerabilities.html" @@ -89,6 +118,24 @@ def get_queryset(self, query=None): return self.model.objects.search(query=query).with_package_counts() +class AdvisorySearch(ListView): + model = models.AdvisoryV2 + template_name = "vulnerabilities.html" + ordering = ["advisory_id"] + paginate_by = PAGE_SIZE + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + request_query = self.request.GET + context["advisory_search_form"] = VulnerabilitySearchForm(request_query) + context["search"] = request_query.get("search") + return context + + def get_queryset(self, query=None): + query = query or self.request.GET.get("search") or "" + return self.model.objects.search(query=query).with_package_counts() + + class PackageDetails(DetailView): model = models.Package template_name = "package_details.html" @@ -130,6 +177,47 @@ def get_object(self, queryset=None): return package +class PackageV2Details(DetailView): + model = models.PackageV2 + template_name = "package_details_v2.html" + slug_url_kwarg = "purl" + slug_field = "purl" + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + package = self.object + context["package"] = package + context["affected_by_advisories"] = package.affected_by_advisories.order_by("advisory_id") + # Ghost package should not fix any vulnerability. 
+ context["fixing_advisories"] = ( + None if package.is_ghost else package.fixing_advisories.order_by("advisory_id") + ) + context["package_search_form"] = PackageSearchForm(self.request.GET) + context["fixed_package_details"] = package.fixed_package_details + + # context["history"] = list(package.history) + return context + + def get_object(self, queryset=None): + if queryset is None: + queryset = self.get_queryset() + + purl = self.kwargs.get(self.slug_url_kwarg) + if purl: + queryset = queryset.for_purl(purl) + else: + cls = self.__class__.__name__ + raise AttributeError( + f"Package details view {cls} must be called with a purl, " f"but got: {purl!r}" + ) + + try: + package = queryset.get() + except queryset.model.DoesNotExist: + raise Http404(f"No Package found for purl: {purl}") + return package + + class VulnerabilityDetails(DetailView): model = models.Vulnerability template_name = "vulnerability_details.html" @@ -193,9 +281,11 @@ def get_context_data(self, **kwargs): for severity in valid_severities: try: - vector_values = SCORING_SYSTEMS[severity.scoring_system].get( - severity.scoring_elements - ) + vector_values_system = SCORING_SYSTEMS[severity.scoring_system] + if not vector_values_system: + logging.error(f"Unknown scoring system: {severity.scoring_system}") + continue + vector_values = vector_values_system.get(severity.scoring_elements) if vector_values: severity_vectors.append({"vector": vector_values, "origin": severity.url}) except ( @@ -232,6 +322,112 @@ def get_context_data(self, **kwargs): return context +class AdvisoryDetails(DetailView): + model = models.AdvisoryV2 + template_name = "advisory_detail.html" + slug_url_kwarg = "id" + slug_field = "id" + + def get_queryset(self): + return ( + super() + .get_queryset() + .select_related() + .prefetch_related( + Prefetch( + "references", + queryset=models.AdvisoryReference.objects.only( + "reference_id", "reference_type", "url" + ), + ), + Prefetch( + "aliases", + 
queryset=models.AdvisoryAlias.objects.only("alias"), + ), + Prefetch( + "weaknesses", + queryset=models.AdvisoryWeakness.objects.only("cwe_id"), + ), + Prefetch( + "severities", + queryset=models.AdvisorySeverity.objects.only( + "scoring_system", "value", "url", "scoring_elements", "published_at" + ), + ), + Prefetch( + "exploits", + queryset=models.AdvisoryExploit.objects.only( + "data_source", "description", "required_action", "due_date", "notes" + ), + ), + ) + ) + + def get_context_data(self, **kwargs): + """ + Build context with preloaded QuerySets and minimize redundant queries. + """ + context = super().get_context_data(**kwargs) + advisory = self.object + + # Pre-fetch and process data in Python instead of the template + weaknesses_present_in_db = [ + weakness_object + for weakness_object in advisory.weaknesses.all() + if weakness_object.weakness + ] + + valid_severities = self.object.severities.exclude(scoring_system=EPSS.identifier).filter( + scoring_elements__isnull=False, scoring_system__in=SCORING_SYSTEMS.keys() + ) + + severity_vectors = [] + + for severity in valid_severities: + try: + vector_values_system = SCORING_SYSTEMS.get(severity.scoring_system) + if not vector_values_system: + logging.error(f"Unknown scoring system: {severity.scoring_system}") + continue + if vector_values_system.identifier in ["cvssv3.1_qr"]: + continue + vector_values = vector_values_system.get(severity.scoring_elements) + if vector_values: + severity_vectors.append({"vector": vector_values, "origin": severity.url}) + logging.error(f"Error processing scoring elements: {severity.scoring_elements}") + except ( + CVSS2MalformedError, + CVSS3MalformedError, + CVSS4MalformedError, + NotImplementedError, + ): + logging.error(f"CVSSMalformedError for {severity.scoring_elements}") + + epss_severity = advisory.severities.filter(scoring_system="epss").first() + epss_data = None + if epss_severity: + epss_data = { + "percentile": epss_severity.scoring_elements, + "score": 
epss_severity.value, + "published_at": epss_severity.published_at, + } + print(severity_vectors) + context.update( + { + "advisory": advisory, + "severities": list(advisory.severities.all()), + "severity_vectors": severity_vectors, + "references": list(advisory.references.all()), + "aliases": list(advisory.aliases.all()), + "weaknesses": weaknesses_present_in_db, + "status": advisory.get_status_label, + # "history": advisory.history, + "epss_data": epss_data, + } + ) + return context + + class HomePage(View): template_name = "index.html" @@ -245,6 +441,19 @@ def get(self, request): return render(request=request, template_name=self.template_name, context=context) +class HomePageV2(View): + template_name = "index_v2.html" + + def get(self, request): + request_query = request.GET + context = { + "vulnerability_search_form": AdvisorySearchForm(request_query), + "package_search_form": PackageSearchForm(request_query), + "release_url": f"https://github.com/aboutcode-org/vulnerablecode/releases/tag/v{VULNERABLECODE_VERSION}", + } + return render(request=request, template_name=self.template_name, context=context) + + email_template = """ Dear VulnerableCode.io user: @@ -353,6 +562,58 @@ def get_context_data(self, **kwargs): return context +class AdvisoryPackagesDetails(DetailView): + """ + View to display all packages affected by or fixing a specific vulnerability. + URL: /advisories/{id}/packages + """ + + model = models.AdvisoryV2 + template_name = "advisory_package_details.html" + slug_url_kwarg = "id" + slug_field = "id" + + def get_queryset(self): + """ + Prefetch and optimize related data to minimize database hits. 
+ """ + return ( + super() + .get_queryset() + .prefetch_related( + Prefetch( + "affecting_packages", + queryset=models.PackageV2.objects.only("type", "namespace", "name", "version"), + ), + Prefetch( + "fixed_by_packages", + queryset=models.PackageV2.objects.only("type", "namespace", "name", "version"), + ), + ) + ) + + def get_context_data(self, **kwargs): + """ + Build context with preloaded QuerySets and minimize redundant queries. + """ + context = super().get_context_data(**kwargs) + advisory = self.object + ( + sorted_fixed_by_packages, + sorted_affected_packages, + all_affected_fixed_by_matches, + ) = advisory.aggregate_fixed_and_affected_packages() + context.update( + { + "affected_packages": sorted_affected_packages, + "fixed_by_packages": sorted_fixed_by_packages, + "all_affected_fixed_by_matches": all_affected_fixed_by_matches, + "advisory": advisory, + } + ) + return context + + class PipelineScheduleListView(ListView, FormMixin): model = PipelineSchedule context_object_name = "schedule_list" diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 45a03a28d..245b8e917 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -20,15 +20,21 @@ from vulnerabilities.api import CPEViewSet from vulnerabilities.api import PackageViewSet from vulnerabilities.api import VulnerabilityViewSet +from vulnerabilities.api_v2 import AdvisoriesPackageV2ViewSet from vulnerabilities.api_v2 import CodeFixViewSet from vulnerabilities.api_v2 import PackageV2ViewSet from vulnerabilities.api_v2 import PipelineScheduleV2ViewSet from vulnerabilities.api_v2 import VulnerabilityV2ViewSet from vulnerabilities.views import AdminLoginView +from vulnerabilities.views import AdvisoryDetails +from vulnerabilities.views import AdvisoryPackagesDetails from vulnerabilities.views import ApiUserCreateView from vulnerabilities.views import HomePage +from vulnerabilities.views import HomePageV2 from vulnerabilities.views import PackageDetails from vulnerabilities.views 
import PackageSearch +from vulnerabilities.views import PackageSearchV2 +from vulnerabilities.views import PackageV2Details from vulnerabilities.views import PipelineRunDetailView from vulnerabilities.views import PipelineRunListView from vulnerabilities.views import PipelineScheduleListView @@ -55,6 +61,9 @@ def __init__(self, *args, **kwargs): api_v2_router = OptionalSlashRouter() api_v2_router.register("packages", PackageV2ViewSet, basename="package-v2") +api_v2_router.register( + "advisories-packages", AdvisoriesPackageV2ViewSet, basename="advisories-package-v2" +) api_v2_router.register("vulnerabilities", VulnerabilityV2ViewSet, basename="vulnerability-v2") api_v2_router.register("codefixes", CodeFixViewSet, basename="codefix") api_v2_router.register("schedule", PipelineScheduleV2ViewSet, basename="schedule") @@ -87,16 +96,36 @@ def __init__(self, *args, **kwargs): PipelineRunDetailView.as_view(), name="run-details", ), + path( + "v2", + HomePageV2.as_view(), + name="home", + ), + path( + "advisories/", + AdvisoryDetails.as_view(), + name="advisory_details", + ), path( "packages/search/", PackageSearch.as_view(), name="package_search", ), + path( + "packages/v2/search/", + PackageSearchV2.as_view(), + name="package_search_v2", + ), re_path( r"^packages/(?Ppkg:.+)$", PackageDetails.as_view(), name="package_details", ), + re_path( + r"^packages/v2/(?Ppkg:.+)$", + PackageV2Details.as_view(), + name="package_details_v2", + ), path( "vulnerabilities/search/", VulnerabilitySearch.as_view(), @@ -112,6 +141,11 @@ def __init__(self, *args, **kwargs): VulnerabilityPackagesDetails.as_view(), name="vulnerability_package_details", ), + path( + "advisories//packages", + AdvisoryPackagesDetails.as_view(), + name="advisory_package_details", + ), path( "api/", include(api_router.urls), From 42673a76e181d08c32f23d8bec9c28f5a3e20028 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 11 Jun 2025 14:03:38 +0530 Subject: [PATCH 021/390] Throttle API users based on user 
group Signed-off-by: Keshav Priyadarshi --- vulnerabilities/api.py | 6 +++--- vulnerabilities/api_extension.py | 10 +++++----- vulnerabilities/models.py | 5 +++++ vulnerabilities/throttling.py | 25 +++++++++++++++++++------ vulnerablecode/settings.py | 17 ++++++++++++----- 5 files changed, 44 insertions(+), 19 deletions(-) diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index 1fd480ce9..d23dd7adb 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -34,7 +34,7 @@ from vulnerabilities.models import get_purl_query_lookups from vulnerabilities.severity_systems import EPSS from vulnerabilities.severity_systems import SCORING_SYSTEMS -from vulnerabilities.throttling import StaffUserRateThrottle +from vulnerabilities.throttling import GroupUserRateThrottle from vulnerabilities.utils import get_severity_range @@ -471,7 +471,7 @@ class PackageViewSet(viewsets.ReadOnlyModelViewSet): serializer_class = PackageSerializer filter_backends = (filters.DjangoFilterBackend,) filterset_class = PackageFilterSet - throttle_classes = [StaffUserRateThrottle, AnonRateThrottle] + throttle_classes = [AnonRateThrottle, GroupUserRateThrottle] def get_queryset(self): return super().get_queryset().with_is_vulnerable() @@ -688,7 +688,7 @@ def get_queryset(self): serializer_class = VulnerabilitySerializer filter_backends = (filters.DjangoFilterBackend,) filterset_class = VulnerabilityFilterSet - throttle_classes = [StaffUserRateThrottle, AnonRateThrottle] + throttle_classes = [AnonRateThrottle, GroupUserRateThrottle] class CPEFilterSet(filters.FilterSet): diff --git a/vulnerabilities/api_extension.py b/vulnerabilities/api_extension.py index 7a13baf42..df765137c 100644 --- a/vulnerabilities/api_extension.py +++ b/vulnerabilities/api_extension.py @@ -33,7 +33,7 @@ from vulnerabilities.models import VulnerabilitySeverity from vulnerabilities.models import Weakness from vulnerabilities.models import get_purl_query_lookups -from vulnerabilities.throttling import 
StaffUserRateThrottle +from vulnerabilities.throttling import GroupUserRateThrottle class SerializerExcludeFieldsMixin: @@ -259,7 +259,7 @@ class V2PackageViewSet(viewsets.ReadOnlyModelViewSet): lookup_field = "purl" filter_backends = (filters.DjangoFilterBackend,) filterset_class = V2PackageFilterSet - throttle_classes = [StaffUserRateThrottle, AnonRateThrottle] + throttle_classes = [GroupUserRateThrottle, AnonRateThrottle] def get_queryset(self): return super().get_queryset().with_is_vulnerable().prefetch_related("vulnerabilities") @@ -345,7 +345,7 @@ class VulnerabilityViewSet(viewsets.ReadOnlyModelViewSet): lookup_field = "vulnerability_id" filter_backends = (filters.DjangoFilterBackend,) filterset_class = V2VulnerabilityFilterSet - throttle_classes = [StaffUserRateThrottle, AnonRateThrottle] + throttle_classes = [GroupUserRateThrottle, AnonRateThrottle] def get_queryset(self): """ @@ -381,7 +381,7 @@ class CPEViewSet(viewsets.ReadOnlyModelViewSet): ).distinct() serializer_class = V2VulnerabilitySerializer filter_backends = (filters.DjangoFilterBackend,) - throttle_classes = [StaffUserRateThrottle, AnonRateThrottle] + throttle_classes = [GroupUserRateThrottle, AnonRateThrottle] filterset_class = CPEFilterSet @action(detail=False, methods=["post"]) @@ -420,4 +420,4 @@ class AliasViewSet(viewsets.ReadOnlyModelViewSet): serializer_class = V2VulnerabilitySerializer filter_backends = (filters.DjangoFilterBackend,) filterset_class = AliasFilterSet - throttle_classes = [StaffUserRateThrottle, AnonRateThrottle] + throttle_classes = [GroupUserRateThrottle, AnonRateThrottle] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index ab01010d7..7a2705e16 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -28,6 +28,7 @@ from cwe2.mappings import xml_database_path from cwe2.weakness import Weakness as DBWeakness from django.contrib.auth import get_user_model +from django.contrib.auth.models import Group from django.contrib.auth.models 
import UserManager from django.core import exceptions from django.core.exceptions import ValidationError @@ -1472,6 +1473,10 @@ def create_api_user(self, username, first_name="", last_name="", **extra_fields) user.set_unusable_password() user.save() + # Assign the default basic group + default_group, _ = Group.objects.get_or_create(name="silver") + user.groups.add(default_group) + Token._default_manager.get_or_create(user=user) return user diff --git a/vulnerabilities/throttling.py b/vulnerabilities/throttling.py index 99b1d7756..ce3a17176 100644 --- a/vulnerabilities/throttling.py +++ b/vulnerabilities/throttling.py @@ -6,18 +6,31 @@ # See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # + from rest_framework.exceptions import Throttled from rest_framework.throttling import UserRateThrottle from rest_framework.views import exception_handler -class StaffUserRateThrottle(UserRateThrottle): +class GroupUserRateThrottle(UserRateThrottle): + scope = "bronze" + def allow_request(self, request, view): - """ - Do not apply throttling for superusers and admins. 
- """ - if request.user.is_superuser or request.user.is_staff: - return True + user = request.user + + if user and user.is_authenticated: + if user.is_superuser or user.is_staff: + return True + + user_groups = user.groups.all() + if any([group.name == "gold" for group in user_groups]): + return True + + if any([group.name == "silver" for group in user_groups]): + self.scope = "silver" + + self.rate = self.THROTTLE_RATES.get(self.scope) + self.num_requests, self.duration = self.parse_rate(self.rate) return super().allow_request(request, view) diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index 6040f99b9..2db44bee8 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -190,12 +190,20 @@ LOGIN_REDIRECT_URL = "/" LOGOUT_REDIRECT_URL = "/" -REST_FRAMEWORK_DEFAULT_THROTTLE_RATES = {"anon": "3600/hour", "user": "10800/hour"} +REST_FRAMEWORK_DEFAULT_THROTTLE_RATES = { + # No throttling for users in gold group. + "silver": "10800/hour", + "bronze": "7200/hour", + "anon": "3600/hour", +} if IS_TESTS: VULNERABLECODEIO_REQUIRE_AUTHENTICATION = False - REST_FRAMEWORK_DEFAULT_THROTTLE_RATES = {"anon": "10/day", "user": "20/day"} - + REST_FRAMEWORK_DEFAULT_THROTTLE_RATES = { + "silver": "20/day", + "bronze": "15/day", + "anon": "10/day", + } USE_L10N = True @@ -235,9 +243,8 @@ "rest_framework.filters.SearchFilter", ), "DEFAULT_THROTTLE_CLASSES": [ - "vulnerabilities.throttling.StaffUserRateThrottle", + "vulnerabilities.throttling.GroupUserRateThrottle", "rest_framework.throttling.AnonRateThrottle", - "rest_framework.throttling.UserRateThrottle", ], "DEFAULT_THROTTLE_RATES": REST_FRAMEWORK_DEFAULT_THROTTLE_RATES, "EXCEPTION_HANDLER": "vulnerabilities.throttling.throttled_exception_handler", From 677ff99dd87b9c92327aa199d5728625bfff0c26 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 11 Jun 2025 14:12:46 +0530 Subject: [PATCH 022/390] Add test for group based throttling Signed-off-by: Keshav Priyadarshi --- 
vulnerabilities/tests/test_api.py | 4 +- vulnerabilities/tests/test_api_v2.py | 4 +- vulnerabilities/tests/test_throttling.py | 58 +++++++++++++++++++----- 3 files changed, 51 insertions(+), 15 deletions(-) diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index a5f80aa06..bad51a121 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -452,7 +452,7 @@ def add_aliases(vuln, aliases): class APIPerformanceTest(TestCase): def setUp(self): - self.user = ApiUser.objects.create_api_user(username="e@mail.com") + self.user = ApiUser.objects.create_api_user(username="e@mail.com", is_staff=True) self.auth = f"Token {self.user.auth_token.key}" self.csrf_client = APIClient(enforce_csrf_checks=True) self.csrf_client.credentials(HTTP_AUTHORIZATION=self.auth) @@ -572,7 +572,7 @@ def test_api_packages_bulk_lookup(self): class APITestCasePackage(TestCase): def setUp(self): - self.user = ApiUser.objects.create_api_user(username="e@mail.com") + self.user = ApiUser.objects.create_api_user(username="e@mail.com", is_staff=True) self.auth = f"Token {self.user.auth_token.key}" self.csrf_client = APIClient(enforce_csrf_checks=True) self.csrf_client.credentials(HTTP_AUTHORIZATION=self.auth) diff --git a/vulnerabilities/tests/test_api_v2.py b/vulnerabilities/tests/test_api_v2.py index 071a4450c..ff7f53bdf 100644 --- a/vulnerabilities/tests/test_api_v2.py +++ b/vulnerabilities/tests/test_api_v2.py @@ -61,7 +61,7 @@ def setUp(self): ) self.reference2.vulnerabilities.add(self.vuln2) - self.user = ApiUser.objects.create_api_user(username="e@mail.com") + self.user = ApiUser.objects.create_api_user(username="e@mail.com", is_staff=True) self.auth = f"Token {self.user.auth_token.key}" self.client = APIClient(enforce_csrf_checks=True) self.client.credentials(HTTP_AUTHORIZATION=self.auth) @@ -210,7 +210,7 @@ def setUp(self): self.package1.affected_by_vulnerabilities.add(self.vuln1) self.package2.fixing_vulnerabilities.add(self.vuln2) 
- self.user = ApiUser.objects.create_api_user(username="e@mail.com") + self.user = ApiUser.objects.create_api_user(username="e@mail.com", is_staff=True) self.auth = f"Token {self.user.auth_token.key}" self.client = APIClient(enforce_csrf_checks=True) self.client.credentials(HTTP_AUTHORIZATION=self.auth) diff --git a/vulnerabilities/tests/test_throttling.py b/vulnerabilities/tests/test_throttling.py index 174761045..8be404db2 100644 --- a/vulnerabilities/tests/test_throttling.py +++ b/vulnerabilities/tests/test_throttling.py @@ -9,6 +9,7 @@ import json +from django.contrib.auth.models import Group from django.core.cache import cache from rest_framework.test import APIClient from rest_framework.test import APITestCase @@ -16,18 +17,35 @@ from vulnerabilities.models import ApiUser -class ThrottleApiTests(APITestCase): +class GroupUserRateThrottleApiTests(APITestCase): def setUp(self): # Reset the api throttling to properly test the rate limit on anon users. # DRF stores throttling state in cache, clear cache to reset throttling. 
# See https://www.django-rest-framework.org/api-guide/throttling/#setting-up-the-cache cache.clear() - # create a basic user - self.user = ApiUser.objects.create_api_user(username="e@mail.com") - self.auth = f"Token {self.user.auth_token.key}" - self.csrf_client = APIClient(enforce_csrf_checks=True) - self.csrf_client.credentials(HTTP_AUTHORIZATION=self.auth) + # User in bronze group + self.bronze_user = ApiUser.objects.create_api_user(username="bronze@mail.com") + bronze, _ = Group.objects.get_or_create(name="bronze") + self.bronze_user.groups.clear() + self.bronze_user.groups.add(bronze) + self.bronze_auth = f"Token {self.bronze_user.auth_token.key}" + self.bronze_user_csrf_client = APIClient(enforce_csrf_checks=True) + self.bronze_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.bronze_auth) + + # User in silver group (default group for api user) + self.silver_user = ApiUser.objects.create_api_user(username="silver@mail.com") + self.silver_auth = f"Token {self.silver_user.auth_token.key}" + self.silver_user_csrf_client = APIClient(enforce_csrf_checks=True) + self.silver_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.silver_auth) + + # User in gold group + self.gold_user = ApiUser.objects.create_api_user(username="gold@mail.com") + gold, _ = Group.objects.get_or_create(name="gold") + self.gold_user.groups.add(gold) + self.gold_auth = f"Token {self.gold_user.auth_token.key}" + self.gold_user_csrf_client = APIClient(enforce_csrf_checks=True) + self.gold_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.gold_auth) # create a staff user self.staff_user = ApiUser.objects.create_api_user(username="staff@mail.com", is_staff=True) @@ -39,16 +57,34 @@ def setUp(self): self.csrf_client_anon_1 = APIClient(enforce_csrf_checks=True) def test_package_endpoint_throttling(self): - for i in range(0, 20): - response = self.csrf_client.get("/api/packages") + for i in range(0, 15): + response = self.bronze_user_csrf_client.get("/api/packages") 
self.assertEqual(response.status_code, 200) - response = self.staff_csrf_client.get("/api/packages") + + response = self.bronze_user_csrf_client.get("/api/packages") + # 429 - too many requests for bronze user + self.assertEqual(response.status_code, 429) + + for i in range(0, 20): + response = self.silver_user_csrf_client.get("/api/packages") self.assertEqual(response.status_code, 200) - response = self.csrf_client.get("/api/packages") - # 429 - too many requests for basic user + response = self.silver_user_csrf_client.get("/api/packages") + # 429 - too many requests for silver user self.assertEqual(response.status_code, 429) + for i in range(0, 30): + response = self.gold_user_csrf_client.get("/api/packages") + self.assertEqual(response.status_code, 200) + + response = self.gold_user_csrf_client.get("/api/packages", format="json") + # 200 - gold user can access API unlimited times + self.assertEqual(response.status_code, 200) + + for i in range(0, 30): + response = self.staff_csrf_client.get("/api/packages") + self.assertEqual(response.status_code, 200) + response = self.staff_csrf_client.get("/api/packages", format="json") # 200 - staff user can access API unlimited times self.assertEqual(response.status_code, 200) From 8e9607b8b79aa03868f0f9b248c42cf70a762fbb Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 13 Jun 2025 17:30:07 +0530 Subject: [PATCH 023/390] Throttle API requests based on user permissions Signed-off-by: Keshav Priyadarshi --- vulnerabilities/api.py | 6 ++--- vulnerabilities/api_extension.py | 10 ++++---- .../migrations/0093_alter_apiuser_options.py | 23 +++++++++++++++++++ vulnerabilities/models.py | 13 +++++------ vulnerabilities/throttling.py | 20 ++++++---------- vulnerablecode/settings.py | 15 +++--------- 6 files changed, 47 insertions(+), 40 deletions(-) create mode 100644 vulnerabilities/migrations/0093_alter_apiuser_options.py diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index d23dd7adb..50403583d 100644 --- 
a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -34,7 +34,7 @@ from vulnerabilities.models import get_purl_query_lookups from vulnerabilities.severity_systems import EPSS from vulnerabilities.severity_systems import SCORING_SYSTEMS -from vulnerabilities.throttling import GroupUserRateThrottle +from vulnerabilities.throttling import PermissionBasedUserRateThrottle from vulnerabilities.utils import get_severity_range @@ -471,7 +471,7 @@ class PackageViewSet(viewsets.ReadOnlyModelViewSet): serializer_class = PackageSerializer filter_backends = (filters.DjangoFilterBackend,) filterset_class = PackageFilterSet - throttle_classes = [AnonRateThrottle, GroupUserRateThrottle] + throttle_classes = [AnonRateThrottle, PermissionBasedUserRateThrottle] def get_queryset(self): return super().get_queryset().with_is_vulnerable() @@ -688,7 +688,7 @@ def get_queryset(self): serializer_class = VulnerabilitySerializer filter_backends = (filters.DjangoFilterBackend,) filterset_class = VulnerabilityFilterSet - throttle_classes = [AnonRateThrottle, GroupUserRateThrottle] + throttle_classes = [AnonRateThrottle, PermissionBasedUserRateThrottle] class CPEFilterSet(filters.FilterSet): diff --git a/vulnerabilities/api_extension.py b/vulnerabilities/api_extension.py index df765137c..89ee644bf 100644 --- a/vulnerabilities/api_extension.py +++ b/vulnerabilities/api_extension.py @@ -33,7 +33,7 @@ from vulnerabilities.models import VulnerabilitySeverity from vulnerabilities.models import Weakness from vulnerabilities.models import get_purl_query_lookups -from vulnerabilities.throttling import GroupUserRateThrottle +from vulnerabilities.throttling import PermissionBasedUserRateThrottle class SerializerExcludeFieldsMixin: @@ -259,7 +259,7 @@ class V2PackageViewSet(viewsets.ReadOnlyModelViewSet): lookup_field = "purl" filter_backends = (filters.DjangoFilterBackend,) filterset_class = V2PackageFilterSet - throttle_classes = [GroupUserRateThrottle, AnonRateThrottle] + throttle_classes = 
[PermissionBasedUserRateThrottle, AnonRateThrottle] def get_queryset(self): return super().get_queryset().with_is_vulnerable().prefetch_related("vulnerabilities") @@ -345,7 +345,7 @@ class VulnerabilityViewSet(viewsets.ReadOnlyModelViewSet): lookup_field = "vulnerability_id" filter_backends = (filters.DjangoFilterBackend,) filterset_class = V2VulnerabilityFilterSet - throttle_classes = [GroupUserRateThrottle, AnonRateThrottle] + throttle_classes = [PermissionBasedUserRateThrottle, AnonRateThrottle] def get_queryset(self): """ @@ -381,7 +381,7 @@ class CPEViewSet(viewsets.ReadOnlyModelViewSet): ).distinct() serializer_class = V2VulnerabilitySerializer filter_backends = (filters.DjangoFilterBackend,) - throttle_classes = [GroupUserRateThrottle, AnonRateThrottle] + throttle_classes = [PermissionBasedUserRateThrottle, AnonRateThrottle] filterset_class = CPEFilterSet @action(detail=False, methods=["post"]) @@ -420,4 +420,4 @@ class AliasViewSet(viewsets.ReadOnlyModelViewSet): serializer_class = V2VulnerabilitySerializer filter_backends = (filters.DjangoFilterBackend,) filterset_class = AliasFilterSet - throttle_classes = [GroupUserRateThrottle, AnonRateThrottle] + throttle_classes = [PermissionBasedUserRateThrottle, AnonRateThrottle] diff --git a/vulnerabilities/migrations/0093_alter_apiuser_options.py b/vulnerabilities/migrations/0093_alter_apiuser_options.py new file mode 100644 index 000000000..771a3779b --- /dev/null +++ b/vulnerabilities/migrations/0093_alter_apiuser_options.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.22 on 2025-06-13 08:07 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0092_pipelineschedule_pipelinerun"), + ] + + operations = [ + migrations.AlterModelOptions( + name="apiuser", + options={ + "permissions": [ + ("throttle_unrestricted", "Exempt from API throttling limits"), + ("throttle_18000_hour", "Can make 18000 API requests per hour"), + ("throttle_14400_hour", "Can 
make 14400 API requests per hour"), + ] + }, + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 7a2705e16..6a792e80b 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1473,10 +1473,6 @@ def create_api_user(self, username, first_name="", last_name="", **extra_fields) user.set_unusable_password() user.save() - # Assign the default basic group - default_group, _ = Group.objects.get_or_create(name="silver") - user.groups.add(default_group) - Token._default_manager.get_or_create(user=user) return user @@ -1494,14 +1490,17 @@ def _validate_username(self, email): class ApiUser(UserModel): - """ - A User proxy model to facilitate simplified admin API user creation. - """ + """A User proxy model to facilitate simplified admin API user creation.""" objects = ApiUserManager() class Meta: proxy = True + permissions = [ + ("throttle_unrestricted", "Exempt from API throttling limits"), + ("throttle_18000_hour", "Can make 18000 API requests per hour"), + ("throttle_14400_hour", "Can make 14400 API requests per hour"), + ] class ChangeLog(models.Model): diff --git a/vulnerabilities/throttling.py b/vulnerabilities/throttling.py index ce3a17176..d6b0840eb 100644 --- a/vulnerabilities/throttling.py +++ b/vulnerabilities/throttling.py @@ -12,25 +12,19 @@ from rest_framework.views import exception_handler -class GroupUserRateThrottle(UserRateThrottle): - scope = "bronze" - +class PermissionBasedUserRateThrottle(UserRateThrottle): def allow_request(self, request, view): user = request.user if user and user.is_authenticated: - if user.is_superuser or user.is_staff: - return True - - user_groups = user.groups.all() - if any([group.name == "gold" for group in user_groups]): + if user.has_perm("vulnerabilities.throttle_unrestricted"): return True + elif user.has_perm("vulnerabilities.throttle_18000_hour"): + self.rate = "18000/hour" + elif user.has_perm("vulnerabilities.throttle_14400_hour"): + self.rate = "14400/hour" - if 
any([group.name == "silver" for group in user_groups]): - self.scope = "silver" - - self.rate = self.THROTTLE_RATES.get(self.scope) - self.num_requests, self.duration = self.parse_rate(self.rate) + self.num_requests, self.duration = self.parse_rate(self.rate) return super().allow_request(request, view) diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index 2db44bee8..63810397c 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -190,20 +190,11 @@ LOGIN_REDIRECT_URL = "/" LOGOUT_REDIRECT_URL = "/" -REST_FRAMEWORK_DEFAULT_THROTTLE_RATES = { - # No throttling for users in gold group. - "silver": "10800/hour", - "bronze": "7200/hour", - "anon": "3600/hour", -} +REST_FRAMEWORK_DEFAULT_THROTTLE_RATES = {"anon": "3600/hour", "user": "10800/hour"} + if IS_TESTS: VULNERABLECODEIO_REQUIRE_AUTHENTICATION = False - REST_FRAMEWORK_DEFAULT_THROTTLE_RATES = { - "silver": "20/day", - "bronze": "15/day", - "anon": "10/day", - } USE_L10N = True @@ -243,7 +234,7 @@ "rest_framework.filters.SearchFilter", ), "DEFAULT_THROTTLE_CLASSES": [ - "vulnerabilities.throttling.GroupUserRateThrottle", + "vulnerabilities.throttling.PermissionBasedUserRateThrottle", "rest_framework.throttling.AnonRateThrottle", ], "DEFAULT_THROTTLE_RATES": REST_FRAMEWORK_DEFAULT_THROTTLE_RATES, From dba5b6c6c4b583f77bbb2cb81bda9a6df4d05123 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 13 Jun 2025 17:37:56 +0530 Subject: [PATCH 024/390] Enable throttling for v2 API endpoint Signed-off-by: Keshav Priyadarshi --- vulnerabilities/api_v2.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vulnerabilities/api_v2.py b/vulnerabilities/api_v2.py index 4915dda63..e9f967b79 100644 --- a/vulnerabilities/api_v2.py +++ b/vulnerabilities/api_v2.py @@ -23,6 +23,7 @@ from rest_framework.permissions import BasePermission from rest_framework.response import Response from rest_framework.reverse import reverse +from rest_framework.throttling import AnonRateThrottle from 
vulnerabilities.models import AdvisoryReference from vulnerabilities.models import AdvisorySeverity @@ -38,6 +39,7 @@ from vulnerabilities.models import VulnerabilityReference from vulnerabilities.models import VulnerabilitySeverity from vulnerabilities.models import Weakness +from vulnerabilities.throttling import PermissionBasedUserRateThrottle class WeaknessV2Serializer(serializers.ModelSerializer): @@ -199,6 +201,7 @@ class VulnerabilityV2ViewSet(viewsets.ReadOnlyModelViewSet): queryset = Vulnerability.objects.all() serializer_class = VulnerabilityV2Serializer lookup_field = "vulnerability_id" + throttle_classes = [AnonRateThrottle, PermissionBasedUserRateThrottle] def get_queryset(self): queryset = super().get_queryset() @@ -394,6 +397,7 @@ class PackageV2ViewSet(viewsets.ReadOnlyModelViewSet): serializer_class = PackageV2Serializer filter_backends = (filters.DjangoFilterBackend,) filterset_class = PackageV2FilterSet + throttle_classes = [AnonRateThrottle, PermissionBasedUserRateThrottle] def get_queryset(self): queryset = super().get_queryset() @@ -721,6 +725,7 @@ class CodeFixViewSet(viewsets.ReadOnlyModelViewSet): queryset = CodeFix.objects.all() serializer_class = CodeFixSerializer + throttle_classes = [AnonRateThrottle, PermissionBasedUserRateThrottle] def get_queryset(self): """ @@ -863,6 +868,7 @@ class PipelineScheduleV2ViewSet(CreateListRetrieveUpdateViewSet): serializer_class = PipelineScheduleAPISerializer lookup_field = "pipeline_id" lookup_value_regex = r"[\w.]+" + throttle_classes = [AnonRateThrottle, PermissionBasedUserRateThrottle] def get_serializer_class(self): if self.action == "create": From 68a375c84636c642f02e4c83fa081e4b32eaa144 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 13 Jun 2025 17:41:01 +0530 Subject: [PATCH 025/390] Add tests for user permission based API throttling Signed-off-by: Keshav Priyadarshi --- vulnerabilities/tests/test_api.py | 25 ++-- vulnerabilities/tests/test_api_v2.py | 62 ++++---- 
vulnerabilities/tests/test_throttling.py | 173 ++++++++++++++--------- 3 files changed, 150 insertions(+), 110 deletions(-) diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index bad51a121..9ed647099 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -11,6 +11,7 @@ import os from urllib.parse import quote +from django.core.cache import cache from django.test import TestCase from django.test import TransactionTestCase from django.test.client import RequestFactory @@ -452,10 +453,8 @@ def add_aliases(vuln, aliases): class APIPerformanceTest(TestCase): def setUp(self): - self.user = ApiUser.objects.create_api_user(username="e@mail.com", is_staff=True) - self.auth = f"Token {self.user.auth_token.key}" + cache.clear() self.csrf_client = APIClient(enforce_csrf_checks=True) - self.csrf_client.credentials(HTTP_AUTHORIZATION=self.auth) # This setup creates the following data: # vulnerabilities: vul1, vul2, vul3 @@ -503,7 +502,7 @@ def setUp(self): set_as_fixing(package=self.pkg_2_13_2, vulnerability=self.vul1) def test_api_packages_all_num_queries(self): - with self.assertNumQueries(4): + with self.assertNumQueries(3): # There are 4 queries: # 1. SAVEPOINT # 2. 
Authenticating user @@ -519,22 +518,22 @@ def test_api_packages_all_num_queries(self): ] def test_api_packages_single_num_queries(self): - with self.assertNumQueries(8): + with self.assertNumQueries(7): self.csrf_client.get(f"/api/packages/{self.pkg_2_14_0_rc1.id}", format="json") def test_api_packages_single_with_purl_in_query_num_queries(self): - with self.assertNumQueries(9): + with self.assertNumQueries(8): self.csrf_client.get(f"/api/packages/?purl={self.pkg_2_14_0_rc1.purl}", format="json") def test_api_packages_single_with_purl_no_version_in_query_num_queries(self): - with self.assertNumQueries(64): + with self.assertNumQueries(63): self.csrf_client.get( f"/api/packages/?purl=pkg:maven/com.fasterxml.jackson.core/jackson-databind", format="json", ) def test_api_packages_bulk_search(self): - with self.assertNumQueries(45): + with self.assertNumQueries(44): packages = [self.pkg_2_12_6, self.pkg_2_12_6_1, self.pkg_2_13_1] purls = [p.purl for p in packages] @@ -547,7 +546,7 @@ def test_api_packages_bulk_search(self): ).json() def test_api_packages_with_lookup(self): - with self.assertNumQueries(14): + with self.assertNumQueries(13): data = {"purl": self.pkg_2_12_6.purl} resp = self.csrf_client.post( @@ -557,7 +556,7 @@ def test_api_packages_with_lookup(self): ).json() def test_api_packages_bulk_lookup(self): - with self.assertNumQueries(45): + with self.assertNumQueries(44): packages = [self.pkg_2_12_6, self.pkg_2_12_6_1, self.pkg_2_13_1] purls = [p.purl for p in packages] @@ -572,10 +571,8 @@ def test_api_packages_bulk_lookup(self): class APITestCasePackage(TestCase): def setUp(self): - self.user = ApiUser.objects.create_api_user(username="e@mail.com", is_staff=True) - self.auth = f"Token {self.user.auth_token.key}" + cache.clear() self.csrf_client = APIClient(enforce_csrf_checks=True) - self.csrf_client.credentials(HTTP_AUTHORIZATION=self.auth) # This setup creates the following data: # vulnerabilities: vul1, vul2, vul3 @@ -766,7 +763,7 @@ def 
test_api_with_wrong_namespace_filter(self): self.assertEqual(response["count"], 0) def test_api_with_all_vulnerable_packages(self): - with self.assertNumQueries(4): + with self.assertNumQueries(3): # There are 4 queries: # 1. SAVEPOINT # 2. Authenticating user diff --git a/vulnerabilities/tests/test_api_v2.py b/vulnerabilities/tests/test_api_v2.py index ff7f53bdf..6bdfa77f8 100644 --- a/vulnerabilities/tests/test_api_v2.py +++ b/vulnerabilities/tests/test_api_v2.py @@ -61,10 +61,8 @@ def setUp(self): ) self.reference2.vulnerabilities.add(self.vuln2) - self.user = ApiUser.objects.create_api_user(username="e@mail.com", is_staff=True) - self.auth = f"Token {self.user.auth_token.key}" + cache.clear() self.client = APIClient(enforce_csrf_checks=True) - self.client.credentials(HTTP_AUTHORIZATION=self.auth) def test_list_vulnerabilities(self): """ @@ -73,7 +71,7 @@ def test_list_vulnerabilities(self): """ url = reverse("vulnerability-v2-list") response = self.client.get(url, format="json") - with self.assertNumQueries(5): + with self.assertNumQueries(4): response = self.client.get(url, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIn("results", response.data) @@ -88,7 +86,7 @@ def test_retrieve_vulnerability_detail(self): Test retrieving vulnerability details by vulnerability_id. """ url = reverse("vulnerability-v2-detail", kwargs={"vulnerability_id": "VCID-1234"}) - with self.assertNumQueries(8): + with self.assertNumQueries(7): response = self.client.get(url, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.data["vulnerability_id"], "VCID-1234") @@ -102,7 +100,7 @@ def test_filter_vulnerability_by_vulnerability_id(self): Test filtering vulnerabilities by vulnerability_id. 
""" url = reverse("vulnerability-v2-list") - with self.assertNumQueries(4): + with self.assertNumQueries(3): response = self.client.get(url, {"vulnerability_id": "VCID-1234"}, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.data["vulnerability_id"], "VCID-1234") @@ -112,7 +110,7 @@ def test_filter_vulnerability_by_alias(self): Test filtering vulnerabilities by alias. """ url = reverse("vulnerability-v2-list") - with self.assertNumQueries(5): + with self.assertNumQueries(4): response = self.client.get(url, {"alias": "CVE-2021-5678"}, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIn("results", response.data) @@ -127,7 +125,7 @@ def test_filter_vulnerabilities_multiple_ids(self): Test filtering vulnerabilities by multiple vulnerability_ids. """ url = reverse("vulnerability-v2-list") - with self.assertNumQueries(5): + with self.assertNumQueries(4): response = self.client.get( url, {"vulnerability_id": ["VCID-1234", "VCID-5678"]}, format="json" ) @@ -139,7 +137,7 @@ def test_filter_vulnerabilities_multiple_aliases(self): Test filtering vulnerabilities by multiple aliases. """ url = reverse("vulnerability-v2-list") - with self.assertNumQueries(5): + with self.assertNumQueries(4): response = self.client.get( url, {"alias": ["CVE-2021-1234", "CVE-2021-5678"]}, format="json" ) @@ -152,7 +150,7 @@ def test_invalid_vulnerability_id(self): Should return 404 Not Found. 
""" url = reverse("vulnerability-v2-detail", kwargs={"vulnerability_id": "VCID-9999"}) - with self.assertNumQueries(5): + with self.assertNumQueries(4): response = self.client.get(url, format="json") self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) @@ -210,10 +208,8 @@ def setUp(self): self.package1.affected_by_vulnerabilities.add(self.vuln1) self.package2.fixing_vulnerabilities.add(self.vuln2) - self.user = ApiUser.objects.create_api_user(username="e@mail.com", is_staff=True) - self.auth = f"Token {self.user.auth_token.key}" + cache.clear() self.client = APIClient(enforce_csrf_checks=True) - self.client.credentials(HTTP_AUTHORIZATION=self.auth) def test_list_packages(self): """ @@ -221,7 +217,7 @@ def test_list_packages(self): Should return a list of packages with their details and associated vulnerabilities. """ url = reverse("package-v2-list") - with self.assertNumQueries(32): + with self.assertNumQueries(31): response = self.client.get(url, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIn("results", response.data) @@ -243,7 +239,7 @@ def test_filter_packages_by_purl(self): Test filtering packages by one or more PURLs. """ url = reverse("package-v2-list") - with self.assertNumQueries(20): + with self.assertNumQueries(19): response = self.client.get(url, {"purl": "pkg:pypi/django@3.2"}, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(len(response.data["results"]["packages"]), 1) @@ -254,7 +250,7 @@ def test_filter_packages_by_affected_vulnerability(self): Test filtering packages by affected_by_vulnerability. """ url = reverse("package-v2-list") - with self.assertNumQueries(20): + with self.assertNumQueries(19): response = self.client.get( url, {"affected_by_vulnerability": "VCID-1234"}, format="json" ) @@ -267,7 +263,7 @@ def test_filter_packages_by_fixing_vulnerability(self): Test filtering packages by fixing_vulnerability. 
""" url = reverse("package-v2-list") - with self.assertNumQueries(18): + with self.assertNumQueries(17): response = self.client.get(url, {"fixing_vulnerability": "VCID-5678"}, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(len(response.data["results"]["packages"]), 1) @@ -356,7 +352,7 @@ def test_invalid_vulnerability_filter(self): Should return an empty list. """ url = reverse("package-v2-list") - with self.assertNumQueries(4): + with self.assertNumQueries(3): response = self.client.get( url, {"affected_by_vulnerability": "VCID-9999"}, format="json" ) @@ -369,7 +365,7 @@ def test_invalid_purl_filter(self): Should return an empty list. """ url = reverse("package-v2-list") - with self.assertNumQueries(4): + with self.assertNumQueries(3): response = self.client.get( url, {"purl": "pkg:nonexistent/package@1.0.0"}, format="json" ) @@ -421,7 +417,7 @@ def test_bulk_lookup_with_valid_purls(self): """ url = reverse("package-v2-bulk-lookup") data = {"purls": ["pkg:pypi/django@3.2", "pkg:npm/lodash@4.17.20"]} - with self.assertNumQueries(28): + with self.assertNumQueries(27): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIn("packages", response.data) @@ -446,7 +442,7 @@ def test_bulk_lookup_with_invalid_purls(self): """ url = reverse("package-v2-bulk-lookup") data = {"purls": ["pkg:pypi/nonexistent@1.0.0", "pkg:npm/unknown@0.0.1"]} - with self.assertNumQueries(4): + with self.assertNumQueries(3): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) # Since the packages don't exist, the response should be empty @@ -460,7 +456,7 @@ def test_bulk_lookup_with_empty_purls(self): """ url = reverse("package-v2-bulk-lookup") data = {"purls": []} - with self.assertNumQueries(3): + with self.assertNumQueries(2): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, 
status.HTTP_400_BAD_REQUEST) self.assertIn("error", response.data) @@ -474,7 +470,7 @@ def test_bulk_search_with_valid_purls(self): """ url = reverse("package-v2-bulk-search") data = {"purls": ["pkg:pypi/django@3.2", "pkg:npm/lodash@4.17.20"]} - with self.assertNumQueries(28): + with self.assertNumQueries(27): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIn("packages", response.data) @@ -502,7 +498,7 @@ def test_bulk_search_with_purl_only_true(self): "purls": ["pkg:pypi/django@3.2", "pkg:npm/lodash@4.17.20"], "purl_only": True, } - with self.assertNumQueries(17): + with self.assertNumQueries(16): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) # Since purl_only=True, response should be a list of PURLs @@ -529,7 +525,7 @@ def test_bulk_search_with_plain_purl_true(self): "purls": ["pkg:pypi/django@3.2", "pkg:pypi/django@3.2?extension=tar.gz"], "plain_purl": True, } - with self.assertNumQueries(16): + with self.assertNumQueries(15): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIn("packages", response.data) @@ -550,7 +546,7 @@ def test_bulk_search_with_purl_only_and_plain_purl_true(self): "purl_only": True, "plain_purl": True, } - with self.assertNumQueries(11): + with self.assertNumQueries(10): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) # Response should be a list of plain PURLs @@ -566,7 +562,7 @@ def test_bulk_search_with_invalid_purls(self): """ url = reverse("package-v2-bulk-search") data = {"purls": ["pkg:pypi/nonexistent@1.0.0", "pkg:npm/unknown@0.0.1"]} - with self.assertNumQueries(4): + with self.assertNumQueries(3): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) # Since the packages don't exist, the 
response should be empty @@ -580,7 +576,7 @@ def test_bulk_search_with_empty_purls(self): """ url = reverse("package-v2-bulk-search") data = {"purls": []} - with self.assertNumQueries(3): + with self.assertNumQueries(2): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertIn("error", response.data) @@ -592,7 +588,7 @@ def test_all_vulnerable_packages(self): Test the 'all' endpoint that returns all vulnerable package URLs. """ url = reverse("package-v2-all") - with self.assertNumQueries(4): + with self.assertNumQueries(3): response = self.client.get(url, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) # Since package1 is vulnerable, it should be returned @@ -606,7 +602,7 @@ def test_lookup_with_valid_purl(self): """ url = reverse("package-v2-lookup") data = {"purl": "pkg:pypi/django@3.2"} - with self.assertNumQueries(13): + with self.assertNumQueries(12): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(1, len(response.data)) @@ -635,7 +631,7 @@ def test_lookup_with_invalid_purl(self): """ url = reverse("package-v2-lookup") data = {"purl": "pkg:pypi/nonexistent@1.0.0"} - with self.assertNumQueries(4): + with self.assertNumQueries(3): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) # No packages or vulnerabilities should be returned @@ -648,7 +644,7 @@ def test_lookup_with_missing_purl(self): """ url = reverse("package-v2-lookup") data = {} - with self.assertNumQueries(3): + with self.assertNumQueries(2): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertIn("error", response.data) @@ -662,7 +658,7 @@ def test_lookup_with_invalid_purl_format(self): """ url = reverse("package-v2-lookup") data = {"purl": "invalid_purl_format"} - with 
self.assertNumQueries(4): + with self.assertNumQueries(3): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) # No packages or vulnerabilities should be returned diff --git a/vulnerabilities/tests/test_throttling.py b/vulnerabilities/tests/test_throttling.py index 8be404db2..62fbde4ef 100644 --- a/vulnerabilities/tests/test_throttling.py +++ b/vulnerabilities/tests/test_throttling.py @@ -9,94 +9,141 @@ import json -from django.contrib.auth.models import Group +from django.contrib.auth.models import Permission from django.core.cache import cache +from rest_framework import status from rest_framework.test import APIClient from rest_framework.test import APITestCase +from rest_framework.throttling import AnonRateThrottle +from vulnerabilities.api import PermissionBasedUserRateThrottle from vulnerabilities.models import ApiUser -class GroupUserRateThrottleApiTests(APITestCase): +def simulate_throttle_usage( + url, + client, + mock_use_count, + throttle_cls=PermissionBasedUserRateThrottle, +): + throttle = throttle_cls() + request = client.get(url).wsgi_request + + if cache_key := throttle.get_cache_key(request, view=None): + now = throttle.timer() + cache.set(cache_key, [now] * mock_use_count) + + +class PermissionBasedRateThrottleApiTests(APITestCase): def setUp(self): # Reset the api throttling to properly test the rate limit on anon users. # DRF stores throttling state in cache, clear cache to reset throttling. 
# See https://www.django-rest-framework.org/api-guide/throttling/#setting-up-the-cache cache.clear() - # User in bronze group - self.bronze_user = ApiUser.objects.create_api_user(username="bronze@mail.com") - bronze, _ = Group.objects.get_or_create(name="bronze") - self.bronze_user.groups.clear() - self.bronze_user.groups.add(bronze) - self.bronze_auth = f"Token {self.bronze_user.auth_token.key}" - self.bronze_user_csrf_client = APIClient(enforce_csrf_checks=True) - self.bronze_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.bronze_auth) - - # User in silver group (default group for api user) - self.silver_user = ApiUser.objects.create_api_user(username="silver@mail.com") - self.silver_auth = f"Token {self.silver_user.auth_token.key}" - self.silver_user_csrf_client = APIClient(enforce_csrf_checks=True) - self.silver_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.silver_auth) - - # User in gold group - self.gold_user = ApiUser.objects.create_api_user(username="gold@mail.com") - gold, _ = Group.objects.get_or_create(name="gold") - self.gold_user.groups.add(gold) - self.gold_auth = f"Token {self.gold_user.auth_token.key}" - self.gold_user_csrf_client = APIClient(enforce_csrf_checks=True) - self.gold_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.gold_auth) - - # create a staff user - self.staff_user = ApiUser.objects.create_api_user(username="staff@mail.com", is_staff=True) - self.staff_auth = f"Token {self.staff_user.auth_token.key}" - self.staff_csrf_client = APIClient(enforce_csrf_checks=True) - self.staff_csrf_client.credentials(HTTP_AUTHORIZATION=self.staff_auth) + permission_14400 = Permission.objects.get(codename="throttle_14400_hour") + permission_18000 = Permission.objects.get(codename="throttle_18000_hour") + permission_unrestricted = Permission.objects.get(codename="throttle_unrestricted") + + # basic user without any special throttling perm + self.basic_user = ApiUser.objects.create_api_user(username="a@mail.com") + self.basic_user_auth = 
f"Token {self.basic_user.auth_token.key}" + self.basic_user_csrf_client = APIClient(enforce_csrf_checks=True) + self.basic_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.basic_user_auth) + + # 14400/hour permission + self.th_14400_user = ApiUser.objects.create_api_user(username="b@mail.com") + self.th_14400_user.user_permissions.add(permission_14400) + self.th_14400_user_auth = f"Token {self.th_14400_user.auth_token.key}" + self.th_14400_user_csrf_client = APIClient(enforce_csrf_checks=True) + self.th_14400_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.th_14400_user_auth) + + # 18000/hour permission + self.th_18000_user = ApiUser.objects.create_api_user(username="c@mail.com") + self.th_18000_user.user_permissions.add(permission_18000) + self.th_18000_user_auth = f"Token {self.th_18000_user.auth_token.key}" + self.th_18000_user_csrf_client = APIClient(enforce_csrf_checks=True) + self.th_18000_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.th_18000_user_auth) + + # unrestricted throttling perm + self.th_unrestricted_user = ApiUser.objects.create_api_user(username="d@mail.com") + self.th_unrestricted_user.user_permissions.add(permission_unrestricted) + self.th_unrestricted_user_auth = f"Token {self.th_unrestricted_user.auth_token.key}" + self.th_unrestricted_user_csrf_client = APIClient(enforce_csrf_checks=True) + self.th_unrestricted_user_csrf_client.credentials( + HTTP_AUTHORIZATION=self.th_unrestricted_user_auth + ) self.csrf_client_anon = APIClient(enforce_csrf_checks=True) self.csrf_client_anon_1 = APIClient(enforce_csrf_checks=True) - def test_package_endpoint_throttling(self): - for i in range(0, 15): - response = self.bronze_user_csrf_client.get("/api/packages") - self.assertEqual(response.status_code, 200) + def test_basic_user_throttling(self): + simulate_throttle_usage( + url="/api/packages", + client=self.basic_user_csrf_client, + mock_use_count=10799, + ) - response = self.bronze_user_csrf_client.get("/api/packages") - # 429 - too many 
requests for bronze user - self.assertEqual(response.status_code, 429) + response = self.basic_user_csrf_client.get("/api/packages") + self.assertEqual(response.status_code, status.HTTP_200_OK) - for i in range(0, 20): - response = self.silver_user_csrf_client.get("/api/packages") - self.assertEqual(response.status_code, 200) + # exhausted 10800/hr allowed requests for basic user. + response = self.basic_user_csrf_client.get("/api/packages") + self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS) - response = self.silver_user_csrf_client.get("/api/packages") - # 429 - too many requests for silver user - self.assertEqual(response.status_code, 429) + def test_user_with_14400_perm_throttling(self): + simulate_throttle_usage( + url="/api/packages", + client=self.th_14400_user_csrf_client, + mock_use_count=14399, + ) - for i in range(0, 30): - response = self.gold_user_csrf_client.get("/api/packages") - self.assertEqual(response.status_code, 200) + response = self.th_14400_user_csrf_client.get("/api/packages") + self.assertEqual(response.status_code, status.HTTP_200_OK) - response = self.gold_user_csrf_client.get("/api/packages", format="json") - # 200 - gold user can access API unlimited times - self.assertEqual(response.status_code, 200) + # exhausted 14400/hr allowed requests for user with 14400 perm. 
+ response = self.th_14400_user_csrf_client.get("/api/packages") + self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS) - for i in range(0, 30): - response = self.staff_csrf_client.get("/api/packages") - self.assertEqual(response.status_code, 200) + def test_user_with_18000_perm_throttling(self): + simulate_throttle_usage( + url="/api/packages", + client=self.th_18000_user_csrf_client, + mock_use_count=17999, + ) - response = self.staff_csrf_client.get("/api/packages", format="json") - # 200 - staff user can access API unlimited times - self.assertEqual(response.status_code, 200) + response = self.th_18000_user_csrf_client.get("/api/packages") + self.assertEqual(response.status_code, status.HTTP_200_OK) - # A anonymous user can only access /packages endpoint 10 times a day - for _i in range(0, 10): - response = self.csrf_client_anon.get("/api/packages") - self.assertEqual(response.status_code, 200) + # exhausted 18000/hr allowed requests for user with 18000 perm. + response = self.th_18000_user_csrf_client.get("/api/packages") + self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS) + + def test_user_with_unrestricted_perm_throttling(self): + simulate_throttle_usage( + url="/api/packages", + client=self.th_unrestricted_user_csrf_client, + mock_use_count=20000, + ) + + # no throttling for user with unrestricted perm. + response = self.th_unrestricted_user_csrf_client.get("/api/packages") + self.assertEqual(response.status_code, status.HTTP_200_OK) + + def test_anon_throttling(self): + simulate_throttle_usage( + throttle_cls=AnonRateThrottle, + url="/api/packages", + client=self.csrf_client_anon, + mock_use_count=3599, + ) response = self.csrf_client_anon.get("/api/packages") - # 429 - too many requests for anon user - self.assertEqual(response.status_code, 429) + self.assertEqual(response.status_code, status.HTTP_200_OK) + + # exhausted 3600/hr allowed requests for anon. 
+ response = self.csrf_client_anon.get("/api/packages") + self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS) self.assertEqual( response.data.get("message"), "Your request has been throttled. Please contact support@nexb.com", @@ -104,7 +151,7 @@ def test_package_endpoint_throttling(self): response = self.csrf_client_anon.get("/api/vulnerabilities") # 429 - too many requests for anon user - self.assertEqual(response.status_code, 429) + self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS) self.assertEqual( response.data.get("message"), "Your request has been throttled. Please contact support@nexb.com", @@ -116,7 +163,7 @@ def test_package_endpoint_throttling(self): "/api/packages/bulk_search", data=data, content_type="application/json" ) # 429 - too many requests for anon user - self.assertEqual(response.status_code, 429) + self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS) self.assertEqual( response.data.get("message"), "Your request has been throttled. 
Please contact support@nexb.com", From f3c04370e5d1428484ffd7957384f44f2762f509 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 13 Jun 2025 17:42:15 +0530 Subject: [PATCH 026/390] Enable admin login page Signed-off-by: Keshav Priyadarshi --- vulnerablecode/urls.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 245b8e917..81ba5cadb 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -171,10 +171,10 @@ def __init__(self, *args, **kwargs): TemplateView.as_view(template_name="tos.html"), name="api_tos", ), - # path( - # "admin/", - # admin.site.urls, - # ), + path( + "admin/", + admin.site.urls, + ), ] if DEBUG: From 94dd104efa747545c23c526b37f413f1fa9add53 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 13 Jun 2025 18:34:49 +0530 Subject: [PATCH 027/390] Add perm to demote user to anon throttle rate Signed-off-by: Keshav Priyadarshi --- .../migrations/0093_alter_apiuser_options.py | 9 +++---- vulnerabilities/models.py | 7 +++--- vulnerabilities/tests/test_throttling.py | 24 ++++++++++++++++++- vulnerabilities/throttling.py | 8 +++++++ 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/vulnerabilities/migrations/0093_alter_apiuser_options.py b/vulnerabilities/migrations/0093_alter_apiuser_options.py index 771a3779b..9709439cc 100644 --- a/vulnerabilities/migrations/0093_alter_apiuser_options.py +++ b/vulnerabilities/migrations/0093_alter_apiuser_options.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.22 on 2025-06-13 08:07 +# Generated by Django 4.2.22 on 2025-06-13 12:44 from django.db import migrations @@ -14,9 +14,10 @@ class Migration(migrations.Migration): name="apiuser", options={ "permissions": [ - ("throttle_unrestricted", "Exempt from API throttling limits"), - ("throttle_18000_hour", "Can make 18000 API requests per hour"), - ("throttle_14400_hour", "Can make 14400 API requests per hour"), + ("throttle_unrestricted", "Can make 
api requests without throttling limits"), + ("throttle_18000_hour", "Can make 18000 api requests per hour"), + ("throttle_14400_hour", "Can make 14400 api requests per hour"), + ("throttle_3600_hour", "Can make 3600 api requests per hour"), ] }, ), diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 6a792e80b..777781e40 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1497,9 +1497,10 @@ class ApiUser(UserModel): class Meta: proxy = True permissions = [ - ("throttle_unrestricted", "Exempt from API throttling limits"), - ("throttle_18000_hour", "Can make 18000 API requests per hour"), - ("throttle_14400_hour", "Can make 14400 API requests per hour"), + ("throttle_unrestricted", "Can make api requests without throttling limits"), + ("throttle_18000_hour", "Can make 18000 api requests per hour"), + ("throttle_14400_hour", "Can make 14400 api requests per hour"), + ("throttle_3600_hour", "Can make 3600 api requests per hour"), ] diff --git a/vulnerabilities/tests/test_throttling.py b/vulnerabilities/tests/test_throttling.py index 62fbde4ef..d89b69c11 100644 --- a/vulnerabilities/tests/test_throttling.py +++ b/vulnerabilities/tests/test_throttling.py @@ -41,10 +41,18 @@ def setUp(self): # See https://www.django-rest-framework.org/api-guide/throttling/#setting-up-the-cache cache.clear() + permission_3600 = Permission.objects.get(codename="throttle_3600_hour") permission_14400 = Permission.objects.get(codename="throttle_14400_hour") permission_18000 = Permission.objects.get(codename="throttle_18000_hour") permission_unrestricted = Permission.objects.get(codename="throttle_unrestricted") + # user with 3600/hour permission + self.th_3600_user = ApiUser.objects.create_api_user(username="z@mail.com") + self.th_3600_user.user_permissions.add(permission_3600) + self.th_3600_user_auth = f"Token {self.th_3600_user.auth_token.key}" + self.th_3600_user_csrf_client = APIClient(enforce_csrf_checks=True) + 
self.th_3600_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.th_3600_user_auth) + # basic user without any special throttling perm self.basic_user = ApiUser.objects.create_api_user(username="a@mail.com") self.basic_user_auth = f"Token {self.basic_user.auth_token.key}" @@ -77,6 +85,20 @@ def setUp(self): self.csrf_client_anon = APIClient(enforce_csrf_checks=True) self.csrf_client_anon_1 = APIClient(enforce_csrf_checks=True) + def test_user_with_3600_perm_throttling(self): + simulate_throttle_usage( + url="/api/packages", + client=self.th_3600_user_csrf_client, + mock_use_count=3599, + ) + + response = self.th_3600_user_csrf_client.get("/api/packages") + self.assertEqual(response.status_code, status.HTTP_200_OK) + + # exhausted 3600/hr allowed requests. + response = self.th_3600_user_csrf_client.get("/api/packages") + self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS) + def test_basic_user_throttling(self): simulate_throttle_usage( url="/api/packages", @@ -87,7 +109,7 @@ def test_basic_user_throttling(self): response = self.basic_user_csrf_client.get("/api/packages") self.assertEqual(response.status_code, status.HTTP_200_OK) - # exhausted 10800/hr allowed requests for basic user. + # exhausted 10800/hr allowed requests. response = self.basic_user_csrf_client.get("/api/packages") self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS) diff --git a/vulnerabilities/throttling.py b/vulnerabilities/throttling.py index d6b0840eb..fd96acdef 100644 --- a/vulnerabilities/throttling.py +++ b/vulnerabilities/throttling.py @@ -13,6 +13,12 @@ class PermissionBasedUserRateThrottle(UserRateThrottle): + """ + Throttle authenticated users based on their assigned permissions. + If no throttling permission is assigned, default to rate for `user` + scope provided via `DEFAULT_THROTTLE_RATES` in settings.py. 
+ """ + def allow_request(self, request, view): user = request.user @@ -23,6 +29,8 @@ def allow_request(self, request, view): self.rate = "18000/hour" elif user.has_perm("vulnerabilities.throttle_14400_hour"): self.rate = "14400/hour" + elif user.has_perm("vulnerabilities.throttle_3600_hour"): + self.rate = "3600/hour" self.num_requests, self.duration = self.parse_rate(self.rate) From 2f03f11503f6f5997e80af57cd2b87d18cf0cdd6 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Sat, 21 Jun 2025 00:38:57 +0530 Subject: [PATCH 028/390] Add custom group admin with user selection Signed-off-by: Keshav Priyadarshi --- vulnerabilities/admin.py | 50 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/vulnerabilities/admin.py b/vulnerabilities/admin.py index eecef0276..176e3c0c0 100644 --- a/vulnerabilities/admin.py +++ b/vulnerabilities/admin.py @@ -9,6 +9,10 @@ from django import forms from django.contrib import admin +from django.contrib.admin.widgets import FilteredSelectMultiple +from django.contrib.auth.admin import GroupAdmin as BasicGroupAdmin +from django.contrib.auth.models import Group +from django.contrib.auth.models import User from django.core.validators import validate_email from vulnerabilities.models import ApiUser @@ -97,3 +101,49 @@ def get_form(self, request, obj=None, **kwargs): defaults["form"] = self.add_form defaults.update(kwargs) return super().get_form(request, obj, **defaults) + + +class GroupWithUsersForm(forms.ModelForm): + users = forms.ModelMultipleChoiceField( + queryset=User.objects.all(), + required=False, + widget=FilteredSelectMultiple("Users", is_stacked=False), + label="Users", + ) + + class Meta: + model = Group + fields = "__all__" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.fields["users"].label_from_instance = lambda user: ( + f"{user.username} | {user.email}" if user.email else user.username + ) + if self.instance.pk: + self.fields["users"].initial = 
self.instance.user_set.all() + + def save(self, commit=True): + group = super().save(commit=commit) + self.save_m2m() + group.user_set.set(self.cleaned_data["users"]) + return group + + +admin.site.unregister(Group) + + +@admin.register(Group) +class GroupAdmin(admin.ModelAdmin): + form = GroupWithUsersForm + search_fields = ("name",) + ordering = ("name",) + filter_horizontal = ("permissions",) + + def formfield_for_manytomany(self, db_field, request=None, **kwargs): + if db_field.name == "permissions": + qs = kwargs.get("queryset", db_field.remote_field.model.objects) + # Avoid a major performance hit resolving permission names which + # triggers a content_type load: + kwargs["queryset"] = qs.select_related("content_type") + return super().formfield_for_manytomany(db_field, request=request, **kwargs) From 103afbbb2e0f6566fe9c061a4380658ca97000dc Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 26 Jun 2025 14:20:32 +0530 Subject: [PATCH 029/390] Add high, medium, and low tier based throttling permissions Signed-off-by: Keshav Priyadarshi --- vulnerabilities/api.py | 5 +- vulnerabilities/api_extension.py | 9 +- .../migrations/0093_alter_apiuser_options.py | 22 +++-- vulnerabilities/models.py | 20 ++++- vulnerabilities/tests/test_throttling.py | 88 +++++++++---------- vulnerabilities/throttling.py | 40 ++++++--- vulnerablecode/settings.py | 13 ++- 7 files changed, 118 insertions(+), 79 deletions(-) diff --git a/vulnerabilities/api.py b/vulnerabilities/api.py index 50403583d..d994b297d 100644 --- a/vulnerabilities/api.py +++ b/vulnerabilities/api.py @@ -22,7 +22,6 @@ from rest_framework import viewsets from rest_framework.decorators import action from rest_framework.response import Response -from rest_framework.throttling import AnonRateThrottle from vulnerabilities.models import Alias from vulnerabilities.models import Exploit @@ -471,7 +470,7 @@ class PackageViewSet(viewsets.ReadOnlyModelViewSet): serializer_class = PackageSerializer filter_backends = 
(filters.DjangoFilterBackend,) filterset_class = PackageFilterSet - throttle_classes = [AnonRateThrottle, PermissionBasedUserRateThrottle] + throttle_classes = [PermissionBasedUserRateThrottle] def get_queryset(self): return super().get_queryset().with_is_vulnerable() @@ -688,7 +687,7 @@ def get_queryset(self): serializer_class = VulnerabilitySerializer filter_backends = (filters.DjangoFilterBackend,) filterset_class = VulnerabilityFilterSet - throttle_classes = [AnonRateThrottle, PermissionBasedUserRateThrottle] + throttle_classes = [PermissionBasedUserRateThrottle] class CPEFilterSet(filters.FilterSet): diff --git a/vulnerabilities/api_extension.py b/vulnerabilities/api_extension.py index 89ee644bf..01d98ca99 100644 --- a/vulnerabilities/api_extension.py +++ b/vulnerabilities/api_extension.py @@ -23,7 +23,6 @@ from rest_framework.serializers import ModelSerializer from rest_framework.serializers import Serializer from rest_framework.serializers import ValidationError -from rest_framework.throttling import AnonRateThrottle from vulnerabilities.api import BaseResourceSerializer from vulnerabilities.models import Exploit @@ -259,7 +258,7 @@ class V2PackageViewSet(viewsets.ReadOnlyModelViewSet): lookup_field = "purl" filter_backends = (filters.DjangoFilterBackend,) filterset_class = V2PackageFilterSet - throttle_classes = [PermissionBasedUserRateThrottle, AnonRateThrottle] + throttle_classes = [PermissionBasedUserRateThrottle] def get_queryset(self): return super().get_queryset().with_is_vulnerable().prefetch_related("vulnerabilities") @@ -345,7 +344,7 @@ class VulnerabilityViewSet(viewsets.ReadOnlyModelViewSet): lookup_field = "vulnerability_id" filter_backends = (filters.DjangoFilterBackend,) filterset_class = V2VulnerabilityFilterSet - throttle_classes = [PermissionBasedUserRateThrottle, AnonRateThrottle] + throttle_classes = [PermissionBasedUserRateThrottle] def get_queryset(self): """ @@ -381,7 +380,7 @@ class CPEViewSet(viewsets.ReadOnlyModelViewSet): 
).distinct() serializer_class = V2VulnerabilitySerializer filter_backends = (filters.DjangoFilterBackend,) - throttle_classes = [PermissionBasedUserRateThrottle, AnonRateThrottle] + throttle_classes = [PermissionBasedUserRateThrottle] filterset_class = CPEFilterSet @action(detail=False, methods=["post"]) @@ -420,4 +419,4 @@ class AliasViewSet(viewsets.ReadOnlyModelViewSet): serializer_class = V2VulnerabilitySerializer filter_backends = (filters.DjangoFilterBackend,) filterset_class = AliasFilterSet - throttle_classes = [PermissionBasedUserRateThrottle, AnonRateThrottle] + throttle_classes = [PermissionBasedUserRateThrottle] diff --git a/vulnerabilities/migrations/0093_alter_apiuser_options.py b/vulnerabilities/migrations/0093_alter_apiuser_options.py index 9709439cc..61c5b183d 100644 --- a/vulnerabilities/migrations/0093_alter_apiuser_options.py +++ b/vulnerabilities/migrations/0093_alter_apiuser_options.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.22 on 2025-06-13 12:44 +# Generated by Django 4.2.22 on 2025-06-25 18:56 from django.db import migrations @@ -14,10 +14,22 @@ class Migration(migrations.Migration): name="apiuser", options={ "permissions": [ - ("throttle_unrestricted", "Can make api requests without throttling limits"), - ("throttle_18000_hour", "Can make 18000 api requests per hour"), - ("throttle_14400_hour", "Can make 14400 api requests per hour"), - ("throttle_3600_hour", "Can make 3600 api requests per hour"), + ( + "throttle_3_unrestricted", + "Can make unlimited API requests without any throttling limits", + ), + ( + "throttle_2_high", + "Can make high number of API requests with minimal throttling", + ), + ( + "throttle_1_medium", + "Can make medium number of API requests with standard throttling", + ), + ( + "throttle_0_low", + "Can make low number of API requests with strict throttling", + ), ] }, ), diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 777781e40..e1c656e2b 100644 --- a/vulnerabilities/models.py +++ 
b/vulnerabilities/models.py @@ -1497,10 +1497,22 @@ class ApiUser(UserModel): class Meta: proxy = True permissions = [ - ("throttle_unrestricted", "Can make api requests without throttling limits"), - ("throttle_18000_hour", "Can make 18000 api requests per hour"), - ("throttle_14400_hour", "Can make 14400 api requests per hour"), - ("throttle_3600_hour", "Can make 3600 api requests per hour"), + ( + "throttle_3_unrestricted", + "Can make unlimited API requests without any throttling limits", + ), + ( + "throttle_2_high", + "Can make high number of API requests with minimal throttling", + ), + ( + "throttle_1_medium", + "Can make medium number of API requests with standard throttling", + ), + ( + "throttle_0_low", + "Can make low number of API requests with strict throttling", + ), ] diff --git a/vulnerabilities/tests/test_throttling.py b/vulnerabilities/tests/test_throttling.py index d89b69c11..25af231f8 100644 --- a/vulnerabilities/tests/test_throttling.py +++ b/vulnerabilities/tests/test_throttling.py @@ -14,22 +14,17 @@ from rest_framework import status from rest_framework.test import APIClient from rest_framework.test import APITestCase -from rest_framework.throttling import AnonRateThrottle from vulnerabilities.api import PermissionBasedUserRateThrottle from vulnerabilities.models import ApiUser -def simulate_throttle_usage( - url, - client, - mock_use_count, - throttle_cls=PermissionBasedUserRateThrottle, -): - throttle = throttle_cls() +def simulate_throttle_usage(url, client, mock_use_count): + throttle = PermissionBasedUserRateThrottle() request = client.get(url).wsgi_request if cache_key := throttle.get_cache_key(request, view=None): + print(cache_key) now = throttle.timer() cache.set(cache_key, [now] * mock_use_count) @@ -41,17 +36,17 @@ def setUp(self): # See https://www.django-rest-framework.org/api-guide/throttling/#setting-up-the-cache cache.clear() - permission_3600 = Permission.objects.get(codename="throttle_3600_hour") - permission_14400 = 
Permission.objects.get(codename="throttle_14400_hour") - permission_18000 = Permission.objects.get(codename="throttle_18000_hour") - permission_unrestricted = Permission.objects.get(codename="throttle_unrestricted") + permission_low = Permission.objects.get(codename="throttle_0_low") + permission_medium = Permission.objects.get(codename="throttle_1_medium") + permission_high = Permission.objects.get(codename="throttle_2_high") + permission_unrestricted = Permission.objects.get(codename="throttle_3_unrestricted") - # user with 3600/hour permission - self.th_3600_user = ApiUser.objects.create_api_user(username="z@mail.com") - self.th_3600_user.user_permissions.add(permission_3600) - self.th_3600_user_auth = f"Token {self.th_3600_user.auth_token.key}" - self.th_3600_user_csrf_client = APIClient(enforce_csrf_checks=True) - self.th_3600_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.th_3600_user_auth) + # user with low permission + self.th_low_user = ApiUser.objects.create_api_user(username="z@mail.com") + self.th_low_user.user_permissions.add(permission_low) + self.th_low_user_auth = f"Token {self.th_low_user.auth_token.key}" + self.th_low_user_csrf_client = APIClient(enforce_csrf_checks=True) + self.th_low_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.th_low_user_auth) # basic user without any special throttling perm self.basic_user = ApiUser.objects.create_api_user(username="a@mail.com") @@ -59,19 +54,19 @@ def setUp(self): self.basic_user_csrf_client = APIClient(enforce_csrf_checks=True) self.basic_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.basic_user_auth) - # 14400/hour permission - self.th_14400_user = ApiUser.objects.create_api_user(username="b@mail.com") - self.th_14400_user.user_permissions.add(permission_14400) - self.th_14400_user_auth = f"Token {self.th_14400_user.auth_token.key}" - self.th_14400_user_csrf_client = APIClient(enforce_csrf_checks=True) - 
self.th_14400_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.th_14400_user_auth) + # medium permission + self.th_medium_user = ApiUser.objects.create_api_user(username="b@mail.com") + self.th_medium_user.user_permissions.add(permission_medium) + self.th_medium_user_auth = f"Token {self.th_medium_user.auth_token.key}" + self.th_medium_user_csrf_client = APIClient(enforce_csrf_checks=True) + self.th_medium_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.th_medium_user_auth) - # 18000/hour permission - self.th_18000_user = ApiUser.objects.create_api_user(username="c@mail.com") - self.th_18000_user.user_permissions.add(permission_18000) - self.th_18000_user_auth = f"Token {self.th_18000_user.auth_token.key}" - self.th_18000_user_csrf_client = APIClient(enforce_csrf_checks=True) - self.th_18000_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.th_18000_user_auth) + # high permission + self.th_high_user = ApiUser.objects.create_api_user(username="c@mail.com") + self.th_high_user.user_permissions.add(permission_high) + self.th_high_user_auth = f"Token {self.th_high_user.auth_token.key}" + self.th_high_user_csrf_client = APIClient(enforce_csrf_checks=True) + self.th_high_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.th_high_user_auth) # unrestricted throttling perm self.th_unrestricted_user = ApiUser.objects.create_api_user(username="d@mail.com") @@ -85,60 +80,60 @@ def setUp(self): self.csrf_client_anon = APIClient(enforce_csrf_checks=True) self.csrf_client_anon_1 = APIClient(enforce_csrf_checks=True) - def test_user_with_3600_perm_throttling(self): + def test_user_with_low_perm_throttling(self): simulate_throttle_usage( url="/api/packages", - client=self.th_3600_user_csrf_client, - mock_use_count=3599, + client=self.th_low_user_csrf_client, + mock_use_count=10799, ) - response = self.th_3600_user_csrf_client.get("/api/packages") + response = self.th_low_user_csrf_client.get("/api/packages") self.assertEqual(response.status_code, status.HTTP_200_OK) - 
# exhausted 3600/hr allowed requests. - response = self.th_3600_user_csrf_client.get("/api/packages") + # exhausted 10800/hr allowed requests. + response = self.th_low_user_csrf_client.get("/api/packages") self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS) def test_basic_user_throttling(self): simulate_throttle_usage( url="/api/packages", client=self.basic_user_csrf_client, - mock_use_count=10799, + mock_use_count=14399, ) response = self.basic_user_csrf_client.get("/api/packages") self.assertEqual(response.status_code, status.HTTP_200_OK) - # exhausted 10800/hr allowed requests. + # exhausted 14400/hr allowed requests. response = self.basic_user_csrf_client.get("/api/packages") self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS) - def test_user_with_14400_perm_throttling(self): + def test_user_with_medium_perm_throttling(self): simulate_throttle_usage( url="/api/packages", - client=self.th_14400_user_csrf_client, + client=self.th_medium_user_csrf_client, mock_use_count=14399, ) - response = self.th_14400_user_csrf_client.get("/api/packages") + response = self.th_medium_user_csrf_client.get("/api/packages") self.assertEqual(response.status_code, status.HTTP_200_OK) # exhausted 14400/hr allowed requests for user with 14400 perm. - response = self.th_14400_user_csrf_client.get("/api/packages") + response = self.th_medium_user_csrf_client.get("/api/packages") self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS) - def test_user_with_18000_perm_throttling(self): + def test_user_with_high_perm_throttling(self): simulate_throttle_usage( url="/api/packages", - client=self.th_18000_user_csrf_client, + client=self.th_high_user_csrf_client, mock_use_count=17999, ) - response = self.th_18000_user_csrf_client.get("/api/packages") + response = self.th_high_user_csrf_client.get("/api/packages") self.assertEqual(response.status_code, status.HTTP_200_OK) # exhausted 18000/hr allowed requests for user with 18000 perm. 
- response = self.th_18000_user_csrf_client.get("/api/packages") + response = self.th_high_user_csrf_client.get("/api/packages") self.assertEqual(response.status_code, status.HTTP_429_TOO_MANY_REQUESTS) def test_user_with_unrestricted_perm_throttling(self): @@ -154,7 +149,6 @@ def test_user_with_unrestricted_perm_throttling(self): def test_anon_throttling(self): simulate_throttle_usage( - throttle_cls=AnonRateThrottle, url="/api/packages", client=self.csrf_client_anon, mock_use_count=3599, diff --git a/vulnerabilities/throttling.py b/vulnerabilities/throttling.py index fd96acdef..e14c1a1c0 100644 --- a/vulnerabilities/throttling.py +++ b/vulnerabilities/throttling.py @@ -7,6 +7,7 @@ # See https://aboutcode.org for more information about nexB OSS projects. # +from django.core.exceptions import ImproperlyConfigured from rest_framework.exceptions import Throttled from rest_framework.throttling import UserRateThrottle from rest_framework.views import exception_handler @@ -14,28 +15,41 @@ class PermissionBasedUserRateThrottle(UserRateThrottle): """ - Throttle authenticated users based on their assigned permissions. - If no throttling permission is assigned, default to rate for `user` - scope provided via `DEFAULT_THROTTLE_RATES` in settings.py. + Throttles authenticated users based on their assigned permissions. + If no throttling permission is assigned, defaults to `medium` throttling + for authenticated users and `anon` for unauthenticated users. 
""" + def __init__(self): + pass + def allow_request(self, request, view): user = request.user + throttling_tier = "medium" - if user and user.is_authenticated: - if user.has_perm("vulnerabilities.throttle_unrestricted"): - return True - elif user.has_perm("vulnerabilities.throttle_18000_hour"): - self.rate = "18000/hour" - elif user.has_perm("vulnerabilities.throttle_14400_hour"): - self.rate = "14400/hour" - elif user.has_perm("vulnerabilities.throttle_3600_hour"): - self.rate = "3600/hour" + if not user or not user.is_authenticated: + throttling_tier = "anon" + elif user.has_perm("vulnerabilities.throttle_3_unrestricted"): + return True + elif user.has_perm("vulnerabilities.throttle_2_high"): + throttling_tier = "high" + elif user.has_perm("vulnerabilities.throttle_1_medium"): + throttling_tier = "medium" + elif user.has_perm("vulnerabilities.throttle_0_low"): + throttling_tier = "low" - self.num_requests, self.duration = self.parse_rate(self.rate) + self.rate = self.get_throttle_rate(throttling_tier) + self.num_requests, self.duration = self.parse_rate(self.rate) return super().allow_request(request, view) + def get_throttle_rate(self, tier): + try: + return self.THROTTLE_RATES[tier] + except KeyError: + msg = f"No throttle rate set for {tier}." 
+ raise ImproperlyConfigured(msg) + def throttled_exception_handler(exception, context): """ diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index 63810397c..b1a51c0a7 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -190,7 +190,17 @@ LOGIN_REDIRECT_URL = "/" LOGOUT_REDIRECT_URL = "/" -REST_FRAMEWORK_DEFAULT_THROTTLE_RATES = {"anon": "3600/hour", "user": "10800/hour"} +THROTTLE_RATES_ANON = env.str("THROTTLE_RATES_ANON", default="3600/hour") +THROTTLE_RATES_USER_HIGH = env.str("THROTTLE_RATES_USER_HIGH", default="18000/hour") +THROTTLE_RATES_USER_MEDIUM = env.str("THROTTLE_RATES_USER_MEDIUM", default="14400/hour") +THROTTLE_RATES_USER_LOW = env.str("THROTTLE_RATES_USER_LOW", default="10800/hour") + +REST_FRAMEWORK_DEFAULT_THROTTLE_RATES = { + "anon": THROTTLE_RATES_ANON, + "low": THROTTLE_RATES_USER_LOW, + "medium": THROTTLE_RATES_USER_MEDIUM, + "high": THROTTLE_RATES_USER_HIGH, +} if IS_TESTS: @@ -235,7 +245,6 @@ ), "DEFAULT_THROTTLE_CLASSES": [ "vulnerabilities.throttling.PermissionBasedUserRateThrottle", - "rest_framework.throttling.AnonRateThrottle", ], "DEFAULT_THROTTLE_RATES": REST_FRAMEWORK_DEFAULT_THROTTLE_RATES, "EXCEPTION_HANDLER": "vulnerabilities.throttling.throttled_exception_handler", From 722935f3b39bcba4abee0b848afe21f94c8b1abf Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 26 Jun 2025 15:01:54 +0530 Subject: [PATCH 030/390] Test throttling behavior for user in group Signed-off-by: Keshav Priyadarshi --- vulnerabilities/tests/test_throttling.py | 22 ++++++++++++++++++++++ vulnerablecode/settings.py | 16 ++++++++-------- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/vulnerabilities/tests/test_throttling.py b/vulnerabilities/tests/test_throttling.py index 25af231f8..4ff83c70d 100644 --- a/vulnerabilities/tests/test_throttling.py +++ b/vulnerabilities/tests/test_throttling.py @@ -9,6 +9,7 @@ import json +from django.contrib.auth.models import Group from 
django.contrib.auth.models import Permission from django.core.cache import cache from rest_framework import status @@ -77,6 +78,16 @@ def setUp(self): HTTP_AUTHORIZATION=self.th_unrestricted_user_auth ) + # unrestricted throttling for group user + group, _ = Group.objects.get_or_create(name="Test Unrestricted") + group.permissions.add(permission_unrestricted) + + self.th_group_user = ApiUser.objects.create_api_user(username="g@mail.com") + self.th_group_user.groups.add(group) + self.th_group_user_auth = f"Token {self.th_group_user.auth_token.key}" + self.th_group_user_csrf_client = APIClient(enforce_csrf_checks=True) + self.th_group_user_csrf_client.credentials(HTTP_AUTHORIZATION=self.th_group_user_auth) + self.csrf_client_anon = APIClient(enforce_csrf_checks=True) self.csrf_client_anon_1 = APIClient(enforce_csrf_checks=True) @@ -147,6 +158,17 @@ def test_user_with_unrestricted_perm_throttling(self): response = self.th_unrestricted_user_csrf_client.get("/api/packages") self.assertEqual(response.status_code, status.HTTP_200_OK) + def test_user_in_group_with_unrestricted_perm_throttling(self): + simulate_throttle_usage( + url="/api/packages", + client=self.th_group_user_csrf_client, + mock_use_count=20000, + ) + + # no throttling for user in group with unrestricted perm. 
+ response = self.th_group_user_csrf_client.get("/api/packages") + self.assertEqual(response.status_code, status.HTTP_200_OK) + def test_anon_throttling(self): simulate_throttle_usage( url="/api/packages", diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index b1a51c0a7..0d4207b23 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -190,16 +190,16 @@ LOGIN_REDIRECT_URL = "/" LOGOUT_REDIRECT_URL = "/" -THROTTLE_RATES_ANON = env.str("THROTTLE_RATES_ANON", default="3600/hour") -THROTTLE_RATES_USER_HIGH = env.str("THROTTLE_RATES_USER_HIGH", default="18000/hour") -THROTTLE_RATES_USER_MEDIUM = env.str("THROTTLE_RATES_USER_MEDIUM", default="14400/hour") -THROTTLE_RATES_USER_LOW = env.str("THROTTLE_RATES_USER_LOW", default="10800/hour") +THROTTLE_RATE_ANON = env.str("THROTTLE_RATE_ANON", default="3600/hour") +THROTTLE_RATE_USER_HIGH = env.str("THROTTLE_RATE_USER_HIGH", default="18000/hour") +THROTTLE_RATE_USER_MEDIUM = env.str("THROTTLE_RATE_USER_MEDIUM", default="14400/hour") +THROTTLE_RATE_USER_LOW = env.str("THROTTLE_RATE_USER_LOW", default="10800/hour") REST_FRAMEWORK_DEFAULT_THROTTLE_RATES = { - "anon": THROTTLE_RATES_ANON, - "low": THROTTLE_RATES_USER_LOW, - "medium": THROTTLE_RATES_USER_MEDIUM, - "high": THROTTLE_RATES_USER_HIGH, + "anon": THROTTLE_RATE_ANON, + "low": THROTTLE_RATE_USER_LOW, + "medium": THROTTLE_RATE_USER_MEDIUM, + "high": THROTTLE_RATE_USER_HIGH, } From be5edc35298dda9a2fd9eb5b540708909099afae Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 1 Jul 2025 17:45:45 +0530 Subject: [PATCH 031/390] Resolve migration conflicts Signed-off-by: Keshav Priyadarshi --- ...alter_apiuser_options.py => 0095_alter_apiuser_options.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename vulnerabilities/migrations/{0093_alter_apiuser_options.py => 0095_alter_apiuser_options.py} (87%) diff --git a/vulnerabilities/migrations/0093_alter_apiuser_options.py 
b/vulnerabilities/migrations/0095_alter_apiuser_options.py similarity index 87% rename from vulnerabilities/migrations/0093_alter_apiuser_options.py rename to vulnerabilities/migrations/0095_alter_apiuser_options.py index 61c5b183d..2f30298a4 100644 --- a/vulnerabilities/migrations/0093_alter_apiuser_options.py +++ b/vulnerabilities/migrations/0095_alter_apiuser_options.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.22 on 2025-06-25 18:56 +# Generated by Django 4.2.22 on 2025-07-01 11:59 from django.db import migrations @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ("vulnerabilities", "0092_pipelineschedule_pipelinerun"), + ("vulnerabilities", "0094_advisoryalias_advisoryreference_advisoryseverity_and_more"), ] operations = [ From e43b6933ac8a708c0cd1637749decd8db6d7ad19 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 18 Jun 2025 11:18:53 +0530 Subject: [PATCH 032/390] Prevent deletion of scheduled jobs when result_ttl < interval Signed-off-by: Keshav Priyadarshi --- vulnerabilities/schedules.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vulnerabilities/schedules.py b/vulnerabilities/schedules.py index 1c34bd759..687dafc7a 100644 --- a/vulnerabilities/schedules.py +++ b/vulnerabilities/schedules.py @@ -14,7 +14,6 @@ from redis.exceptions import ConnectionError from vulnerabilities.tasks import enqueue_pipeline -from vulnerablecode.settings import VULNERABLECODE_PIPELINE_TIMEOUT log = logging.getLogger(__name__) scheduler = django_rq.get_scheduler() @@ -36,7 +35,6 @@ def schedule_execution(pipeline_schedule, execute_now=False): func=enqueue_pipeline, args=[pipeline_schedule.pipeline_id], interval=interval_in_seconds, - result_ttl=f"{VULNERABLECODE_PIPELINE_TIMEOUT}h", repeat=None, ) return job._id From 6bd10775837cc806ef10e893dfe043e01c998b6e Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 18 Jun 2025 15:16:25 +0530 Subject: [PATCH 033/390] Rename schedule ui to Pipeline Dashboard Signed-off-by: Keshav 
Priyadarshi --- vulnerabilities/templates/navbar.html | 4 ++-- .../{pipeline_schedule_list.html => pipeline_dashboard.html} | 4 ++-- vulnerabilities/templates/pipeline_run_list.html | 2 +- vulnerabilities/views.py | 2 +- vulnerablecode/urls.py | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) rename vulnerabilities/templates/{pipeline_schedule_list.html => pipeline_dashboard.html} (98%) diff --git a/vulnerabilities/templates/navbar.html b/vulnerabilities/templates/navbar.html index 51b796ae4..b7a7fba0a 100644 --- a/vulnerabilities/templates/navbar.html +++ b/vulnerabilities/templates/navbar.html @@ -20,8 +20,8 @@
{% endif %} -
-
- {{ form.captcha }} -
+
+ {{ form.captcha }}
diff --git a/vulnerabilities/templates/api_user_creation_form.html b/vulnerabilities/templates/api_user_creation_form.html index 4c596f094..5a913f5ae 100644 --- a/vulnerabilities/templates/api_user_creation_form.html +++ b/vulnerabilities/templates/api_user_creation_form.html @@ -26,11 +26,13 @@
{% endif %}
-

- VulnerableCode API key request -

-
+
+

VulnerableCode API Key Request

+
+
+ +

You need an API key to access the VulnerableCode JSON REST API. Please check the live OpenAPI documentation @@ -41,17 +43,34 @@


- - {% csrf_token %} - {% for field in form %} -
- -
- {{ field }} -
+
+
+ + {% csrf_token %} +
+
+
- {% endfor %} - - +
+
+
+ +
+
+
+
+ +
+
+
+ {{ form.captcha }} +
+ + +
+
{% endblock %} diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index 735e01a82..3111c9523 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -83,21 +83,10 @@ "drf_spectacular", # required for Django collectstatic discovery "drf_spectacular_sidecar", - "django_recaptcha", "django_rq", "django_altcha", ) -if env.str("RECAPTCHA_PUBLIC_KEY", None): - RECAPTCHA_PUBLIC_KEY = env.str("RECAPTCHA_PUBLIC_KEY") - -if env.str("RECAPTCHA_PRIVATE_KEY", None): - RECAPTCHA_PRIVATE_KEY = env.str("RECAPTCHA_PRIVATE_KEY") - -SILENCED_SYSTEM_CHECKS = ["django_recaptcha.recaptcha_test_key_error"] -SILENCED_SYSTEM_CHECKS = ["django_recaptcha.recaptcha_test_key_error"] -RECAPTCHA_DOMAIN = env.str("RECAPTCHA_DOMAIN", "www.recaptcha.net") - MIDDLEWARE = ( "django.middleware.security.SecurityMiddleware", From e1a028fc690a3ce00244db418659b459a97e2c03 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 24 Jun 2025 21:52:41 +0530 Subject: [PATCH 038/390] Add top navigation button to pipeline dashboard Signed-off-by: Keshav Priyadarshi --- vulnerabilities/forms.py | 12 ++++++++++-- .../templates/pipeline_run_details.html | 14 ++++++++------ vulnerabilities/templates/pipeline_run_list.html | 5 ++++- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/vulnerabilities/forms.py b/vulnerabilities/forms.py index d980a5d9d..7ee348354 100644 --- a/vulnerabilities/forms.py +++ b/vulnerabilities/forms.py @@ -46,7 +46,11 @@ class AdvisorySearchForm(forms.Form): class ApiUserCreationForm(forms.ModelForm): """Support a simplified creation for API-only users directly from the UI.""" - captcha = AltchaField(floating=True) + captcha = AltchaField( + floating=True, + hidefooter=True, + hidelogo=True, + ) class Meta: model = ApiUser @@ -105,4 +109,8 @@ class PipelineSchedulePackageForm(forms.Form): class AdminLoginForm(AdminAuthenticationForm): - captcha = AltchaField(floating=True) + captcha = AltchaField( + floating=True, + hidefooter=True, + 
hidelogo=True, + ) diff --git a/vulnerabilities/templates/pipeline_run_details.html b/vulnerabilities/templates/pipeline_run_details.html index 6ebf92807..8dcaba959 100644 --- a/vulnerabilities/templates/pipeline_run_details.html +++ b/vulnerabilities/templates/pipeline_run_details.html @@ -34,8 +34,10 @@ {% endblock %} {% block content %} -
+ + Back to All Runs +

{{ pipeline_name }} Run Log


@@ -181,12 +183,12 @@

Log Output

{% endif %} - - - Back to All Runs - + {% if run.run_output or run.log %} + + Back to All Runs + + {% endif %} -
{% endblock %} diff --git a/vulnerabilities/templates/pipeline_run_list.html b/vulnerabilities/templates/pipeline_run_list.html index 522e78cd6..f11eca0b8 100644 --- a/vulnerabilities/templates/pipeline_run_list.html +++ b/vulnerabilities/templates/pipeline_run_list.html @@ -34,6 +34,7 @@
+ Back to Dashboard

{{ pipeline_name }} Runs


@@ -109,7 +110,9 @@

{{ pipeline_name }} Runs

{% endfor %} - Back to Dashboard + {% if run_list|length > 10 %} + Back to Dashboard + {% endif %}
{% if is_paginated %}
+ + +{% endif %} +{% endblock %} diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 856c10ce6..b9d172ca1 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -37,6 +37,7 @@ from vulnerabilities.forms import PackageSearchForm from vulnerabilities.forms import PipelineSchedulePackageForm from vulnerabilities.forms import VulnerabilitySearchForm +from vulnerabilities.models import AdvisorySetMember from vulnerabilities.models import ImpactedPackage from vulnerabilities.models import PipelineRun from vulnerabilities.models import PipelineSchedule @@ -292,6 +293,110 @@ def get_object(self, queryset=None): return package +class PackageV3Details(DetailView): + model = models.PackageV2 + template_name = "package_details_v3.html" + slug_url_kwarg = "purl" + slug_field = "purl" + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + package = self.object + + next_non_vulnerable, latest_non_vulnerable = package.get_non_vulnerable_versions() + + context["package"] = package + context["next_non_vulnerable"] = next_non_vulnerable + context["latest_non_vulnerable"] = latest_non_vulnerable + context["package_search_form"] = PackageSearchForm(self.request.GET) + + affected_by_advisories_qs = ( + models.AdvisorySet.objects.filter(package=package, relation_type="affecting") + .select_related("primary_advisory") + .prefetch_related( + Prefetch( + "members", + queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + "advisory" + ), + to_attr="secondary_members", + ) + ) + ) + + fixing_advisories_qs = ( + models.AdvisorySet.objects.filter(package=package, relation_type="fixing") + .select_related("primary_advisory") + .prefetch_related( + Prefetch( + "members", + queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + "advisory" + ), + to_attr="secondary_members", + ) + ) + ) + + print(affected_by_advisories_qs) + print(fixing_advisories_qs) + + affected_by_advisories_url 
= None + fixing_advisories_url = None + + affected_by_advisories_qs_ids = affected_by_advisories_qs.only("id") + fixing_advisories_qs_ids = fixing_advisories_qs.only("id") + + # affected_by_advisories = list(affected_by_advisories_qs_ids[:101]) + # if len(affected_by_advisories) > 100: + # affected_by_advisories_url = reverse_lazy( + # "affected_by_advisories_v2", kwargs={"purl": package.package_url} + # ) + # context["affected_by_advisories_v2_url"] = affected_by_advisories_url + # context["affected_by_advisories_v2"] = [] + # context["fixed_package_details"] = {} + + # else: + fixed_pkg_details = get_fixed_package_details(package) + + context["affected_by_advisories_v2"] = affected_by_advisories_qs + context["fixed_package_details"] = fixed_pkg_details + context["affected_by_advisories_v2_url"] = None + + # fixing_advisories = list(fixing_advisories_qs_ids[:101]) + # if len(fixing_advisories) > 100: + # fixing_advisories_url = reverse_lazy( + # "fixing_advisories_v2", kwargs={"purl": package.package_url} + # ) + # context["fixing_advisories_v2_url"] = fixing_advisories_url + # context["fixing_advisories_v2"] = [] + + # else: + context["fixing_advisories_v2"] = fixing_advisories_qs + context["fixing_advisories_v2_url"] = None + + return context + + def get_object(self, queryset=None): + if queryset is None: + queryset = self.get_queryset() + + purl = self.kwargs.get(self.slug_url_kwarg) + if purl: + queryset = queryset.for_purl(purl) + else: + cls = self.__class__.__name__ + raise AttributeError( + f"Package details view {cls} must be called with a purl, " f"but got: {purl!r}" + ) + + try: + package = queryset.get() + except queryset.model.DoesNotExist: + raise Http404(f"No Package found for purl: {purl}") + return package + + def get_fixed_package_details(package): rows = package.affected_in_impacts.values_list( "advisory__avid", diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index eb1bc006b..efbfc9c6f 100644 --- a/vulnerablecode/urls.py +++ 
b/vulnerablecode/urls.py @@ -29,7 +29,7 @@ from vulnerabilities.api_v3 import AffectedByAdvisoriesViewSet from vulnerabilities.api_v3 import FixingAdvisoriesViewSet from vulnerabilities.api_v3 import PackageV3ViewSet -from vulnerabilities.views import AdminLoginView +from vulnerabilities.views import AdminLoginView, PackageV3Details from vulnerabilities.views import AdvisoryDetails from vulnerabilities.views import AdvisoryPackagesDetails from vulnerabilities.views import AffectedByAdvisoriesListView @@ -141,7 +141,7 @@ def __init__(self, *args, **kwargs): ), re_path( r"^packages/v2/(?Ppkg:.+)$", - PackageV2Details.as_view(), + PackageV3Details.as_view(), name="package_details_v2", ), re_path( From 59fd85ff1bc14535e206d941fb5eb804e360ebbf Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 27 Mar 2026 01:47:24 +0530 Subject: [PATCH 327/390] Fix content hash logic Signed-off-by: Tushar Goel --- vulnerabilities/utils.py | 5 +---- vulnerablecode/urls.py | 3 ++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index f90d42401..88adf1c41 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -611,6 +611,7 @@ def normalize_text(text): def normalize_list(lst): """Sort a list to ensure consistent ordering.""" + lst = [x for x in lst if x] return sorted(lst) if lst else [] @@ -885,13 +886,9 @@ def compute_advisory_content(advisory_data): if isinstance(advisory_data, AdvisoryV2): advisory_data = advisory_data.to_advisory_data() normalized_data = { - "summary": normalize_text(advisory_data.summary), "affected_packages": [ pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg ], - "severities": [sev.to_dict() for sev in normalize_list(advisory_data.severities) if sev], - "weaknesses": normalize_list(advisory_data.weaknesses), - "patches": [patch.to_dict() for patch in normalize_list(advisory_data.patches)], } normalized_json = json.dumps(normalized_data, 
separators=(",", ":"), sort_keys=True) diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index efbfc9c6f..745d2a469 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -29,7 +29,7 @@ from vulnerabilities.api_v3 import AffectedByAdvisoriesViewSet from vulnerabilities.api_v3 import FixingAdvisoriesViewSet from vulnerabilities.api_v3 import PackageV3ViewSet -from vulnerabilities.views import AdminLoginView, PackageV3Details +from vulnerabilities.views import AdminLoginView from vulnerabilities.views import AdvisoryDetails from vulnerabilities.views import AdvisoryPackagesDetails from vulnerabilities.views import AffectedByAdvisoriesListView @@ -41,6 +41,7 @@ from vulnerabilities.views import PackageSearch from vulnerabilities.views import PackageSearchV2 from vulnerabilities.views import PackageV2Details +from vulnerabilities.views import PackageV3Details from vulnerabilities.views import PipelineRunDetailView from vulnerabilities.views import PipelineRunListView from vulnerabilities.views import PipelineScheduleListView From f562fd85c8440aab8229fcec73774f83c82deb09 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 27 Mar 2026 01:48:13 +0530 Subject: [PATCH 328/390] Test out small use case Signed-off-by: Tushar Goel --- .../pipelines/v2_improvers/group_advisories_for_packages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py index 244de770e..386ce63af 100644 --- a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py +++ b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py @@ -118,7 +118,7 @@ def get_merged_identifier_groups(advisories): def group_advisoris_for_packages(logger=None): - for package in PackageV2.objects.iterator(): + for package in PackageV2.objects.filter(package_url="pkg:pypi/django@1.5.2").iterator(): 
affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( purl=package.purl ).prefetch_related("aliases") From 931e111e8eed66a23894c203406c31693a035708 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 27 Mar 2026 02:09:16 +0530 Subject: [PATCH 329/390] Group for all packages Signed-off-by: Tushar Goel --- .../pipelines/v2_improvers/group_advisories_for_packages.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py index 386ce63af..f26211f10 100644 --- a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py +++ b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py @@ -36,11 +36,13 @@ def merge_advisories(advisories): advisories = list(advisories) + print(len(advisories)) + content_hash_map = defaultdict(list) result_groups = [] for adv in advisories: - + print(adv.avid) if adv.advisory_content_hash: content_hash_map[adv.advisory_content_hash].append(adv) else: @@ -118,7 +120,7 @@ def get_merged_identifier_groups(advisories): def group_advisoris_for_packages(logger=None): - for package in PackageV2.objects.filter(package_url="pkg:pypi/django@1.5.2").iterator(): + for package in PackageV2.objects.iterator(): affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( purl=package.purl ).prefetch_related("aliases") From 3286f90cb8214544dcb7609bbf1b4e418c7a50ad Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 27 Mar 2026 02:53:28 +0530 Subject: [PATCH 330/390] Change process to compute hash Signed-off-by: Tushar Goel --- .../group_advisories_for_packages.py | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py index f26211f10..52d16c093 
100644 --- a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py +++ b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py @@ -7,6 +7,8 @@ # See https://aboutcode.org for more information about nexB OSS projects. # +import hashlib +import json from collections import defaultdict from django.db import transaction @@ -16,7 +18,7 @@ from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import PackageV2 from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.utils import compute_advisory_content +from vulnerabilities.utils import normalize_list class GroupAdvisoriesForPackages(VulnerableCodePipeline): @@ -42,15 +44,26 @@ def merge_advisories(advisories): result_groups = [] for adv in advisories: - print(adv.avid) - if adv.advisory_content_hash: - content_hash_map[adv.advisory_content_hash].append(adv) + affected = [] + fixed = [] + + for impact in adv.impacted_packages.all(): + affected.extend([pkg.package_url for pkg in impact.affecting_packages.all()]) + + fixed.extend([pkg.package_url for pkg in impact.fixed_by_packages.all()]) + + normalized_data = { + "affected_packages": normalize_list(affected), + "fixed_packages": normalize_list(fixed), + } + + normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True) + content_hash = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest() + + if content_hash: + content_hash_map[content_hash].append(adv) else: - content_hash = compute_advisory_content(advisory_data=adv) - if content_hash: - content_hash_map[content_hash].append(adv) - else: - result_groups.append([adv]) + result_groups.append([adv]) final_groups = [] From 8266b254887d3e70e9eb391824e64bec4d6d2cd4 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 27 Mar 2026 03:01:05 +0530 Subject: [PATCH 331/390] Prefetch affected packages Signed-off-by: Tushar Goel --- .../group_advisories_for_packages.py | 34 ++++++++++++------- 1 file changed, 21 
insertions(+), 13 deletions(-) diff --git a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py index 52d16c093..0d466e44a 100644 --- a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py +++ b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py @@ -38,8 +38,6 @@ def merge_advisories(advisories): advisories = list(advisories) - print(len(advisories)) - content_hash_map = defaultdict(list) result_groups = [] @@ -77,18 +75,15 @@ def merge_advisories(advisories): def get_merged_identifier_groups(advisories): identifier_groups = defaultdict(set) - advisory_to_identifiers = defaultdict(set) advisories = list(advisories) for adv in advisories: identifier_groups[adv.advisory_id].add(adv) - advisory_to_identifiers[adv].add(adv.advisory_id) - for alias in adv.aliases.all(): - identifier_groups[alias.alias].add(adv) - advisory_to_identifiers[adv].add(alias.alias) + for alias in adv.aliases.values_list("alias", flat=True): + identifier_groups[alias].add(adv) groups = [set(advs) for advs in identifier_groups.values() if len(advs) > 1] @@ -134,13 +129,26 @@ def get_merged_identifier_groups(advisories): def group_advisoris_for_packages(logger=None): for package in PackageV2.objects.iterator(): - affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( - purl=package.purl - ).prefetch_related("aliases") + print(package) + affecting_advisories = ( + AdvisoryV2.objects + .latest_affecting_advisories_for_purl(purl=package.purl) + .prefetch_related( + "aliases", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", + ) + ) - fixed_by_advisories = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( - purl=package.purl - ).prefetch_related("aliases") + fixed_by_advisories = ( + AdvisoryV2.objects + .latest_fixed_by_advisories_for_purl(purl=package.purl) + .prefetch_related( + "aliases", + 
"impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", + ) + ) try: delete_and_save_advisory_set(package, affecting_advisories, relation="affecting") From bf29369b4c4e67b8e31648f41be78db0ca9d6463 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 27 Mar 2026 03:22:05 +0530 Subject: [PATCH 332/390] Cache the advisory content hash Signed-off-by: Tushar Goel --- .../group_advisories_for_packages.py | 76 ++++++++++--------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py index 0d466e44a..87b05eb5f 100644 --- a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py +++ b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py @@ -34,34 +34,23 @@ def group_advisories_for_packages(self): group_advisoris_for_packages(logger=self.log) +CONTENT_HASH_CACHE = {} + + def merge_advisories(advisories): advisories = list(advisories) content_hash_map = defaultdict(list) - result_groups = [] for adv in advisories: - affected = [] - fixed = [] - - for impact in adv.impacted_packages.all(): - affected.extend([pkg.package_url for pkg in impact.affecting_packages.all()]) - - fixed.extend([pkg.package_url for pkg in impact.fixed_by_packages.all()]) - - normalized_data = { - "affected_packages": normalize_list(affected), - "fixed_packages": normalize_list(fixed), - } - - normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True) - content_hash = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest() - - if content_hash: - content_hash_map[content_hash].append(adv) + if adv.avid in CONTENT_HASH_CACHE: + content_hash = CONTENT_HASH_CACHE[adv.avid] else: - result_groups.append([adv]) + content_hash = compute_advisory_content_hash(adv) + CONTENT_HASH_CACHE[adv.avid] = content_hash + + content_hash_map[content_hash].append(adv) final_groups = 
[] @@ -72,6 +61,25 @@ def merge_advisories(advisories): return final_groups +def compute_advisory_content_hash(adv): + affected = [] + fixed = [] + + for impact in adv.impacted_packages.all(): + affected.extend([pkg.package_url for pkg in impact.affecting_packages.all()]) + + fixed.extend([pkg.package_url for pkg in impact.fixed_by_packages.all()]) + + normalized_data = { + "affected_packages": normalize_list(affected), + "fixed_packages": normalize_list(fixed), + } + + normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True) + content_hash = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest() + return content_hash + + def get_merged_identifier_groups(advisories): identifier_groups = defaultdict(set) @@ -130,24 +138,20 @@ def get_merged_identifier_groups(advisories): def group_advisoris_for_packages(logger=None): for package in PackageV2.objects.iterator(): print(package) - affecting_advisories = ( - AdvisoryV2.objects - .latest_affecting_advisories_for_purl(purl=package.purl) - .prefetch_related( - "aliases", - "impacted_packages__affecting_packages", - "impacted_packages__fixed_by_packages", - ) + affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( + purl=package.purl + ).prefetch_related( + "aliases", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", ) - fixed_by_advisories = ( - AdvisoryV2.objects - .latest_fixed_by_advisories_for_purl(purl=package.purl) - .prefetch_related( - "aliases", - "impacted_packages__affecting_packages", - "impacted_packages__fixed_by_packages", - ) + fixed_by_advisories = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( + purl=package.purl + ).prefetch_related( + "aliases", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", ) try: From 680f45e920342325f5e3040c25aea135dd524890 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 27 Mar 2026 12:29:14 +0530 Subject: [PATCH 333/390] Group specific 
ecosystems Signed-off-by: Tushar Goel --- .../v2_improvers/group_advisories_for_packages.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py index 87b05eb5f..99aa079f7 100644 --- a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py +++ b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py @@ -41,6 +41,9 @@ def merge_advisories(advisories): advisories = list(advisories) + if len(advisories) > 1000: + return + content_hash_map = defaultdict(list) for adv in advisories: @@ -136,7 +139,9 @@ def get_merged_identifier_groups(advisories): def group_advisoris_for_packages(logger=None): - for package in PackageV2.objects.iterator(): + for package in PackageV2.objects.filter( + type__in=["npm", "pypi", "nuget", "maven", "composer"] + ).iterator(): print(package) affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( purl=package.purl From b9c4f185abf7a3ce295b82a10f0ccea00f1c65a7 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 27 Mar 2026 12:32:46 +0530 Subject: [PATCH 334/390] Group specific ecosystems Signed-off-by: Tushar Goel --- .../pipelines/v2_improvers/group_advisories_for_packages.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py index 99aa079f7..75b983e1c 100644 --- a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py +++ b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py @@ -41,9 +41,6 @@ def merge_advisories(advisories): advisories = list(advisories) - if len(advisories) > 1000: - return - content_hash_map = defaultdict(list) for adv in advisories: From 312d4444b0e98de04888d47e07c5a393960e224c Mon Sep 17 00:00:00 2001 From: Tushar Goel 
Date: Sat, 28 Mar 2026 01:00:19 +0530 Subject: [PATCH 335/390] Use V2 views Signed-off-by: Tushar Goel --- .../templates/package_details_v2.html | 52 ++------- vulnerabilities/utils.py | 100 ++++++++++++++++++ vulnerabilities/views.py | 67 ++++++++---- vulnerablecode/urls.py | 2 +- 4 files changed, 156 insertions(+), 65 deletions(-) diff --git a/vulnerabilities/templates/package_details_v2.html b/vulnerabilities/templates/package_details_v2.html index f90585b9d..06c15f0d0 100644 --- a/vulnerabilities/templates/package_details_v2.html +++ b/vulnerabilities/templates/package_details_v2.html @@ -136,8 +136,6 @@ Advisory - Source - Date Published Summary Fixed in package version @@ -147,15 +145,15 @@ {% for advisory in affected_by_advisories_v2 %} - - {{advisory.primary.avid }} + + {{advisory.identifier }}
- {% if advisory.primary.alias|length != 0 %} + {% if advisory.aliases|length != 0 %} Aliases: {% endif %}
- {% for alias in advisory.primary.alias %} + {% for alias in advisory.aliases %} {% if alias.url %} {{ alias }} @@ -166,26 +164,12 @@ {% endif %} {% endfor %} - {% if advisory.secondary|length != 0 %} -

Supporting advisories are listed below the primary advisory.

- {% for secondary in advisory.secondary %} - - {{secondary.avid }} - - {% endfor %} - {% endif %} - {{advisory.primary.url}} - - - {{advisory.primary.date_published}} - - - {{ advisory.primary.summary }} + {{ advisory.advisory.summary|truncatewords:20 }} - {% with fixed=fixed_package_details|get_item:advisory.primary.avid %} + {% with fixed=fixed_package_details|get_item:advisory.advisory.avid %} {% if fixed %} {% for item in fixed %}
@@ -240,8 +224,6 @@ Advisory - Source - Date Published Summary Aliases @@ -250,30 +232,16 @@ {% for advisory in fixing_advisories_v2 %} - - {{advisory.primary.avid }} + + {{advisory.identifier }}
- {% if advisory.secondary|length != 0 %} -

Supporting advisories are listed below the primary advisory.

- {% for secondary in advisory.secondary %} - - {{secondary.avid }} - - {% endfor %} - {% endif %} - - - {{advisory.primary.url}} - - - {{advisory.primary.date_published}} - {{ advisory.primary.summary }} + {{ advisory.advisory.summary|truncatewords:20 }} - {% for alias in advisory.primary.alias %} + {% for alias in advisory.aliases %} {% if alias.url %} {{ alias }} diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 88adf1c41..2dd606a92 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -895,3 +895,103 @@ def compute_advisory_content(advisory_data): content_hash = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest() return content_hash + + +def merge_advisories(advisories, package): + + advisories = list(advisories) + + content_hash_map = defaultdict(list) + + for adv in advisories: + content_hash = compute_advisory_content_hash(adv, package) + content_hash_map[content_hash].append(adv) + + final_groups = [] + + for group in content_hash_map.values(): + groups = get_merged_identifier_groups(group) + final_groups.extend(groups) + + return final_groups + + +def compute_advisory_content_hash(adv, package): + affected = [] + fixed = [] + + version_less_purl = PackageURL( + type=package.type, + namespace=package.namespace, + name=package.name, + qualifiers=package.qualifiers, + subpath=package.subpath, + ) + + for impact in adv.impacted_packages.filter(base_purl=str(version_less_purl)): + affected.extend([pkg.package_url for pkg in impact.affecting_packages.all()]) + fixed.extend([pkg.package_url for pkg in impact.fixed_by_packages.all()]) + + normalized_data = { + "affected_packages": normalize_list(affected), + "fixed_packages": normalize_list(fixed), + } + + normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True) + content_hash = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest() + return content_hash + + +def get_merged_identifier_groups(advisories): + + identifier_groups = 
defaultdict(set) + + advisories = list(advisories) + + for adv in advisories: + + identifier_groups[adv.advisory_id].add(adv) + + for alias in adv.aliases.values_list("alias", flat=True): + identifier_groups[alias].add(adv) + + groups = [set(advs) for advs in identifier_groups.values() if len(advs) > 1] + + merged = [] + + for group in groups: + group = set(group) + + i = 0 + while i < len(merged): + if group & merged[i]: + group |= merged[i] + merged.pop(i) + else: + i += 1 + + merged.append(group) + + all_grouped = set() + for g in merged: + all_grouped |= g + + for adv in advisories: + if adv not in all_grouped: + merged.append({adv}) + + final_groups = [] + + for group in merged: + identifiers = set() + for adv in group: + for alias in adv.aliases.all(): + identifiers.add(alias) + + primary = max(group, key=lambda a: a.precedence if a.precedence is not None else -1) + + secondary = [a for a in group if a != primary] + + final_groups.append((identifiers, primary, secondary)) + + return final_groups diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index b9d172ca1..c8bfc6634 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -38,6 +38,7 @@ from vulnerabilities.forms import PipelineSchedulePackageForm from vulnerabilities.forms import VulnerabilitySearchForm from vulnerabilities.models import AdvisorySetMember +from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import ImpactedPackage from vulnerabilities.models import PipelineRun from vulnerabilities.models import PipelineSchedule @@ -45,6 +46,7 @@ from vulnerabilities.severity_systems import EPSS from vulnerabilities.severity_systems import SCORING_SYSTEMS from vulnerabilities.utils import group_advisories_by_content +from vulnerabilities.utils import merge_advisories from vulnerablecode import __version__ as VULNERABLECODE_VERSION from vulnerablecode.settings import env @@ -218,22 +220,30 @@ def get_context_data(self, **kwargs): 
context["latest_non_vulnerable"] = latest_non_vulnerable context["package_search_form"] = PackageSearchForm(self.request.GET) - affected_by_advisories_qs = models.AdvisoryV2.objects.latest_affecting_advisories_for_purl( - package.package_url + affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( + purl=package.purl + ).prefetch_related( + "aliases", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", ) - fixing_advisories_qs = models.AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( - package.package_url + fixed_by_advisories = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( + purl=package.purl + ).prefetch_related( + "aliases", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", ) affected_by_advisories_url = None fixing_advisories_url = None - affected_by_advisories_qs_ids = affected_by_advisories_qs.only("id") - fixing_advisories_qs_ids = fixing_advisories_qs.only("id") + affected_by_advisories_qs_ids = affecting_advisories.only("id") + fixing_advisories_qs_ids = fixed_by_advisories.only("id") - affected_by_advisories = list(affected_by_advisories_qs_ids[:101]) - if len(affected_by_advisories) > 100: + affected_by_advisories = list(affected_by_advisories_qs_ids[:1001]) + if len(affected_by_advisories) > 1001: affected_by_advisories_url = reverse_lazy( "affected_by_advisories_v2", kwargs={"purl": package.package_url} ) @@ -242,19 +252,25 @@ def get_context_data(self, **kwargs): context["fixed_package_details"] = {} else: + advisories = [] + fixed_pkg_details = get_fixed_package_details(package) - affected_avid_by_hash = {} - affected_avid_by_hash = group_advisories_by_content(affected_by_advisories_qs) - affecting_advs = [] + groups = merge_advisories(affecting_advisories, package) + for aliases, primary, _ in groups: + identifier = primary.advisory_id.split("/")[-1] + + filtered_aliases = [alias for alias in aliases if alias.alias != identifier] + + 
advisories.append( + {"aliases": filtered_aliases, "advisory": primary, "identifier": identifier} + ) - for hash in affected_avid_by_hash: - affecting_advs.append(affected_avid_by_hash[hash]) - context["affected_by_advisories_v2"] = affecting_advs + context["affected_by_advisories_v2"] = advisories context["fixed_package_details"] = fixed_pkg_details context["affected_by_advisories_v2_url"] = None - fixing_advisories = list(fixing_advisories_qs_ids[:101]) - if len(fixing_advisories) > 100: + fixing_advisories = list(fixing_advisories_qs_ids[:1001]) + if len(fixing_advisories) > 1001: fixing_advisories_url = reverse_lazy( "fixing_advisories_v2", kwargs={"purl": package.package_url} ) @@ -262,13 +278,20 @@ def get_context_data(self, **kwargs): context["fixing_advisories_v2"] = [] else: - fixing_avid_by_hash = {} - fixing_avid_by_hash = group_advisories_by_content(fixing_advisories_qs) - fixing_advs = [] + advisories = [] + + fixed_pkg_details = get_fixed_package_details(package) + groups = merge_advisories(fixing_advisories, package) + for aliases, primary, _ in groups: + identifier = primary.advisory_id.split("/")[-1] + + filtered_aliases = [alias for alias in aliases if alias.alias != identifier] + + advisories.append( + {"aliases": filtered_aliases, "advisory": primary, "identifier": identifier} + ) - for hash in fixing_avid_by_hash: - fixing_advs.append(fixing_avid_by_hash[hash]) - context["fixing_advisories_v2"] = fixing_advs + context["fixing_advisories_v2"] = advisories context["fixing_advisories_v2_url"] = None return context diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 745d2a469..44cacd9b0 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -142,7 +142,7 @@ def __init__(self, *args, **kwargs): ), re_path( r"^packages/v2/(?Ppkg:.+)$", - PackageV3Details.as_view(), + PackageV2Details.as_view(), name="package_details_v2", ), re_path( From 0b753c9cf35bab2450a8502c1ffc89d710b5f047 Mon Sep 17 00:00:00 2001 From: Tushar Goel 
Date: Mon, 30 Mar 2026 16:32:04 +0530 Subject: [PATCH 336/390] Adjust API and UI for new grouping Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 115 +++++++++-- vulnerabilities/improvers/__init__.py | 2 - ...remove_advisoryset_identifiers_and_more.py | 30 +++ vulnerabilities/models.py | 15 +- .../v2_importers/github_osv_importer.py | 2 +- .../pipelines/v2_importers/pypa_importer.py | 2 +- .../pipelines/v2_importers/pysec_importer.py | 2 +- .../compute_advisory_content_hash.py | 65 ------ .../group_advisories_for_packages.py | 162 +-------------- vulnerabilities/pipes/advisory.py | 3 - vulnerabilities/pipes/group_advisories.py | 50 +++++ .../templates/package_details_v2.html | 102 +++++++++- .../test_compute_advisory_content_hash.py | 88 -------- vulnerabilities/tests/test_advisory_merge.py | 192 ++++++++++++++++++ vulnerabilities/tests/test_api_v3.py | 21 +- vulnerabilities/utils.py | 104 +++++----- vulnerabilities/views.py | 126 +++++++----- 17 files changed, 620 insertions(+), 461 deletions(-) create mode 100644 vulnerabilities/migrations/0119_remove_advisoryset_identifiers_and_more.py delete mode 100644 vulnerabilities/pipelines/v2_improvers/compute_advisory_content_hash.py create mode 100644 vulnerabilities/pipes/group_advisories.py delete mode 100644 vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_content_hash.py create mode 100644 vulnerabilities/tests/test_advisory_merge.py diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index b55822e41..ea82dcce3 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -20,13 +20,16 @@ from rest_framework.throttling import AnonRateThrottle from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisorySet from vulnerabilities.models import AdvisorySeverity from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import AdvisoryWeakness from vulnerabilities.models import ImpactedPackageAffecting from 
vulnerabilities.models import PackageV2 from vulnerabilities.throttling import PermissionBasedUserRateThrottle -from vulnerabilities.utils import group_advisories_by_content +from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS +from vulnerabilities.utils import get_advisories_from_groups +from vulnerabilities.utils import merge_and_save_grouped_advisories class PackageQuerySerializer(serializers.Serializer): @@ -210,6 +213,32 @@ def get_affected_by_vulnerabilities(self, package): """Return a dictionary with advisory as keys and their details, including fixed_by_packages.""" advisories_qs = AdvisoryV2.objects.latest_affecting_advisories_for_purl(package.package_url) + advisories = [] + + is_grouped = AdvisorySet.objects.filter(package=package, relation_type="affecting").exists() + + if is_grouped: + affected_by_advisories_qs = AdvisorySet.objects.filter( + package=package, relation_type="affecting" + ).select_related("primary_advisory") + + affected_groups = [ + (list(adv.aliases.all()), adv.primary_advisory, "") + for adv in affected_by_advisories_qs + ] + + advisories = get_advisories_from_groups(affected_groups) + return self.return_advisories_data(package, advisories_qs, advisories) + + if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: + advisories_qs = advisories_qs.prefetch_related( + "aliases", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", + ) + advisories = merge_and_save_grouped_advisories(package, advisories_qs, "affecting") + return self.return_advisories_data(package, advisories_qs, advisories) + advisories_ids = advisories_qs.only("id") advisories_ids = list(advisories_ids[:101]) @@ -227,20 +256,19 @@ def get_affected_by_vulnerabilities(self, package): impact_by_avid = {impact.advisory.avid: impact for impact in impacts} - grouped = group_advisories_by_content(advisories_qs) - result = [] - for entry in grouped.values(): - primary = entry["primary"] - impact = impact_by_avid.get(primary.avid) + + for 
advisory in advisories_qs: + impact = impact_by_avid.get(advisory.avid) if not impact: continue result.append( { - "advisory_id": primary.avid, + "advisory_id": advisory.advisory_id.split("/")[-1], + "aliases": [alias.alias for alias in advisory.aliases.all()], + "summary": advisory.summary, "fixed_by_packages": [pkg.purl for pkg in impact.fixed_by_packages.all()], - "duplicate_advisory_ids": [a.avid for a in entry["secondary"]], } ) @@ -249,21 +277,82 @@ def get_affected_by_vulnerabilities(self, package): def get_fixing_vulnerabilities(self, package): advisories_qs = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl(package.package_url) + advisories = [] + + is_grouped = AdvisorySet.objects.filter(package=package, relation_type="fixing").exists() + + if is_grouped: + fixing_advisories_qs = AdvisorySet.objects.filter( + package=package, relation_type="fixing" + ).select_related("primary_advisory") + + fixing_groups = [ + (list(adv.aliases.all()), adv.primary_advisory, "") for adv in fixing_advisories_qs + ] + + advisories = get_advisories_from_groups(fixing_groups) + return self.return_fixing_advisories_data(advisories) + + if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: + advisories_qs = advisories_qs.prefetch_related( + "aliases", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", + ) + advisories = merge_and_save_grouped_advisories(package, advisories_qs, "fixing") + return self.return_fixing_advisories_data(advisories) + advisories_ids = advisories_qs.only("id") advisories_ids = list(advisories_ids[:101]) if len(advisories_ids) > 100: return None - grouped = group_advisories_by_content(advisories_qs) + results = [] + for advisory in advisories_qs: + results.append( + { + "advisory_id": advisory.advisory_id.split("/")[-1], + } + ) + return results + + def return_fixing_advisories_data(self, advisories): result = [] - for entry in grouped.values(): - primary = entry["primary"] + for advisory in advisories: result.append( { 
- "advisory_id": primary.avid, - "duplicate_advisory_ids": [a.avid for a in entry["secondary"]], + "advisory_id": advisory["identifier"], + } + ) + + return result + + def return_advisories_data(self, package, advisories_qs, advisories): + advisory_by_avid = {adv.avid: adv for adv in advisories_qs} + avids = advisory_by_avid.keys() + + impacts = ( + package.affected_in_impacts.filter(advisory__avid__in=avids) + .select_related("advisory") + .prefetch_related("fixed_by_packages") + ) + + impact_by_avid = {impact.advisory.avid: impact for impact in impacts} + + result = [] + for advisory in advisories: + impact = impact_by_avid.get(advisory["advisory"].avid) + if not impact: + continue + + result.append( + { + "advisory_id": advisory["identifier"], + "aliases": [alias.alias for alias in advisory["aliases"]], + "summary": advisory["advisory"].summary, + "fixed_by_packages": [pkg.purl for pkg in impact.fixed_by_packages.all()], } ) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 61a1fc882..3e991d658 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -20,7 +20,6 @@ from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline from vulnerabilities.pipelines import remove_duplicate_advisories from vulnerabilities.pipelines.v2_improvers import collect_ssvc_trees -from vulnerabilities.pipelines.v2_improvers import compute_advisory_content_hash from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2 from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2 from vulnerabilities.pipelines.v2_improvers import ( @@ -76,7 +75,6 @@ compute_advisory_todo.ComputeToDo, collect_ssvc_trees.CollectSSVCPipeline, relate_severities.RelateSeveritiesPipeline, - compute_advisory_content_hash.ComputeAdvisoryContentHash, group_advisories_for_packages.GroupAdvisoriesForPackages, ] ) diff --git 
a/vulnerabilities/migrations/0119_remove_advisoryset_identifiers_and_more.py b/vulnerabilities/migrations/0119_remove_advisoryset_identifiers_and_more.py new file mode 100644 index 000000000..503e14f8d --- /dev/null +++ b/vulnerabilities/migrations/0119_remove_advisoryset_identifiers_and_more.py @@ -0,0 +1,30 @@ +# Generated by Django 5.2.11 on 2026-03-30 08:35 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0118_advisoryset_advisorysetmember"), + ] + + operations = [ + migrations.RemoveField( + model_name="advisoryset", + name="identifiers", + ), + migrations.RemoveField( + model_name="advisoryv2", + name="advisory_content_hash", + ), + migrations.AddField( + model_name="advisoryset", + name="aliases", + field=models.ManyToManyField( + help_text="A list of serializable Alias objects", + related_name="advisory_sets", + to="vulnerabilities.advisoryalias", + ), + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 05bf86a17..f51a92dbd 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2949,7 +2949,11 @@ class AdvisorySet(models.Model): package = models.ForeignKey("PackageV2", on_delete=models.CASCADE) relation_type = models.CharField(max_length=20, choices=RELATION_TYPE_CHOICES) - identifiers = models.JSONField() + aliases = models.ManyToManyField( + AdvisoryAlias, + related_name="advisory_sets", + help_text="A list of serializable Alias objects", + ) primary_advisory = models.ForeignKey("AdvisoryV2", on_delete=models.PROTECT) @@ -3101,13 +3105,6 @@ class AdvisoryV2(models.Model): help_text="Related advisories that are used to calculate the severity of this advisory.", ) - advisory_content_hash = models.CharField( - max_length=64, - blank=True, - null=True, - help_text="A unique hash computed from the content of the advisory used to identify advisories with the same content.", - ) - risk_score = models.DecimalField( null=True, 
blank=True, @@ -3311,7 +3308,7 @@ def search(self, query: str = None): except ValueError: # otherwise use query as a plain string qs = qs.filter(package_url__icontains=query) - return qs.order_by("package_url") + return qs.order_by("package_url").order_by("-version_rank") def with_vulnerability_counts(self): return self.annotate( diff --git a/vulnerabilities/pipelines/v2_importers/github_osv_importer.py b/vulnerabilities/pipelines/v2_importers/github_osv_importer.py index cfe92d93f..33acaf7f8 100644 --- a/vulnerabilities/pipelines/v2_importers/github_osv_importer.py +++ b/vulnerabilities/pipelines/v2_importers/github_osv_importer.py @@ -31,7 +31,7 @@ class GithubOSVImporterPipeline(VulnerableCodeBaseImporterPipelineV2): license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md" repo_url = "git+https://github.com/github/advisory-database/" - precedence = 100 + precedence = 200 @classmethod def steps(cls): diff --git a/vulnerabilities/pipelines/v2_importers/pypa_importer.py b/vulnerabilities/pipelines/v2_importers/pypa_importer.py index 90599e99d..7a80ed70f 100644 --- a/vulnerabilities/pipelines/v2_importers/pypa_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pypa_importer.py @@ -29,7 +29,7 @@ class PyPaImporterPipeline(VulnerableCodeBaseImporterPipelineV2): spdx_license_expression = "CC-BY-4.0" license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" repo_url = "git+https://github.com/pypa/advisory-database" - precedence = 200 + precedence = 500 @classmethod def steps(cls): diff --git a/vulnerabilities/pipelines/v2_importers/pysec_importer.py b/vulnerabilities/pipelines/v2_importers/pysec_importer.py index 05614b961..e9225a4f5 100644 --- a/vulnerabilities/pipelines/v2_importers/pysec_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pysec_importer.py @@ -29,7 +29,7 @@ class PyPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): license_url = 
"https://github.com/pypa/advisory-database/blob/main/LICENSE" url = "https://osv-vulnerabilities.storage.googleapis.com/PyPI/all.zip" spdx_license_expression = "CC-BY-4.0" - precedence = 100 + precedence = 300 @classmethod def steps(cls): diff --git a/vulnerabilities/pipelines/v2_improvers/compute_advisory_content_hash.py b/vulnerabilities/pipelines/v2_improvers/compute_advisory_content_hash.py deleted file mode 100644 index 8b285d361..000000000 --- a/vulnerabilities/pipelines/v2_improvers/compute_advisory_content_hash.py +++ /dev/null @@ -1,65 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# VulnerableCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/aboutcode-org/vulnerablecode for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - - -from aboutcode.pipeline import LoopProgress - -from vulnerabilities.models import AdvisoryV2 -from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.utils import compute_advisory_content - - -class ComputeAdvisoryContentHash(VulnerableCodePipeline): - """Compute Advisory Content Hash for Advisory.""" - - pipeline_id = "compute_advisory_content_hash_v2" - - @classmethod - def steps(cls): - return (cls.compute_advisory_content_hash,) - - def compute_advisory_content_hash(self): - """Compute Advisory Content Hash for Advisory.""" - - advisories = AdvisoryV2.objects.latest_per_avid().filter(advisory_content_hash__isnull=True) - - advisories_count = advisories.count() - - progress = LoopProgress( - total_iterations=advisories_count, - logger=self.log, - progress_step=1, - ) - - to_update = [] - batch_size = 5000 - - for advisory in progress.iter(advisories.iterator(chunk_size=batch_size)): - try: - advisory.advisory_content_hash = compute_advisory_content(advisory) - 
to_update.append(advisory) - except Exception as e: - self.log(f"Error computing advisory_content_hash for {advisory.avid}: {e}") - - if len(to_update) >= batch_size: - AdvisoryV2.objects.bulk_update( - to_update, - ["advisory_content_hash"], - batch_size=batch_size, - ) - to_update.clear() - - if to_update: - AdvisoryV2.objects.bulk_update( - to_update, - ["advisory_content_hash"], - batch_size=batch_size, - ) - - self.log("Finished computing advisory_content_hash") diff --git a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py index 75b983e1c..d2c8f6296 100644 --- a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py +++ b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py @@ -7,18 +7,12 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -import hashlib -import json -from collections import defaultdict - -from django.db import transaction - -from vulnerabilities.models import AdvisorySet -from vulnerabilities.models import AdvisorySetMember from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import PackageV2 from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.utils import normalize_list +from vulnerabilities.pipes.group_advisories import delete_and_save_advisory_set +from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS +from vulnerabilities.utils import merge_advisories class GroupAdvisoriesForPackages(VulnerableCodePipeline): @@ -34,112 +28,9 @@ def group_advisories_for_packages(self): group_advisoris_for_packages(logger=self.log) -CONTENT_HASH_CACHE = {} - - -def merge_advisories(advisories): - - advisories = list(advisories) - - content_hash_map = defaultdict(list) - - for adv in advisories: - if adv.avid in CONTENT_HASH_CACHE: - content_hash = CONTENT_HASH_CACHE[adv.avid] - else: - content_hash = compute_advisory_content_hash(adv) - 
CONTENT_HASH_CACHE[adv.avid] = content_hash - - content_hash_map[content_hash].append(adv) - - final_groups = [] - - for group in content_hash_map.values(): - groups = get_merged_identifier_groups(group) - final_groups.extend(groups) - - return final_groups - - -def compute_advisory_content_hash(adv): - affected = [] - fixed = [] - - for impact in adv.impacted_packages.all(): - affected.extend([pkg.package_url for pkg in impact.affecting_packages.all()]) - - fixed.extend([pkg.package_url for pkg in impact.fixed_by_packages.all()]) - - normalized_data = { - "affected_packages": normalize_list(affected), - "fixed_packages": normalize_list(fixed), - } - - normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True) - content_hash = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest() - return content_hash - - -def get_merged_identifier_groups(advisories): - - identifier_groups = defaultdict(set) - - advisories = list(advisories) - - for adv in advisories: - - identifier_groups[adv.advisory_id].add(adv) - - for alias in adv.aliases.values_list("alias", flat=True): - identifier_groups[alias].add(adv) - - groups = [set(advs) for advs in identifier_groups.values() if len(advs) > 1] - - merged = [] - - for group in groups: - group = set(group) - - i = 0 - while i < len(merged): - if group & merged[i]: - group |= merged[i] - merged.pop(i) - else: - i += 1 - - merged.append(group) - - all_grouped = set() - for g in merged: - all_grouped |= g - - for adv in advisories: - if adv not in all_grouped: - merged.append({adv}) - - final_groups = [] - - for group in merged: - identifiers = set() - for adv in group: - for alias in adv.aliases.values_list("alias", flat=True): - identifiers.add(alias) - - primary = max(group, key=lambda a: a.precedence if a.precedence is not None else -1) - - secondary = [a for a in group if a != primary] - - final_groups.append((identifiers, primary, secondary)) - - return final_groups - - def 
group_advisoris_for_packages(logger=None): - for package in PackageV2.objects.filter( - type__in=["npm", "pypi", "nuget", "maven", "composer"] - ).iterator(): - print(package) + for package in PackageV2.objects.filter(type__in=TYPES_WITH_MULTIPLE_IMPORTERS).iterator(): + print(f"Grouping advisories for package {package.purl}") affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( purl=package.purl ).prefetch_related( @@ -157,45 +48,10 @@ def group_advisoris_for_packages(logger=None): ) try: - delete_and_save_advisory_set(package, affecting_advisories, relation="affecting") - delete_and_save_advisory_set(package, fixed_by_advisories, relation="fixing") + affected_groups = merge_advisories(affecting_advisories, package) + fixed_by_groups = merge_advisories(fixed_by_advisories, package) + delete_and_save_advisory_set(affected_groups, package, relation="affecting") + delete_and_save_advisory_set(fixed_by_groups, package, relation="fixing") except Exception as e: print(f"Failed rebuilding advisory sets for package {package.purl}: {e!r}") continue - - -@transaction.atomic -def delete_and_save_advisory_set(package, advisories, relation=None): - AdvisorySet.objects.filter(package=package, relation_type=relation).delete() - - groups = merge_advisories(advisories) - - membership_to_create = [] - - for identifiers, primary, secondary in groups: - - advisory_set = AdvisorySet.objects.create( - package=package, - relation_type=relation, - identifiers=list(identifiers), - primary_advisory=primary, - ) - - membership_to_create.append( - AdvisorySetMember( - advisory_set=advisory_set, - advisory=primary, - is_primary=True, - ) - ) - - for adv in secondary: - membership_to_create.append( - AdvisorySetMember( - advisory_set=advisory_set, - advisory=adv, - is_primary=False, - ) - ) - - AdvisorySetMember.objects.bulk_create(membership_to_create) diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index a7f67153f..bcdd95075 
100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -48,7 +48,6 @@ from vulnerabilities.models import VulnerabilitySeverity from vulnerabilities.models import Weakness from vulnerabilities.pipes.univers_utils import get_exact_purls_v2 -from vulnerabilities.utils import compute_advisory_content def get_or_create_aliases(aliases: List) -> QuerySet: @@ -302,7 +301,6 @@ def insert_advisory_v2( advisory_obj = None created = False content_id = compute_content_id_v2(advisory_data=advisory) - advisory_content_hash = compute_advisory_content(advisory_data=advisory) try: default_data = { "datasource_id": pipeline_id, @@ -313,7 +311,6 @@ def insert_advisory_v2( "original_advisory_text": advisory.original_advisory_text, "url": advisory.url, "precedence": precedence, - "advisory_content_hash": advisory_content_hash, } advisory_obj, created = AdvisoryV2.objects.get_or_create( diff --git a/vulnerabilities/pipes/group_advisories.py b/vulnerabilities/pipes/group_advisories.py new file mode 100644 index 000000000..d66365706 --- /dev/null +++ b/vulnerabilities/pipes/group_advisories.py @@ -0,0 +1,50 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from django.db import transaction + + +@transaction.atomic +def delete_and_save_advisory_set(groups, package, relation=None): + from vulnerabilities.models import AdvisorySet + from vulnerabilities.models import AdvisorySetMember + + AdvisorySet.objects.filter(package=package, relation_type=relation).delete() + + membership_to_create = [] + + for identifiers, primary, secondary in groups: + + advisory_set = AdvisorySet.objects.create( + package=package, + relation_type=relation, + primary_advisory=primary, + ) + + advisory_set.aliases.add(*identifiers) + advisory_set.save() + + membership_to_create.append( + AdvisorySetMember( + advisory_set=advisory_set, + advisory=primary, + is_primary=True, + ) + ) + + for adv in secondary: + membership_to_create.append( + AdvisorySetMember( + advisory_set=advisory_set, + advisory=adv, + is_primary=False, + ) + ) + + AdvisorySetMember.objects.bulk_create(membership_to_create) diff --git a/vulnerabilities/templates/package_details_v2.html b/vulnerabilities/templates/package_details_v2.html index 06c15f0d0..8c3f62756 100644 --- a/vulnerabilities/templates/package_details_v2.html +++ b/vulnerabilities/templates/package_details_v2.html @@ -141,6 +141,7 @@ + {% if grouped %} {% for advisory in affected_by_advisories_v2 %} @@ -201,6 +202,68 @@ {% endfor %} + {% else %} + + {% for advisory in affected_by_advisories_v2 %} + + + + {{advisory.advisory_id }} + +
+ {% if advisory.aliases.all|length != 0 %} + Aliases: + {% endif %} +
+ {% for alias in advisory.aliases.all %} + {% if alias.url %} + {{ alias }} +
+ {% else %} + {{ alias }} +
+ {% endif %} + {% endfor %} + + + + {{ advisory.summary|truncatewords:20 }} + + + {% with fixed=fixed_package_details|get_item:advisory.avid %} + {% if fixed %} + {% for item in fixed %} +
+ {{ item.pkg.version }} +
+ {% if item.pkg.is_vulnerable %} + + Vulnerable + + {% else %} + + Not vulnerable + + {% endif %} +
+ {% endfor %} + {% else %} + There are no reported fixed by versions. + {% endif %} + {% endwith %} + + + {% empty %} + + + This package is not known to be subject of any advisories. + + + {% endfor %} + + {% endif %} {% elif affected_by_advisories_v2_url %}
@@ -228,6 +291,8 @@ Aliases + + {% if grouped %} {% for advisory in fixing_advisories_v2 %} @@ -261,8 +326,43 @@ {% endfor %} - + {% else %} + + {% for advisory in fixing_advisories_v2 %} + + + + {{advisory.advisory_id }} + +
+ + + {{ advisory.summary|truncatewords:20 }} + + + {% for alias in advisory.aliases.all %} + {% if alias.url %} + {{ alias }} +
+ {% else %} + {{ alias }} +
+ {% endif %} + {% endfor %} + + + {% empty %} + + + This package is not known to fix any advisories. + + + {% endfor %} + + + {% endif %}
{% elif fixing_advisories_v2_url %}
diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_content_hash.py b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_content_hash.py deleted file mode 100644 index 5b7f0c186..000000000 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_advisory_content_hash.py +++ /dev/null @@ -1,88 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# VulnerableCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/aboutcode-org/vulnerablecode for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - -from unittest.mock import patch - -import pytest - -from vulnerabilities.models import AdvisoryV2 -from vulnerabilities.pipelines.v2_improvers.compute_advisory_content_hash import ( - ComputeAdvisoryContentHash, -) - -pytestmark = pytest.mark.django_db - - -@pytest.fixture -def advisory_factory(): - def _create(count, with_hash=False, start=0): - objs = [] - for i in range(start, start + count): - objs.append( - AdvisoryV2( - summary=f"summary {i}", - advisory_content_hash="existing_hash" if with_hash else None, - unique_content_id=f"unique_id_{i}", - advisory_id=f"ADV-{i}", - datasource_id="ds", - avid=f"ds/ADV-{i}", - url=f"https://example.com/ADV-{i}", - ) - ) - return AdvisoryV2.objects.bulk_create(objs) - - return _create - - -def run_pipeline(): - pipeline = ComputeAdvisoryContentHash() - pipeline.compute_advisory_content_hash() - - -@patch( - "vulnerabilities.pipelines.v2_improvers.compute_advisory_content_hash.compute_advisory_content" -) -def test_pipeline_updates_only_missing_hash(mock_compute, advisory_factory): - advisory_factory(3, with_hash=False, start=0) - advisory_factory(2, with_hash=True, start=100) - - mock_compute.return_value = "new_hash" - - run_pipeline() - - updated = 
AdvisoryV2.objects.filter(advisory_content_hash="new_hash").count() - untouched = AdvisoryV2.objects.filter(advisory_content_hash="existing_hash").count() - - assert updated == 3 - assert untouched == 2 - assert mock_compute.call_count == 3 - - -@patch( - "vulnerabilities.pipelines.v2_improvers.compute_advisory_content_hash.compute_advisory_content" -) -def test_pipeline_bulk_update_batches(mock_compute, advisory_factory): - advisory_factory(6000, with_hash=False) - - mock_compute.return_value = "batch_hash" - - run_pipeline() - - assert AdvisoryV2.objects.filter(advisory_content_hash="batch_hash").count() == 6000 - - assert mock_compute.call_count == 6000 - - -@patch( - "vulnerabilities.pipelines.v2_improvers.compute_advisory_content_hash.compute_advisory_content" -) -def test_pipeline_no_advisories(mock_compute): - run_pipeline() - - assert mock_compute.call_count == 0 diff --git a/vulnerabilities/tests/test_advisory_merge.py b/vulnerabilities/tests/test_advisory_merge.py new file mode 100644 index 000000000..ddcc3cadb --- /dev/null +++ b/vulnerabilities/tests/test_advisory_merge.py @@ -0,0 +1,192 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import hashlib + +import pytest + +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisorySet +from vulnerabilities.models import AdvisorySetMember +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import ImpactedPackage +from vulnerabilities.models import PackageV2 +from vulnerabilities.utils import compute_advisory_content_hash +from vulnerabilities.utils import delete_and_save_advisory_set +from vulnerabilities.utils import get_advisories_from_groups +from vulnerabilities.utils import get_merged_identifier_groups +from vulnerabilities.utils import merge_advisories +from vulnerabilities.utils import merge_and_save_grouped_advisories + + +@pytest.mark.django_db +class TestAdvisoryMerge: + def create_advisory(self, advisory_id, affected_versions, fixed_versions=None, precedence=None): + unique_content_id = hashlib.sha256(advisory_id.encode()).hexdigest() + + adv = AdvisoryV2.objects.create( + datasource_id="ghsa", + advisory_id=advisory_id, + avid=f"ghsa/{advisory_id}", + unique_content_id=unique_content_id, + url="https://example.com/advisory", + date_collected="2025-07-01T00:00:00Z", + precedence=precedence, + ) + + pkg = PackageV2.objects.from_purl("pkg:pypi/sample@1.0.0") + + impact = ImpactedPackage.objects.create( + advisory=adv, + base_purl="pkg:pypi/sample", + ) + + # affected + for v in affected_versions: + p = PackageV2.objects.from_purl(f"pkg:pypi/sample@{v}") + impact.affecting_packages.add(p) + + # fixed + if fixed_versions: + for v in fixed_versions: + p = PackageV2.objects.from_purl(f"pkg:pypi/sample@{v}") + impact.fixed_by_packages.add(p) + + return adv + + def test_content_hash_same(self): + package = PackageV2.objects.from_purl("pkg:pypi/sample@1.0.0") + + adv1 = self.create_advisory("A1", ["1.0"], ["2.0"]) + adv2 = self.create_advisory("A2", ["1.0"], ["2.0"]) + + h1 = compute_advisory_content_hash(adv1, package) + h2 = compute_advisory_content_hash(adv2, package) + + assert h1 
== h2 + + def test_content_hash_different(self): + package = PackageV2.objects.from_purl("pkg:pypi/sample@1.0.0") + + adv1 = self.create_advisory("A1", ["1.0"], ["2.0"]) + adv2 = self.create_advisory("A2", ["1.0"], ["3.0"]) + + assert compute_advisory_content_hash(adv1, package) != compute_advisory_content_hash( + adv2, package + ) + + def test_identifier_merging(self): + adv1 = self.create_advisory("A1", ["1.0"]) + adv2 = self.create_advisory("A2", ["1.0"]) + + alias = AdvisoryAlias.objects.create(alias="CVE-123") + + adv1.aliases.add(alias) + adv2.aliases.add(alias) + + groups = get_merged_identifier_groups([adv1, adv2]) + + assert len(groups) == 1 + identifiers, primary, secondary = groups[0] + + assert len(secondary) == 1 + assert primary in [adv1, adv2] + + def test_transitive_merge(self): + a1 = self.create_advisory("A1", ["1.0"]) + a2 = self.create_advisory("A2", ["1.0"]) + a3 = self.create_advisory("A3", ["1.0"]) + + alias_1 = AdvisoryAlias.objects.create(alias="CVE-1") + alias_2 = AdvisoryAlias.objects.create(alias="CVE-2") + + a1.aliases.add(alias_1) + a2.aliases.add(alias_1) + a2.aliases.add(alias_2) + a3.aliases.add(alias_2) + + groups = get_merged_identifier_groups([a1, a2, a3]) + + assert len(groups) == 1 + + def test_primary_selection_by_precedence(self): + a1 = self.create_advisory("A1", ["1.0"], precedence=1) + a2 = self.create_advisory("A2", ["1.0"], precedence=5) + + alias_1 = AdvisoryAlias.objects.create(alias="CVE-1") + + a1.aliases.add(alias_1) + a2.aliases.add(alias_1) + + groups = get_merged_identifier_groups([a1, a2]) + _, primary, _ = groups[0] + + assert primary == a2 + + def test_get_advisories_from_groups(self): + adv = self.create_advisory("GHSA-ABC-123", ["1.0"]) + adv.aliases.create(alias="CVE-999") + + groups = get_merged_identifier_groups([adv]) + result = get_advisories_from_groups(groups) + + assert result[0]["identifier"] == "GHSA-ABC-123" + assert len(result[0]["aliases"]) == 1 + + def test_delete_and_save_advisory_set(self): + 
package = PackageV2.objects.from_purl("pkg:pypi/sample@1.0.0") + + adv1 = self.create_advisory("A1", ["1.0"]) + adv2 = self.create_advisory("A2", ["1.0"]) + + adv1.aliases.create(alias="CVE-1") + + groups = [(set(adv1.aliases.all()), adv1, [adv2])] + + delete_and_save_advisory_set(groups, package, relation="affecting") + + assert AdvisorySet.objects.count() == 1 + assert AdvisorySetMember.objects.count() == 2 + + advisory_set = AdvisorySet.objects.first() + members = AdvisorySetMember.objects.filter(advisory_set=advisory_set) + + assert any(m.is_primary for m in members) + assert any(not m.is_primary for m in members) + + def test_merge_and_save_integration(self): + package = PackageV2.objects.from_purl("pkg:pypi/sample@1.0.0") + + adv1 = self.create_advisory("A1", ["1.0"], ["2.0"]) + adv2 = self.create_advisory("A2", ["1.0"], ["2.0"]) + + alias = AdvisoryAlias.objects.create(alias="CVE-1") + + adv1.aliases.add(alias) + adv2.aliases.add(alias) + + result = merge_and_save_grouped_advisories( + package, + [adv1, adv2], + relation="test", + ) + + assert len(result) == 1 + assert AdvisorySet.objects.count() == 1 + assert AdvisorySetMember.objects.count() == 2 + + def test_merge_advisories_separates_different_content(self): + package = PackageV2.objects.from_purl("pkg:pypi/sample@1.0.0") + + adv1 = self.create_advisory("A1", ["1.0"], ["2.0"]) + adv2 = self.create_advisory("A2", ["1.0"], ["3.0"]) + + groups = merge_advisories([adv1, adv2], package) + + assert len(groups) == 2 diff --git a/vulnerabilities/tests/test_api_v3.py b/vulnerabilities/tests/test_api_v3.py index 6b88e5ee5..fa8a08b33 100644 --- a/vulnerabilities/tests/test_api_v3.py +++ b/vulnerabilities/tests/test_api_v3.py @@ -53,7 +53,7 @@ def test_packages_post_without_details(self): def test_packages_post_with_details(self): url = reverse("package-v3-list") - with self.assertNumQueries(23): + with self.assertNumQueries(33): response = self.client.post( url, data={ @@ -171,25 +171,6 @@ def setUp(self): 
self.client = APIClient(enforce_csrf_checks=True) - def test_packages_post_purl_with_many_advisories(self): - url = reverse("package-v3-list") - - with self.assertNumQueries(12): - response = self.client.post( - url, - data={ - "purls": ["pkg:pypi/sample@1.0.0"], - "details": True, - }, - format="json", - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - - results = response.data["results"] - self.assertEqual(len(results), 1) - self.assertIsNotNone(results[0]["affected_by_vulnerabilities_url"]) - def test_advisories_post(self): url = reverse("advisory-v3-list") diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 2dd606a92..5f791d30b 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -41,6 +41,7 @@ from univers.version_range import VersionRange from aboutcode.hashid import build_vcid +from vulnerabilities.pipes.group_advisories import delete_and_save_advisory_set logger = logging.getLogger(__name__) @@ -845,59 +846,10 @@ def compute_patch_checksum(patch_text: str): return hashlib.sha512(patch_text.encode("utf-8")).hexdigest() -def group_advisories_by_content(advisories): - grouped = {} - - for advisory in advisories: - content_hash = ( - advisory.advisory_content_hash - if advisory.advisory_content_hash - else compute_advisory_content(advisory) - ) - - entry = grouped.setdefault( - content_hash, - {"primary": advisory, "secondary": set()}, - ) - - primary = entry["primary"] - - if advisory is primary: - continue - - if advisory.precedence > primary.precedence: - entry["primary"] = advisory - entry["secondary"].add(primary) - else: - entry["secondary"].add(advisory) - - return grouped - - -def compute_advisory_content(advisory_data): +def merge_advisories(advisories, package): """ - Compute a unique content hash for an advisory by normalizing its data and hashing it. 
- - :param advisory_data: An AdvisoryData object - :return: SHA-256 hash digest as content hash + Merge advisories based on their content hash and identifiers. """ - from vulnerabilities.models import AdvisoryV2 - - if isinstance(advisory_data, AdvisoryV2): - advisory_data = advisory_data.to_advisory_data() - normalized_data = { - "affected_packages": [ - pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg - ], - } - - normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True) - content_hash = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest() - - return content_hash - - -def merge_advisories(advisories, package): advisories = list(advisories) @@ -917,6 +869,8 @@ def merge_advisories(advisories, package): def compute_advisory_content_hash(adv, package): + """Compute a content hash for an advisory based on its affected and fixed packages for a given package. + This is used to determine if two advisories are the same based on their content.""" affected = [] fixed = [] @@ -943,6 +897,10 @@ def compute_advisory_content_hash(adv, package): def get_merged_identifier_groups(advisories): + """ + Merge advisories based on their identifiers (advisory_id and aliases). + Example: If two advisories share ``advisory_id`` or share an alias, they will be merged together. + """ identifier_groups = defaultdict(set) @@ -985,7 +943,7 @@ def get_merged_identifier_groups(advisories): for group in merged: identifiers = set() for adv in group: - for alias in adv.aliases.all(): + for alias in adv.aliases.all().order_by("alias"): identifiers.add(alias) primary = max(group, key=lambda a: a.precedence if a.precedence is not None else -1) @@ -995,3 +953,45 @@ def get_merged_identifier_groups(advisories): final_groups.append((identifiers, primary, secondary)) return final_groups + + +def get_advisories_from_groups(groups): + """ + Return a list of advisories from the merged groups of advisories. 
+ """ + advisories = [] + for aliases, primary, _ in groups: + identifier = primary.advisory_id.split("/")[-1] + + filtered_aliases = [alias for alias in aliases if alias.alias != identifier] + + advisories.append( + {"aliases": filtered_aliases, "advisory": primary, "identifier": identifier} + ) + + return advisories + + +def merge_and_save_grouped_advisories(package, advisories, relation): + """ + Merge advisories based on their content and identifiers and save the merged advisories to the database. + """ + groups = merge_advisories(advisories, package) + delete_and_save_advisory_set(groups, package, relation) + advisories = get_advisories_from_groups(groups) + + return advisories + + +TYPES_WITH_MULTIPLE_IMPORTERS = [ + "pypi", + "maven", + "nuget", + "golang", + "npm", + "composer", + "hex", + "cargo", + "gem", + "conan", +] diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index c8bfc6634..8051dfb35 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -39,14 +39,15 @@ from vulnerabilities.forms import VulnerabilitySearchForm from vulnerabilities.models import AdvisorySetMember from vulnerabilities.models import AdvisoryV2 -from vulnerabilities.models import ImpactedPackage from vulnerabilities.models import PipelineRun from vulnerabilities.models import PipelineSchedule from vulnerabilities.pipelines.v2_importers.epss_importer_v2 import EPSSImporterPipeline +from vulnerabilities.pipes.group_advisories import delete_and_save_advisory_set from vulnerabilities.severity_systems import EPSS from vulnerabilities.severity_systems import SCORING_SYSTEMS -from vulnerabilities.utils import group_advisories_by_content -from vulnerabilities.utils import merge_advisories +from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS +from vulnerabilities.utils import get_advisories_from_groups +from vulnerabilities.utils import merge_and_save_grouped_advisories from vulnerablecode import __version__ as VULNERABLECODE_VERSION from 
vulnerablecode.settings import env @@ -160,12 +161,7 @@ def get_queryset(self, query=None): on exact purl, partial purl or just name and namespace. """ query = query or self.request.GET.get("search") or "" - return ( - self.model.objects.search(query) - .prefetch_related() - .order_by("package_url") - .with_is_vulnerable() - ) + return self.model.objects.search(query).prefetch_related().with_is_vulnerable() class AffectedByAdvisoriesListView(ListView): @@ -220,57 +216,97 @@ def get_context_data(self, **kwargs): context["latest_non_vulnerable"] = latest_non_vulnerable context["package_search_form"] = PackageSearchForm(self.request.GET) + is_grouped = models.AdvisorySet.objects.filter(package=package).exists() + + if is_grouped: + context["grouped"] = True + fixed_pkg_details = get_fixed_package_details(package) + context["fixed_package_details"] = fixed_pkg_details + + affected_by_advisories_qs = models.AdvisorySet.objects.filter( + package=package, relation_type="affecting" + ).select_related("primary_advisory") + + fixing_advisories_qs = models.AdvisorySet.objects.filter( + package=package, relation_type="fixing" + ).select_related("primary_advisory") + + affected_groups = [ + (list(adv.aliases.all()), adv.primary_advisory, "") + for adv in affected_by_advisories_qs + ] + fixing_groups = [ + (list(adv.aliases.all()), adv.primary_advisory, "") for adv in fixing_advisories_qs + ] + + affected_advisories = get_advisories_from_groups(affected_groups) + fixing_advisories = get_advisories_from_groups(fixing_groups) + + context["affected_by_advisories_v2"] = affected_advisories + context["fixing_advisories_v2"] = fixing_advisories + + return context + affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( purl=package.purl - ).prefetch_related( - "aliases", - "impacted_packages__affecting_packages", - "impacted_packages__fixed_by_packages", ) fixed_by_advisories = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( purl=package.purl - 
).prefetch_related( - "aliases", - "impacted_packages__affecting_packages", - "impacted_packages__fixed_by_packages", ) + if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: + fixed_pkg_details = get_fixed_package_details(package) + context["fixed_package_details"] = fixed_pkg_details + context["grouped"] = True + + affecting_advisories = affecting_advisories.prefetch_related( + "aliases", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", + ) + + affected_by_advisories = merge_and_save_grouped_advisories( + package, affecting_advisories, "affecting" + ) + + fixed_by_advisories = fixed_by_advisories.prefetch_related( + "aliases", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", + ) + + fixing_advisories = merge_and_save_grouped_advisories( + package, fixed_by_advisories, "fixing" + ) + + context["affected_by_advisories_v2"] = affected_by_advisories + context["fixing_advisories_v2"] = fixing_advisories + return context + + context["grouped"] = False + affected_by_advisories_url = None fixing_advisories_url = None affected_by_advisories_qs_ids = affecting_advisories.only("id") fixing_advisories_qs_ids = fixed_by_advisories.only("id") - affected_by_advisories = list(affected_by_advisories_qs_ids[:1001]) - if len(affected_by_advisories) > 1001: + affected_by_advisories = list(affected_by_advisories_qs_ids[:101]) + if len(affected_by_advisories) > 101: affected_by_advisories_url = reverse_lazy( "affected_by_advisories_v2", kwargs={"purl": package.package_url} ) context["affected_by_advisories_v2_url"] = affected_by_advisories_url - context["affected_by_advisories_v2"] = [] - context["fixed_package_details"] = {} else: - advisories = [] - fixed_pkg_details = get_fixed_package_details(package) - groups = merge_advisories(affecting_advisories, package) - for aliases, primary, _ in groups: - identifier = primary.advisory_id.split("/")[-1] - - filtered_aliases = [alias for alias in aliases if alias.alias != 
identifier] - - advisories.append( - {"aliases": filtered_aliases, "advisory": primary, "identifier": identifier} - ) - - context["affected_by_advisories_v2"] = advisories context["fixed_package_details"] = fixed_pkg_details + context["affected_by_advisories_v2"] = affecting_advisories context["affected_by_advisories_v2_url"] = None - fixing_advisories = list(fixing_advisories_qs_ids[:1001]) - if len(fixing_advisories) > 1001: + fixing_advisories = list(fixing_advisories_qs_ids[:101]) + if len(fixing_advisories) > 101: fixing_advisories_url = reverse_lazy( "fixing_advisories_v2", kwargs={"purl": package.package_url} ) @@ -278,21 +314,7 @@ def get_context_data(self, **kwargs): context["fixing_advisories_v2"] = [] else: - advisories = [] - - fixed_pkg_details = get_fixed_package_details(package) - groups = merge_advisories(fixing_advisories, package) - for aliases, primary, _ in groups: - identifier = primary.advisory_id.split("/")[-1] - - filtered_aliases = [alias for alias in aliases if alias.alias != identifier] - - advisories.append( - {"aliases": filtered_aliases, "advisory": primary, "identifier": identifier} - ) - - context["fixing_advisories_v2"] = advisories - context["fixing_advisories_v2_url"] = None + context["fixing_advisories_v2"] = fixed_by_advisories return context @@ -430,7 +452,7 @@ def get_fixed_package_details(package): pkg_map = { p.id: p - for p in models.PackageV2.objects.filter(id__in=pkg_ids).annotate( + for p in models.PackageV2.objects.filter(id__in=pkg_ids, is_ghost=False).annotate( is_vulnerable=Exists( models.ImpactedPackage.objects.filter(affecting_packages=OuterRef("pk")) ) From 8dac89edfe352eb6ff146a2949bc665c9a700375 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Mon, 30 Mar 2026 17:36:32 +0530 Subject: [PATCH 337/390] Handle None in UI Signed-off-by: Tushar Goel --- .../templates/advisory_detail.html | 21 ++++++++++++++++--- .../templates/package_details_v2.html | 4 ++++ vulnerabilities/templates/packages_v2.html | 8 ++++++- 3 
files changed, 29 insertions(+), 4 deletions(-) diff --git a/vulnerabilities/templates/advisory_detail.html b/vulnerabilities/templates/advisory_detail.html index 595412df4..5e0e61584 100644 --- a/vulnerabilities/templates/advisory_detail.html +++ b/vulnerabilities/templates/advisory_detail.html @@ -214,9 +214,20 @@ {% for severity in severities %} {{ severity.scoring_system }} - {{ severity.value }} - + + {% if severity.value is not None %} + {{ severity.value }} + {% else %} + {{ "" }} + {% endif %} + + + {% if severity.url is not None %} + {{ severity.url }} + {% else %} + {{ "" }} + {% endif %} {% empty %} @@ -483,7 +494,11 @@
{% for severity_vector in severity_vectors %} {% if severity_vector.vector.version == '2.0' %} - Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} + Vector: {{ severity_vector.vector.vectorString }} + {% if severity_vector.origin %} + Found at + {{ severity_vector.origin }} + {% endif %} diff --git a/vulnerabilities/templates/package_details_v2.html b/vulnerabilities/templates/package_details_v2.html index 8c3f62756..a6c07c352 100644 --- a/vulnerabilities/templates/package_details_v2.html +++ b/vulnerabilities/templates/package_details_v2.html @@ -118,7 +118,11 @@ Risk score diff --git a/vulnerabilities/templates/packages_v2.html b/vulnerabilities/templates/packages_v2.html index 4348575da..f114a7159 100644 --- a/vulnerabilities/templates/packages_v2.html +++ b/vulnerabilities/templates/packages_v2.html @@ -62,7 +62,13 @@ target="_self">{{ package.purl }} - + {% empty %} From b20dc39b71f64d49288ae45ebbe4cbfd7ef79250 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Mon, 30 Mar 2026 18:41:55 +0530 Subject: [PATCH 338/390] Handle large number of advisories case Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 103 ++++++++++++++++++++------------------ vulnerabilities/views.py | 69 ++++++++++++------------- 2 files changed, 89 insertions(+), 83 deletions(-) diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index ea82dcce3..fb9847a1b 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -215,6 +215,45 @@ def get_affected_by_vulnerabilities(self, package): advisories = [] + if package.type not in TYPES_WITH_MULTIPLE_IMPORTERS: + advisories_ids = advisories_qs.only("id") + + advisories_ids = list(advisories_ids[:101]) + if len(advisories_ids) > 100: + return None + + advisory_by_avid = {adv.avid: adv for adv in advisories_qs} + avids = advisory_by_avid.keys() + + impacts = ( + package.affected_in_impacts.filter(advisory__avid__in=avids) + .select_related("advisory") + 
.prefetch_related("fixed_by_packages") + ) + + impact_by_avid = {impact.advisory.avid: impact for impact in impacts} + + result = [] + + for advisory in advisories_qs: + impact = impact_by_avid.get(advisory.avid) + if not impact: + continue + + result.append( + { + "advisory_id": advisory.advisory_id.split("/")[-1], + "aliases": [alias.alias for alias in advisory.aliases.all()], + "summary": advisory.summary, + "fixed_by_packages": [pkg.purl for pkg in impact.fixed_by_packages.all()], + "severity": advisory.weighted_severity, + "exploitability": advisory.exploitability, + "risk_score": advisory.risk_score, + } + ) + + return result + is_grouped = AdvisorySet.objects.filter(package=package, relation_type="affecting").exists() if is_grouped: @@ -239,43 +278,25 @@ def get_affected_by_vulnerabilities(self, package): advisories = merge_and_save_grouped_advisories(package, advisories_qs, "affecting") return self.return_advisories_data(package, advisories_qs, advisories) - advisories_ids = advisories_qs.only("id") - - advisories_ids = list(advisories_ids[:101]) - if len(advisories_ids) > 100: - return None - - advisory_by_avid = {adv.avid: adv for adv in advisories_qs} - avids = advisory_by_avid.keys() - - impacts = ( - package.affected_in_impacts.filter(advisory__avid__in=avids) - .select_related("advisory") - .prefetch_related("fixed_by_packages") - ) - - impact_by_avid = {impact.advisory.avid: impact for impact in impacts} - - result = [] + def get_fixing_vulnerabilities(self, package): + advisories_qs = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl(package.package_url) - for advisory in advisories_qs: - impact = impact_by_avid.get(advisory.avid) - if not impact: - continue + if not package.type in TYPES_WITH_MULTIPLE_IMPORTERS: + advisories_ids = advisories_qs.only("id") - result.append( - { - "advisory_id": advisory.advisory_id.split("/")[-1], - "aliases": [alias.alias for alias in advisory.aliases.all()], - "summary": advisory.summary, - 
"fixed_by_packages": [pkg.purl for pkg in impact.fixed_by_packages.all()], - } - ) + advisories_ids = list(advisories_ids[:101]) + if len(advisories_ids) > 100: + return None - return result + results = [] - def get_fixing_vulnerabilities(self, package): - advisories_qs = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl(package.package_url) + for advisory in advisories_qs: + results.append( + { + "advisory_id": advisory.advisory_id.split("/")[-1], + } + ) + return results advisories = [] @@ -302,22 +323,6 @@ def get_fixing_vulnerabilities(self, package): advisories = merge_and_save_grouped_advisories(package, advisories_qs, "fixing") return self.return_fixing_advisories_data(advisories) - advisories_ids = advisories_qs.only("id") - - advisories_ids = list(advisories_ids[:101]) - if len(advisories_ids) > 100: - return None - - results = [] - - for advisory in advisories_qs: - results.append( - { - "advisory_id": advisory.advisory_id.split("/")[-1], - } - ) - return results - def return_fixing_advisories_data(self, advisories): result = [] for advisory in advisories: diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 8051dfb35..829ff22a7 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -216,6 +216,41 @@ def get_context_data(self, **kwargs): context["latest_non_vulnerable"] = latest_non_vulnerable context["package_search_form"] = PackageSearchForm(self.request.GET) + if not package.type in TYPES_WITH_MULTIPLE_IMPORTERS: + context["grouped"] = False + + affected_by_advisories_url = None + fixing_advisories_url = None + + affected_by_advisories_qs_ids = affecting_advisories.only("id") + fixing_advisories_qs_ids = fixed_by_advisories.only("id") + + affected_by_advisories = list(affected_by_advisories_qs_ids[:101]) + if len(affected_by_advisories) > 101: + affected_by_advisories_url = reverse_lazy( + "affected_by_advisories_v2", kwargs={"purl": package.package_url} + ) + context["affected_by_advisories_v2_url"] = 
affected_by_advisories_url + + else: + fixed_pkg_details = get_fixed_package_details(package) + context["fixed_package_details"] = fixed_pkg_details + context["affected_by_advisories_v2"] = affecting_advisories + context["affected_by_advisories_v2_url"] = None + + fixing_advisories = list(fixing_advisories_qs_ids[:101]) + if len(fixing_advisories) > 101: + fixing_advisories_url = reverse_lazy( + "fixing_advisories_v2", kwargs={"purl": package.package_url} + ) + context["fixing_advisories_v2_url"] = fixing_advisories_url + context["fixing_advisories_v2"] = [] + + else: + context["fixing_advisories_v2"] = fixed_by_advisories + + return context + is_grouped = models.AdvisorySet.objects.filter(package=package).exists() if is_grouped: @@ -284,40 +319,6 @@ def get_context_data(self, **kwargs): context["fixing_advisories_v2"] = fixing_advisories return context - context["grouped"] = False - - affected_by_advisories_url = None - fixing_advisories_url = None - - affected_by_advisories_qs_ids = affecting_advisories.only("id") - fixing_advisories_qs_ids = fixed_by_advisories.only("id") - - affected_by_advisories = list(affected_by_advisories_qs_ids[:101]) - if len(affected_by_advisories) > 101: - affected_by_advisories_url = reverse_lazy( - "affected_by_advisories_v2", kwargs={"purl": package.package_url} - ) - context["affected_by_advisories_v2_url"] = affected_by_advisories_url - - else: - fixed_pkg_details = get_fixed_package_details(package) - context["fixed_package_details"] = fixed_pkg_details - context["affected_by_advisories_v2"] = affecting_advisories - context["affected_by_advisories_v2_url"] = None - - fixing_advisories = list(fixing_advisories_qs_ids[:101]) - if len(fixing_advisories) > 101: - fixing_advisories_url = reverse_lazy( - "fixing_advisories_v2", kwargs={"purl": package.package_url} - ) - context["fixing_advisories_v2_url"] = fixing_advisories_url - context["fixing_advisories_v2"] = [] - - else: - context["fixing_advisories_v2"] = fixed_by_advisories - - 
return context - def get_object(self, queryset=None): if queryset is None: queryset = self.get_queryset() From 4f97321140c3b5d16d5ffe2a32d66b57fae7d9d4 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Mon, 30 Mar 2026 19:06:49 +0530 Subject: [PATCH 339/390] Fix views Signed-off-by: Tushar Goel --- .../templates/advisory_detail.html | 18 +++++++++++++--- vulnerabilities/views.py | 21 ++++++++++++------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/vulnerabilities/templates/advisory_detail.html b/vulnerabilities/templates/advisory_detail.html index 5e0e61584..90f1d6d8b 100644 --- a/vulnerabilities/templates/advisory_detail.html +++ b/vulnerabilities/templates/advisory_detail.html @@ -137,7 +137,11 @@ applications, or networks. This metric is determined automatically based on the discovery of known exploits."> Exploitability @@ -146,7 +150,11 @@ data-tooltip="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10." 
>Weighted Severity @@ -157,7 +165,11 @@ " >Risk diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 829ff22a7..87e0c71d6 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -217,6 +217,14 @@ def get_context_data(self, **kwargs): context["package_search_form"] = PackageSearchForm(self.request.GET) if not package.type in TYPES_WITH_MULTIPLE_IMPORTERS: + affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( + purl=package.purl + ) + + fixed_by_advisories = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( + purl=package.purl + ) + context["grouped"] = False affected_by_advisories_url = None @@ -282,15 +290,14 @@ def get_context_data(self, **kwargs): return context - affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( - purl=package.purl - ) - - fixed_by_advisories = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( + if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: + affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( purl=package.purl - ) + ) - if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: + fixed_by_advisories = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( + purl=package.purl + ) fixed_pkg_details = get_fixed_package_details(package) context["fixed_package_details"] = fixed_pkg_details context["grouped"] = True From 4f2d1495c1fa40283d4e180febbed52430500cd9 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Mon, 30 Mar 2026 19:50:05 +0530 Subject: [PATCH 340/390] Fix views Signed-off-by: Tushar Goel --- vulnerabilities/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 87e0c71d6..63d02c5b1 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -234,7 +234,7 @@ def get_context_data(self, **kwargs): fixing_advisories_qs_ids = fixed_by_advisories.only("id") affected_by_advisories = list(affected_by_advisories_qs_ids[:101]) - 
if len(affected_by_advisories) > 101: + if len(affected_by_advisories) > 100: affected_by_advisories_url = reverse_lazy( "affected_by_advisories_v2", kwargs={"purl": package.package_url} ) @@ -247,7 +247,7 @@ def get_context_data(self, **kwargs): context["affected_by_advisories_v2_url"] = None fixing_advisories = list(fixing_advisories_qs_ids[:101]) - if len(fixing_advisories) > 101: + if len(fixing_advisories) > 100: fixing_advisories_url = reverse_lazy( "fixing_advisories_v2", kwargs={"purl": package.package_url} ) From 610c205482060462014170855864108832826dc8 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 31 Mar 2026 00:26:56 +0530 Subject: [PATCH 341/390] Add risk, severity and exploits Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 3 ++ .../templates/package_details_v2.html | 15 ++++++++++ vulnerabilities/utils.py | 29 ++++++++++++++++--- vulnerabilities/views.py | 4 +-- 4 files changed, 45 insertions(+), 6 deletions(-) diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index fb9847a1b..986096165 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -356,6 +356,9 @@ def return_advisories_data(self, package, advisories_qs, advisories): { "advisory_id": advisory["identifier"], "aliases": [alias.alias for alias in advisory["aliases"]], + "weighted_severity": advisory["weighted_severity"], + "exploitability": advisory["exploitability"], + "risk_score": advisory["risk_score"], "summary": advisory["advisory"].summary, "fixed_by_packages": [pkg.purl for pkg in impact.fixed_by_packages.all()], } diff --git a/vulnerabilities/templates/package_details_v2.html b/vulnerabilities/templates/package_details_v2.html index a6c07c352..8511348ec 100644 --- a/vulnerabilities/templates/package_details_v2.html +++ b/vulnerabilities/templates/package_details_v2.html @@ -142,6 +142,7 @@ + @@ -197,6 +198,13 @@ {% endif %} {% endwith %} + {% empty %} @@ -258,6 +266,13 @@ {% endif %} {% endwith %} + {% empty %} diff --git 
a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 5f791d30b..ecf2f6878 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -960,13 +960,34 @@ def get_advisories_from_groups(groups): Return a list of advisories from the merged groups of advisories. """ advisories = [] - for aliases, primary, _ in groups: + weighted_severity = None + exploitability = None + risk_score = None + for aliases, primary, secondaries in groups: + severity_scores = [] + exploitability_scores = [] identifier = primary.advisory_id.split("/")[-1] - filtered_aliases = [alias for alias in aliases if alias.alias != identifier] - + severity_scores.extend([adv.weighted_severity for adv in secondaries]) + exploitability_scores.extend([adv.exploitability for adv in secondaries]) + severity_scores.append(primary.weighted_severity) + exploitability_scores.append(primary.exploitability) + if severity_scores: + weighted_severity = round(max(severity_scores), 1) + if exploitability_scores: + exploitability = max(exploitability_scores) + if exploitability and weighted_severity: + risk_score = min(float(exploitability * weighted_severity), 10.0) + risk_score = round(risk_score, 1) advisories.append( - {"aliases": filtered_aliases, "advisory": primary, "identifier": identifier} + { + "aliases": filtered_aliases, + "advisory": primary, + "identifier": identifier, + "weighted_severity": weighted_severity, + "exploitability": exploitability, + "risk_score": risk_score, + } ) return advisories diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 63d02c5b1..11852aa59 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -218,7 +218,7 @@ def get_context_data(self, **kwargs): if not package.type in TYPES_WITH_MULTIPLE_IMPORTERS: affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( - purl=package.purl + purl=package.purl ) fixed_by_advisories = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( @@ -292,7 +292,7 @@ def 
get_context_data(self, **kwargs): if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( - purl=package.purl + purl=package.purl ) fixed_by_advisories = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl( From af98f071e6408a6c37a62dade593364db740bb67 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 31 Mar 2026 00:30:31 +0530 Subject: [PATCH 342/390] Dedupe fixed_by_packages Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index 986096165..2803ac9b8 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -360,7 +360,7 @@ def return_advisories_data(self, package, advisories_qs, advisories): "exploitability": advisory["exploitability"], "risk_score": advisory["risk_score"], "summary": advisory["advisory"].summary, - "fixed_by_packages": [pkg.purl for pkg in impact.fixed_by_packages.all()], + "fixed_by_packages": list(set([pkg.purl for pkg in impact.fixed_by_packages.all()])), } ) From 97da322e5edeb3af477d65909d7e925a7a4b9e70 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 31 Mar 2026 00:53:30 +0530 Subject: [PATCH 343/390] Fix severity and exploit calculation Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 50 ++++++++++++++++++++++++++++++++------- vulnerabilities/views.py | 46 ++++++++++++++++++++++++++++------- 2 files changed, 78 insertions(+), 18 deletions(-) diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index 2803ac9b8..cf8f1c3ec 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -21,6 +21,7 @@ from vulnerabilities.models import AdvisoryReference from vulnerabilities.models import AdvisorySet +from vulnerabilities.models import AdvisorySetMember from vulnerabilities.models import AdvisorySeverity from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import 
AdvisoryWeakness @@ -257,12 +258,26 @@ def get_affected_by_vulnerabilities(self, package): is_grouped = AdvisorySet.objects.filter(package=package, relation_type="affecting").exists() if is_grouped: - affected_by_advisories_qs = AdvisorySet.objects.filter( - package=package, relation_type="affecting" - ).select_related("primary_advisory") + affected_by_advisories_qs = ( + AdvisorySet.objects.filter(package=package, relation_type="affecting") + .select_related("primary_advisory") + .prefetch_related( + Prefetch( + "members", + queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + "advisory" + ), + to_attr="secondary_members", + ) + ) + ) affected_groups = [ - (list(adv.aliases.all()), adv.primary_advisory, "") + ( + list(adv.aliases.all()), + adv.primary_advisory, + [member.advisory for member in adv.secondary_members], + ) for adv in affected_by_advisories_qs ] @@ -303,12 +318,27 @@ def get_fixing_vulnerabilities(self, package): is_grouped = AdvisorySet.objects.filter(package=package, relation_type="fixing").exists() if is_grouped: - fixing_advisories_qs = AdvisorySet.objects.filter( - package=package, relation_type="fixing" - ).select_related("primary_advisory") + fixing_advisories_qs = ( + AdvisorySet.objects.filter(package=package, relation_type="fixing") + .select_related("primary_advisory") + .prefetch_related( + Prefetch( + "members", + queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + "advisory" + ), + to_attr="secondary_members", + ) + ) + ) fixing_groups = [ - (list(adv.aliases.all()), adv.primary_advisory, "") for adv in fixing_advisories_qs + ( + list(adv.aliases.all()), + adv.primary_advisory, + [member.advisory for member in adv.secondary_members], + ) + for adv in fixing_advisories_qs ] advisories = get_advisories_from_groups(fixing_groups) @@ -360,7 +390,9 @@ def return_advisories_data(self, package, advisories_qs, advisories): "exploitability": advisory["exploitability"], "risk_score": 
advisory["risk_score"], "summary": advisory["advisory"].summary, - "fixed_by_packages": list(set([pkg.purl for pkg in impact.fixed_by_packages.all()])), + "fixed_by_packages": list( + set([pkg.purl for pkg in impact.fixed_by_packages.all()]) + ), } ) diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 11852aa59..c88c437b5 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -42,7 +42,6 @@ from vulnerabilities.models import PipelineRun from vulnerabilities.models import PipelineSchedule from vulnerabilities.pipelines.v2_importers.epss_importer_v2 import EPSSImporterPipeline -from vulnerabilities.pipes.group_advisories import delete_and_save_advisory_set from vulnerabilities.severity_systems import EPSS from vulnerabilities.severity_systems import SCORING_SYSTEMS from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS @@ -266,20 +265,49 @@ def get_context_data(self, **kwargs): fixed_pkg_details = get_fixed_package_details(package) context["fixed_package_details"] = fixed_pkg_details - affected_by_advisories_qs = models.AdvisorySet.objects.filter( - package=package, relation_type="affecting" - ).select_related("primary_advisory") + affected_by_advisories_qs = ( + models.AdvisorySet.objects.filter(package=package, relation_type="affecting") + .select_related("primary_advisory") + .prefetch_related( + Prefetch( + "members", + queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + "advisory" + ), + to_attr="secondary_members", + ) + ) + ) - fixing_advisories_qs = models.AdvisorySet.objects.filter( - package=package, relation_type="fixing" - ).select_related("primary_advisory") + fixing_advisories_qs = ( + models.AdvisorySet.objects.filter(package=package, relation_type="fixing") + .select_related("primary_advisory") + .prefetch_related( + Prefetch( + "members", + queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + "advisory" + ), + to_attr="secondary_members", + ) + ) + ) 
affected_groups = [ - (list(adv.aliases.all()), adv.primary_advisory, "") + ( + list(adv.aliases.all()), + adv.primary_advisory, + [a.advisory for a in adv.secondary_members], + ) for adv in affected_by_advisories_qs ] fixing_groups = [ - (list(adv.aliases.all()), adv.primary_advisory, "") for adv in fixing_advisories_qs + ( + list(adv.aliases.all()), + adv.primary_advisory, + [a.advisory for a in adv.secondary_members], + ) + for adv in fixing_advisories_qs ] affected_advisories = get_advisories_from_groups(affected_groups) From 07433dc7d65552ff9d6cb50c10a2f68c7e5e77fb Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 31 Mar 2026 16:13:12 +0530 Subject: [PATCH 344/390] Fix grouping Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 49 +++++++++------ vulnerabilities/models.py | 26 ++++++++ .../group_advisories_for_packages.py | 7 ++- vulnerabilities/pipes/group_advisories.py | 12 ++-- vulnerabilities/tests/test_advisory_merge.py | 7 ++- vulnerabilities/utils.py | 61 ++++++++++++------- vulnerabilities/views.py | 27 +++++--- 7 files changed, 127 insertions(+), 62 deletions(-) diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index cf8f1c3ec..ea1586394 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -7,6 +7,7 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# +from typing import List from urllib.parse import urlencode from django.db.models import Exists @@ -25,6 +26,8 @@ from vulnerabilities.models import AdvisorySeverity from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import AdvisoryWeakness +from vulnerabilities.models import Group +from vulnerabilities.models import GroupedAdvisory from vulnerabilities.models import ImpactedPackageAffecting from vulnerabilities.models import PackageV2 from vulnerabilities.throttling import PermissionBasedUserRateThrottle @@ -273,15 +276,15 @@ def get_affected_by_vulnerabilities(self, package): ) affected_groups = [ - ( - list(adv.aliases.all()), - adv.primary_advisory, - [member.advisory for member in adv.secondary_members], + Group( + aliases=list(adv.aliases.all()), + primary_advisory=adv.primary_advisory, + secondaries=[member.advisory for member in adv.secondary_members], ) for adv in affected_by_advisories_qs ] - advisories = get_advisories_from_groups(affected_groups) + advisories: List[GroupedAdvisory] = get_advisories_from_groups(affected_groups) return self.return_advisories_data(package, advisories_qs, advisories) if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: @@ -290,7 +293,9 @@ def get_affected_by_vulnerabilities(self, package): "impacted_packages__affecting_packages", "impacted_packages__fixed_by_packages", ) - advisories = merge_and_save_grouped_advisories(package, advisories_qs, "affecting") + advisories: List[GroupedAdvisory] = merge_and_save_grouped_advisories( + package, advisories_qs, "affecting" + ) return self.return_advisories_data(package, advisories_qs, advisories) def get_fixing_vulnerabilities(self, package): @@ -333,15 +338,15 @@ def get_fixing_vulnerabilities(self, package): ) fixing_groups = [ - ( - list(adv.aliases.all()), - adv.primary_advisory, - [member.advisory for member in adv.secondary_members], + Group( + aliases=list(adv.aliases.all()), + primary_advisory=adv.primary_advisory, + secondaries=[member.advisory for 
member in adv.secondary_members], ) for adv in fixing_advisories_qs ] - advisories = get_advisories_from_groups(fixing_groups) + advisories: List[GroupedAdvisory] = get_advisories_from_groups(fixing_groups) return self.return_fixing_advisories_data(advisories) if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: @@ -350,15 +355,18 @@ def get_fixing_vulnerabilities(self, package): "impacted_packages__affecting_packages", "impacted_packages__fixed_by_packages", ) - advisories = merge_and_save_grouped_advisories(package, advisories_qs, "fixing") + advisories: List[GroupedAdvisory] = merge_and_save_grouped_advisories( + package, advisories_qs, "fixing" + ) return self.return_fixing_advisories_data(advisories) def return_fixing_advisories_data(self, advisories): result = [] for advisory in advisories: + assert isinstance(advisory, GroupedAdvisory) result.append( { - "advisory_id": advisory["identifier"], + "advisory_id": advisory.identifier, } ) @@ -378,18 +386,19 @@ def return_advisories_data(self, package, advisories_qs, advisories): result = [] for advisory in advisories: - impact = impact_by_avid.get(advisory["advisory"].avid) + assert isinstance(advisory, GroupedAdvisory) + impact = impact_by_avid.get(advisory.advisory.avid) if not impact: continue result.append( { - "advisory_id": advisory["identifier"], - "aliases": [alias.alias for alias in advisory["aliases"]], - "weighted_severity": advisory["weighted_severity"], - "exploitability": advisory["exploitability"], - "risk_score": advisory["risk_score"], - "summary": advisory["advisory"].summary, + "advisory_id": advisory.identifier, + "aliases": [alias.alias for alias in advisory.aliases], + "weighted_severity": advisory.weighted_severity, + "exploitability": advisory.exploitability, + "risk_score": advisory.risk_score, + "summary": advisory.advisory.summary, "fixed_by_packages": list( set([pkg.purl for pkg in impact.fixed_by_packages.all()]) ), diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 
f51a92dbd..45d8acf55 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -20,6 +20,9 @@ from operator import attrgetter from traceback import format_exc as traceback_format_exc from typing import List +from typing import NamedTuple +from typing import Optional +from typing import Set from typing import Union from urllib.parse import urljoin @@ -3714,3 +3717,26 @@ def __str__(self): class Meta: unique_together = ("vector", "source_advisory") + + +class Group(NamedTuple): + """ + A Group of advisories that have been merged together based on their content and identifiers. + """ + + aliases: Set[AdvisoryAlias] + primary: AdvisoryV2 + secondaries: List[AdvisoryV2] + + +class GroupedAdvisory(NamedTuple): + """ + A GroupedAdvisory represents a single advisory that has been grouped with its aliases and related advisories. + """ + + aliases: Set[AdvisoryAlias] + advisory: AdvisoryV2 + identifier: str + weighted_severity: Optional[float] + exploitability: Optional[float] + risk_score: Optional[float] diff --git a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py index d2c8f6296..db49447ff 100644 --- a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py +++ b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py @@ -7,7 +7,10 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# +from typing import List + from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import Group from vulnerabilities.models import PackageV2 from vulnerabilities.pipelines import VulnerableCodePipeline from vulnerabilities.pipes.group_advisories import delete_and_save_advisory_set @@ -48,8 +51,8 @@ def group_advisoris_for_packages(logger=None): ) try: - affected_groups = merge_advisories(affecting_advisories, package) - fixed_by_groups = merge_advisories(fixed_by_advisories, package) + affected_groups: List[Group] = merge_advisories(affecting_advisories, package) + fixed_by_groups: List[Group] = merge_advisories(fixed_by_advisories, package) delete_and_save_advisory_set(affected_groups, package, relation="affecting") delete_and_save_advisory_set(fixed_by_groups, package, relation="fixing") except Exception as e: diff --git a/vulnerabilities/pipes/group_advisories.py b/vulnerabilities/pipes/group_advisories.py index d66365706..983ac3386 100644 --- a/vulnerabilities/pipes/group_advisories.py +++ b/vulnerabilities/pipes/group_advisories.py @@ -14,31 +14,33 @@ def delete_and_save_advisory_set(groups, package, relation=None): from vulnerabilities.models import AdvisorySet from vulnerabilities.models import AdvisorySetMember + from vulnerabilities.models import Group AdvisorySet.objects.filter(package=package, relation_type=relation).delete() membership_to_create = [] - for identifiers, primary, secondary in groups: + for group in groups: + assert isinstance(group, Group) advisory_set = AdvisorySet.objects.create( package=package, relation_type=relation, - primary_advisory=primary, + primary_advisory=group.primary, ) - advisory_set.aliases.add(*identifiers) + advisory_set.aliases.add(*group.aliases) advisory_set.save() membership_to_create.append( AdvisorySetMember( advisory_set=advisory_set, - advisory=primary, + advisory=group.primary, is_primary=True, ) ) - for adv in secondary: + for adv in group.secondaries: membership_to_create.append( 
AdvisorySetMember( advisory_set=advisory_set, diff --git a/vulnerabilities/tests/test_advisory_merge.py b/vulnerabilities/tests/test_advisory_merge.py index ddcc3cadb..08b586ff3 100644 --- a/vulnerabilities/tests/test_advisory_merge.py +++ b/vulnerabilities/tests/test_advisory_merge.py @@ -15,6 +15,7 @@ from vulnerabilities.models import AdvisorySet from vulnerabilities.models import AdvisorySetMember from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import Group from vulnerabilities.models import ImpactedPackage from vulnerabilities.models import PackageV2 from vulnerabilities.utils import compute_advisory_content_hash @@ -136,8 +137,8 @@ def test_get_advisories_from_groups(self): groups = get_merged_identifier_groups([adv]) result = get_advisories_from_groups(groups) - assert result[0]["identifier"] == "GHSA-ABC-123" - assert len(result[0]["aliases"]) == 1 + assert result[0].identifier == "GHSA-ABC-123" + assert len(result[0].aliases) == 1 def test_delete_and_save_advisory_set(self): package = PackageV2.objects.from_purl("pkg:pypi/sample@1.0.0") @@ -147,7 +148,7 @@ def test_delete_and_save_advisory_set(self): adv1.aliases.create(alias="CVE-1") - groups = [(set(adv1.aliases.all()), adv1, [adv2])] + groups = [Group(aliases=set(adv1.aliases.all()), primary=adv1, secondaries=[adv2])] delete_and_save_advisory_set(groups, package, relation="affecting") diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index ecf2f6878..e8a13821e 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -20,7 +20,9 @@ from functools import total_ordering from http import HTTPStatus from typing import List +from typing import NamedTuple from typing import Optional +from typing import Set from typing import Tuple from typing import Union from unittest.mock import MagicMock @@ -850,6 +852,7 @@ def merge_advisories(advisories, package): """ Merge advisories based on their content hash and identifiers. 
""" + from vulnerabilities.models import Group advisories = list(advisories) @@ -859,7 +862,7 @@ def merge_advisories(advisories, package): content_hash = compute_advisory_content_hash(adv, package) content_hash_map[content_hash].append(adv) - final_groups = [] + final_groups: List[Group] = [] for group in content_hash_map.values(): groups = get_merged_identifier_groups(group) @@ -901,6 +904,7 @@ def get_merged_identifier_groups(advisories): Merge advisories based on their identifiers (advisory_id and aliases). Example: If two advisories share ``advisory_id`` or share an alias, they will be merged together. """ + from vulnerabilities.models import Group identifier_groups = defaultdict(set) @@ -938,7 +942,7 @@ def get_merged_identifier_groups(advisories): if adv not in all_grouped: merged.append({adv}) - final_groups = [] + final_groups: List[Group] = [] for group in merged: identifiers = set() @@ -950,7 +954,7 @@ def get_merged_identifier_groups(advisories): secondary = [a for a in group if a != primary] - final_groups.append((identifiers, primary, secondary)) + final_groups.append(Group(aliases=identifiers, primary=primary, secondaries=secondary)) return final_groups @@ -959,35 +963,48 @@ def get_advisories_from_groups(groups): """ Return a list of advisories from the merged groups of advisories. 
""" + from vulnerabilities.models import Group + from vulnerabilities.models import GroupedAdvisory + advisories = [] - weighted_severity = None - exploitability = None - risk_score = None - for aliases, primary, secondaries in groups: + + for group in groups: + + assert isinstance(group, Group) + weighted_severity = None + exploitability = None + risk_score = None + severity_scores = [] - exploitability_scores = [] - identifier = primary.advisory_id.split("/")[-1] - filtered_aliases = [alias for alias in aliases if alias.alias != identifier] - severity_scores.extend([adv.weighted_severity for adv in secondaries]) - exploitability_scores.extend([adv.exploitability for adv in secondaries]) - severity_scores.append(primary.weighted_severity) - exploitability_scores.append(primary.exploitability) + severity_scores.append(group.primary.weighted_severity or 0.0) + severity_scores.extend([adv.weighted_severity or 0.0 for adv in group.secondaries]) + if severity_scores: weighted_severity = round(max(severity_scores), 1) + + exploitability_scores = [] + exploitability_scores.append(group.primary.exploitability or 0.0) + exploitability_scores.extend([adv.exploitability or 0.0 for adv in group.secondaries]) + if exploitability_scores: exploitability = max(exploitability_scores) + if exploitability and weighted_severity: risk_score = min(float(exploitability * weighted_severity), 10.0) risk_score = round(risk_score, 1) + + identifier = group.primary.advisory_id.split("/")[-1] + filtered_aliases = [alias for alias in group.aliases if alias.alias != identifier] + advisories.append( - { - "aliases": filtered_aliases, - "advisory": primary, - "identifier": identifier, - "weighted_severity": weighted_severity, - "exploitability": exploitability, - "risk_score": risk_score, - } + GroupedAdvisory( + aliases=filtered_aliases, + advisory=group.primary, + identifier=identifier, + weighted_severity=weighted_severity, + exploitability=exploitability, + risk_score=risk_score, + ) ) return 
advisories diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index c88c437b5..f9274a18d 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -8,6 +8,7 @@ # import logging from collections import defaultdict +from typing import List from cvss.exceptions import CVSS2MalformedError from cvss.exceptions import CVSS3MalformedError @@ -39,6 +40,8 @@ from vulnerabilities.forms import VulnerabilitySearchForm from vulnerabilities.models import AdvisorySetMember from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import Group +from vulnerabilities.models import GroupedAdvisory from vulnerabilities.models import PipelineRun from vulnerabilities.models import PipelineSchedule from vulnerabilities.pipelines.v2_importers.epss_importer_v2 import EPSSImporterPipeline @@ -295,23 +298,27 @@ def get_context_data(self, **kwargs): affected_groups = [ ( - list(adv.aliases.all()), - adv.primary_advisory, - [a.advisory for a in adv.secondary_members], + Group( + aliases=list(adv.aliases.all()), + primary=adv.primary_advisory, + secondaries=[a.advisory for a in adv.secondary_members], + ) ) for adv in affected_by_advisories_qs ] fixing_groups = [ ( - list(adv.aliases.all()), - adv.primary_advisory, - [a.advisory for a in adv.secondary_members], + Group( + aliases=list(adv.aliases.all()), + primary=adv.primary_advisory, + secondaries=[a.advisory for a in adv.secondary_members], + ) ) for adv in fixing_advisories_qs ] - affected_advisories = get_advisories_from_groups(affected_groups) - fixing_advisories = get_advisories_from_groups(fixing_groups) + affected_advisories: List[GroupedAdvisory] = get_advisories_from_groups(affected_groups) + fixing_advisories: List[GroupedAdvisory] = get_advisories_from_groups(fixing_groups) context["affected_by_advisories_v2"] = affected_advisories context["fixing_advisories_v2"] = fixing_advisories @@ -336,7 +343,7 @@ def get_context_data(self, **kwargs): "impacted_packages__fixed_by_packages", ) - 
affected_by_advisories = merge_and_save_grouped_advisories( + affected_by_advisories: List[GroupedAdvisory] = merge_and_save_grouped_advisories( package, affecting_advisories, "affecting" ) @@ -346,7 +353,7 @@ def get_context_data(self, **kwargs): "impacted_packages__fixed_by_packages", ) - fixing_advisories = merge_and_save_grouped_advisories( + fixing_advisories: List[GroupedAdvisory] = merge_and_save_grouped_advisories( package, fixed_by_advisories, "fixing" ) From cfb2d7d00290656eec944d4e1a3aacef9257f038 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 31 Mar 2026 16:20:58 +0530 Subject: [PATCH 345/390] Fix API Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index ea1586394..a15d5a0cd 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -278,7 +278,7 @@ def get_affected_by_vulnerabilities(self, package): affected_groups = [ Group( aliases=list(adv.aliases.all()), - primary_advisory=adv.primary_advisory, + primary=adv.primary_advisory, secondaries=[member.advisory for member in adv.secondary_members], ) for adv in affected_by_advisories_qs @@ -340,7 +340,7 @@ def get_fixing_vulnerabilities(self, package): fixing_groups = [ Group( aliases=list(adv.aliases.all()), - primary_advisory=adv.primary_advisory, + primary=adv.primary_advisory, secondaries=[member.advisory for member in adv.secondary_members], ) for adv in fixing_advisories_qs From 28c5c638c1d7eef23ee0bb6d73d08eb3b16ae4b8 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 31 Mar 2026 16:37:50 +0530 Subject: [PATCH 346/390] Ignore grouped case Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 112 +++++++++++++++++----------------- vulnerabilities/views.py | 122 +++++++++++++++++++------------------- 2 files changed, 117 insertions(+), 117 deletions(-) diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index
a15d5a0cd..0d1df5418 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -258,34 +258,34 @@ def get_affected_by_vulnerabilities(self, package): return result - is_grouped = AdvisorySet.objects.filter(package=package, relation_type="affecting").exists() - - if is_grouped: - affected_by_advisories_qs = ( - AdvisorySet.objects.filter(package=package, relation_type="affecting") - .select_related("primary_advisory") - .prefetch_related( - Prefetch( - "members", - queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( - "advisory" - ), - to_attr="secondary_members", - ) - ) - ) - - affected_groups = [ - Group( - aliases=list(adv.aliases.all()), - primary=adv.primary_advisory, - secondaries=[member.advisory for member in adv.secondary_members], - ) - for adv in affected_by_advisories_qs - ] - - advisories: List[GroupedAdvisory] = get_advisories_from_groups(affected_groups) - return self.return_advisories_data(package, advisories_qs, advisories) + # is_grouped = AdvisorySet.objects.filter(package=package, relation_type="affecting").exists() + + # if is_grouped: + # affected_by_advisories_qs = ( + # AdvisorySet.objects.filter(package=package, relation_type="affecting") + # .select_related("primary_advisory") + # .prefetch_related( + # Prefetch( + # "members", + # queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + # "advisory" + # ), + # to_attr="secondary_members", + # ) + # ) + # ) + + # affected_groups = [ + # Group( + # aliases=list(adv.aliases.all()), + # primary=adv.primary_advisory, + # secondaries=[member.advisory for member in adv.secondary_members], + # ) + # for adv in affected_by_advisories_qs + # ] + + # advisories: List[GroupedAdvisory] = get_advisories_from_groups(affected_groups) + # return self.return_advisories_data(package, advisories_qs, advisories) if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: advisories_qs = advisories_qs.prefetch_related( @@ -320,34 +320,34 @@ def 
get_fixing_vulnerabilities(self, package): advisories = [] - is_grouped = AdvisorySet.objects.filter(package=package, relation_type="fixing").exists() - - if is_grouped: - fixing_advisories_qs = ( - AdvisorySet.objects.filter(package=package, relation_type="fixing") - .select_related("primary_advisory") - .prefetch_related( - Prefetch( - "members", - queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( - "advisory" - ), - to_attr="secondary_members", - ) - ) - ) - - fixing_groups = [ - Group( - aliases=list(adv.aliases.all()), - primary=adv.primary_advisory, - secondaries=[member.advisory for member in adv.secondary_members], - ) - for adv in fixing_advisories_qs - ] - - advisories: List[GroupedAdvisory] = get_advisories_from_groups(fixing_groups) - return self.return_fixing_advisories_data(advisories) + # is_grouped = AdvisorySet.objects.filter(package=package, relation_type="fixing").exists() + + # if is_grouped: + # fixing_advisories_qs = ( + # AdvisorySet.objects.filter(package=package, relation_type="fixing") + # .select_related("primary_advisory") + # .prefetch_related( + # Prefetch( + # "members", + # queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + # "advisory" + # ), + # to_attr="secondary_members", + # ) + # ) + # ) + + # fixing_groups = [ + # Group( + # aliases=list(adv.aliases.all()), + # primary=adv.primary_advisory, + # secondaries=[member.advisory for member in adv.secondary_members], + # ) + # for adv in fixing_advisories_qs + # ] + + # advisories: List[GroupedAdvisory] = get_advisories_from_groups(fixing_groups) + # return self.return_fixing_advisories_data(advisories) if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: advisories_qs = advisories_qs.prefetch_related( diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index f9274a18d..a9d599b49 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -261,69 +261,69 @@ def get_context_data(self, **kwargs): return context 
- is_grouped = models.AdvisorySet.objects.filter(package=package).exists() - - if is_grouped: - context["grouped"] = True - fixed_pkg_details = get_fixed_package_details(package) - context["fixed_package_details"] = fixed_pkg_details - - affected_by_advisories_qs = ( - models.AdvisorySet.objects.filter(package=package, relation_type="affecting") - .select_related("primary_advisory") - .prefetch_related( - Prefetch( - "members", - queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( - "advisory" - ), - to_attr="secondary_members", - ) - ) - ) - - fixing_advisories_qs = ( - models.AdvisorySet.objects.filter(package=package, relation_type="fixing") - .select_related("primary_advisory") - .prefetch_related( - Prefetch( - "members", - queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( - "advisory" - ), - to_attr="secondary_members", - ) - ) - ) - - affected_groups = [ - ( - Group( - aliases=list(adv.aliases.all()), - primary=adv.primary_advisory, - secondaries=[a.advisory for a in adv.secondary_members], - ) - ) - for adv in affected_by_advisories_qs - ] - fixing_groups = [ - ( - Group( - aliases=list(adv.aliases.all()), - primary=adv.primary_advisory, - secondaries=[a.advisory for a in adv.secondary_members], - ) - ) - for adv in fixing_advisories_qs - ] - - affected_advisories: List[GroupedAdvisory] = get_advisories_from_groups(affected_groups) - fixing_advisories: List[GroupedAdvisory] = get_advisories_from_groups(fixing_groups) + # is_grouped = models.AdvisorySet.objects.filter(package=package).exists() + + # if is_grouped: + # context["grouped"] = True + # fixed_pkg_details = get_fixed_package_details(package) + # context["fixed_package_details"] = fixed_pkg_details + + # affected_by_advisories_qs = ( + # models.AdvisorySet.objects.filter(package=package, relation_type="affecting") + # .select_related("primary_advisory") + # .prefetch_related( + # Prefetch( + # "members", + # 
queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + # "advisory" + # ), + # to_attr="secondary_members", + # ) + # ) + # ) - context["affected_by_advisories_v2"] = affected_advisories - context["fixing_advisories_v2"] = fixing_advisories + # fixing_advisories_qs = ( + # models.AdvisorySet.objects.filter(package=package, relation_type="fixing") + # .select_related("primary_advisory") + # .prefetch_related( + # Prefetch( + # "members", + # queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + # "advisory" + # ), + # to_attr="secondary_members", + # ) + # ) + # ) - return context + # affected_groups = [ + # ( + # Group( + # aliases=list(adv.aliases.all()), + # primary=adv.primary_advisory, + # secondaries=[a.advisory for a in adv.secondary_members], + # ) + # ) + # for adv in affected_by_advisories_qs + # ] + # fixing_groups = [ + # ( + # Group( + # aliases=list(adv.aliases.all()), + # primary=adv.primary_advisory, + # secondaries=[a.advisory for a in adv.secondary_members], + # ) + # ) + # for adv in fixing_advisories_qs + # ] + + # affected_advisories: List[GroupedAdvisory] = get_advisories_from_groups(affected_groups) + # fixing_advisories: List[GroupedAdvisory] = get_advisories_from_groups(fixing_groups) + + # context["affected_by_advisories_v2"] = affected_advisories + # context["fixing_advisories_v2"] = fixing_advisories + + # return context if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( From 7c4859e732b95b55f2df46ee2e023796cce2731d Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 31 Mar 2026 16:54:06 +0530 Subject: [PATCH 347/390] Revert grouping Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 112 +++++++++++++++++----------------- vulnerabilities/views.py | 122 +++++++++++++++++++------------------- 2 files changed, 117 insertions(+), 117 deletions(-) diff --git a/vulnerabilities/api_v3.py 
b/vulnerabilities/api_v3.py index 0d1df5418..a15d5a0cd 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -258,34 +258,34 @@ def get_affected_by_vulnerabilities(self, package): return result - # is_grouped = AdvisorySet.objects.filter(package=package, relation_type="affecting").exists() - - # if is_grouped: - # affected_by_advisories_qs = ( - # AdvisorySet.objects.filter(package=package, relation_type="affecting") - # .select_related("primary_advisory") - # .prefetch_related( - # Prefetch( - # "members", - # queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( - # "advisory" - # ), - # to_attr="secondary_members", - # ) - # ) - # ) - - # affected_groups = [ - # Group( - # aliases=list(adv.aliases.all()), - # primary=adv.primary_advisory, - # secondaries=[member.advisory for member in adv.secondary_members], - # ) - # for adv in affected_by_advisories_qs - # ] - - # advisories: List[GroupedAdvisory] = get_advisories_from_groups(affected_groups) - # return self.return_advisories_data(package, advisories_qs, advisories) + is_grouped = AdvisorySet.objects.filter(package=package, relation_type="affecting").exists() + + if is_grouped: + affected_by_advisories_qs = ( + AdvisorySet.objects.filter(package=package, relation_type="affecting") + .select_related("primary_advisory") + .prefetch_related( + Prefetch( + "members", + queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + "advisory" + ), + to_attr="secondary_members", + ) + ) + ) + + affected_groups = [ + Group( + aliases=list(adv.aliases.all()), + primary=adv.primary_advisory, + secondaries=[member.advisory for member in adv.secondary_members], + ) + for adv in affected_by_advisories_qs + ] + + advisories: List[GroupedAdvisory] = get_advisories_from_groups(affected_groups) + return self.return_advisories_data(package, advisories_qs, advisories) if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: advisories_qs = advisories_qs.prefetch_related( @@ 
-320,34 +320,34 @@ def get_fixing_vulnerabilities(self, package): advisories = [] - # is_grouped = AdvisorySet.objects.filter(package=package, relation_type="fixing").exists() - - # if is_grouped: - # fixing_advisories_qs = ( - # AdvisorySet.objects.filter(package=package, relation_type="fixing") - # .select_related("primary_advisory") - # .prefetch_related( - # Prefetch( - # "members", - # queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( - # "advisory" - # ), - # to_attr="secondary_members", - # ) - # ) - # ) - - # fixing_groups = [ - # Group( - # aliases=list(adv.aliases.all()), - # primary=adv.primary_advisory, - # secondaries=[member.advisory for member in adv.secondary_members], - # ) - # for adv in fixing_advisories_qs - # ] - - # advisories: List[GroupedAdvisory] = get_advisories_from_groups(fixing_groups) - # return self.return_fixing_advisories_data(advisories) + is_grouped = AdvisorySet.objects.filter(package=package, relation_type="fixing").exists() + + if is_grouped: + fixing_advisories_qs = ( + AdvisorySet.objects.filter(package=package, relation_type="fixing") + .select_related("primary_advisory") + .prefetch_related( + Prefetch( + "members", + queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + "advisory" + ), + to_attr="secondary_members", + ) + ) + ) + + fixing_groups = [ + Group( + aliases=list(adv.aliases.all()), + primary=adv.primary_advisory, + secondaries=[member.advisory for member in adv.secondary_members], + ) + for adv in fixing_advisories_qs + ] + + advisories: List[GroupedAdvisory] = get_advisories_from_groups(fixing_groups) + return self.return_fixing_advisories_data(advisories) if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: advisories_qs = advisories_qs.prefetch_related( diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index a9d599b49..f9274a18d 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -261,69 +261,69 @@ def get_context_data(self, 
**kwargs): return context - # is_grouped = models.AdvisorySet.objects.filter(package=package).exists() - - # if is_grouped: - # context["grouped"] = True - # fixed_pkg_details = get_fixed_package_details(package) - # context["fixed_package_details"] = fixed_pkg_details - - # affected_by_advisories_qs = ( - # models.AdvisorySet.objects.filter(package=package, relation_type="affecting") - # .select_related("primary_advisory") - # .prefetch_related( - # Prefetch( - # "members", - # queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( - # "advisory" - # ), - # to_attr="secondary_members", - # ) - # ) - # ) + is_grouped = models.AdvisorySet.objects.filter(package=package).exists() - # fixing_advisories_qs = ( - # models.AdvisorySet.objects.filter(package=package, relation_type="fixing") - # .select_related("primary_advisory") - # .prefetch_related( - # Prefetch( - # "members", - # queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( - # "advisory" - # ), - # to_attr="secondary_members", - # ) - # ) - # ) + if is_grouped: + context["grouped"] = True + fixed_pkg_details = get_fixed_package_details(package) + context["fixed_package_details"] = fixed_pkg_details + + affected_by_advisories_qs = ( + models.AdvisorySet.objects.filter(package=package, relation_type="affecting") + .select_related("primary_advisory") + .prefetch_related( + Prefetch( + "members", + queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + "advisory" + ), + to_attr="secondary_members", + ) + ) + ) + + fixing_advisories_qs = ( + models.AdvisorySet.objects.filter(package=package, relation_type="fixing") + .select_related("primary_advisory") + .prefetch_related( + Prefetch( + "members", + queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( + "advisory" + ), + to_attr="secondary_members", + ) + ) + ) + + affected_groups = [ + ( + Group( + aliases=list(adv.aliases.all()), + primary=adv.primary_advisory, + 
secondaries=[a.advisory for a in adv.secondary_members], + ) + ) + for adv in affected_by_advisories_qs + ] + fixing_groups = [ + ( + Group( + aliases=list(adv.aliases.all()), + primary=adv.primary_advisory, + secondaries=[a.advisory for a in adv.secondary_members], + ) + ) + for adv in fixing_advisories_qs + ] + + affected_advisories: List[GroupedAdvisory] = get_advisories_from_groups(affected_groups) + fixing_advisories: List[GroupedAdvisory] = get_advisories_from_groups(fixing_groups) - # affected_groups = [ - # ( - # Group( - # aliases=list(adv.aliases.all()), - # primary=adv.primary_advisory, - # secondaries=[a.advisory for a in adv.secondary_members], - # ) - # ) - # for adv in affected_by_advisories_qs - # ] - # fixing_groups = [ - # ( - # Group( - # aliases=list(adv.aliases.all()), - # primary=adv.primary_advisory, - # secondaries=[a.advisory for a in adv.secondary_members], - # ) - # ) - # for adv in fixing_advisories_qs - # ] - - # affected_advisories: List[GroupedAdvisory] = get_advisories_from_groups(affected_groups) - # fixing_advisories: List[GroupedAdvisory] = get_advisories_from_groups(fixing_groups) - - # context["affected_by_advisories_v2"] = affected_advisories - # context["fixing_advisories_v2"] = fixing_advisories - - # return context + context["affected_by_advisories_v2"] = affected_advisories + context["fixing_advisories_v2"] = fixing_advisories + + return context if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( From 8f17b7ea3cc66f451657fbf17dc827de4ae0020d Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 31 Mar 2026 18:20:45 +0530 Subject: [PATCH 348/390] Change advisory ID for pypa importer Signed-off-by: Tushar Goel --- vulnerabilities/pipelines/v2_importers/pypa_importer.py | 3 +++ vulnerabilities/pipes/osv_v2.py | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/vulnerabilities/pipelines/v2_importers/pypa_importer.py 
b/vulnerabilities/pipelines/v2_importers/pypa_importer.py index 7a80ed70f..142c8a385 100644 --- a/vulnerabilities/pipelines/v2_importers/pypa_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pypa_importer.py @@ -59,11 +59,14 @@ def collect_advisories(self) -> Iterable[AdvisoryDataV2]: ) advisory_text = advisory.read_text() advisory_dict = saneyaml.load(advisory_text) + advisory_path = advisory.relative_to(base_directory) + advisory_id = advisory_path.parent.stem + "/" + advisory_path.stem yield parse_advisory_data_v3( raw_data=advisory_dict, supported_ecosystems=["pypi"], advisory_url=advisory_url, advisory_text=advisory_text, + advisory_id=advisory_id, ) def clean_downloads(self): diff --git a/vulnerabilities/pipes/osv_v2.py b/vulnerabilities/pipes/osv_v2.py index e70ba4a4a..65b5a5904 100644 --- a/vulnerabilities/pipes/osv_v2.py +++ b/vulnerabilities/pipes/osv_v2.py @@ -59,13 +59,18 @@ def parse_advisory_data_v3( - raw_data: dict, supported_ecosystems, advisory_url: str, advisory_text: str + raw_data: dict, + supported_ecosystems, + advisory_url: str, + advisory_text: str, + advisory_id: Optional[str] = None, ) -> Optional[AdvisoryDataV2]: """ Return an AdvisoryData build from a ``raw_data`` mapping of OSV advisory and a ``supported_ecosystem`` string. 
""" - advisory_id = raw_data.get("id") or "" + if not advisory_id: + advisory_id = raw_data.get("id") or "" if not advisory_id: logger.error(f"Missing advisory id in OSV data: {raw_data}") return None From 5951dfd91ea28498bd4a3d314698804fbbaaf6d8 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 31 Mar 2026 18:24:02 +0530 Subject: [PATCH 349/390] Change documentation Signed-off-by: Tushar Goel --- PIPELINES-AVID.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PIPELINES-AVID.rst b/PIPELINES-AVID.rst index 43de21e19..3d82400f8 100644 --- a/PIPELINES-AVID.rst +++ b/PIPELINES-AVID.rst @@ -55,7 +55,7 @@ * - project-kb-statements_v2 - Vulnerability ID of the record * - pypa_importer_v2 - - ID of the OSV record + - {package_name}/{ID of the OSV record} * - pysec_importer_v2 - ID of the OSV record * - redhat_importer_v2 From 63f3416e12421da18f979459636b717e8b10dc4f Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 31 Mar 2026 19:18:19 +0530 Subject: [PATCH 350/390] Increase page_size for pagination Signed-off-by: Tushar Goel --- etc/nginx/conf.d/default.conf | 1 + vulnerablecode/settings.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/etc/nginx/conf.d/default.conf b/etc/nginx/conf.d/default.conf index ce8081c06..131882479 100644 --- a/etc/nginx/conf.d/default.conf +++ b/etc/nginx/conf.d/default.conf @@ -12,6 +12,7 @@ server { proxy_redirect off; client_max_body_size 10G; proxy_read_timeout 600s; + proxy_set_header X-Forwarded-Proto $scheme; } location /static/ { diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index 435cb8953..eaf2c1276 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -251,7 +251,7 @@ "EXCEPTION_HANDLER": "vulnerabilities.throttling.throttled_exception_handler", "DEFAULT_PAGINATION_CLASS": "vulnerabilities.pagination.SmallResultSetPagination", # Limit the load on the Database returning a small number of records by default. 
https://github.com/nexB/vulnerablecode/issues/819 - "PAGE_SIZE": 10, + "PAGE_SIZE": 100, # for API docs "DEFAULT_SCHEMA_CLASS": "drf_spectacular.openapi.AutoSchema", "DATETIME_FORMAT": "%Y-%m-%dT%H:%M:%SZ", From e168ba9e671709aef868431dd5347ac9f9e026e2 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 1 Apr 2026 16:35:43 +0530 Subject: [PATCH 351/390] Remove risk score from UI Signed-off-by: Tushar Goel --- vulnerabilities/templates/package_details_v2.html | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/vulnerabilities/templates/package_details_v2.html b/vulnerabilities/templates/package_details_v2.html index 8511348ec..a6c07c352 100644 --- a/vulnerabilities/templates/package_details_v2.html +++ b/vulnerabilities/templates/package_details_v2.html @@ -142,7 +142,6 @@ - @@ -198,13 +197,6 @@ {% endif %} {% endwith %} - {% empty %} @@ -266,13 +258,6 @@ {% endif %} {% endwith %} - {% empty %} From 012c3ac9f0756271bb7bc574c3643dc15519da3c Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 1 Apr 2026 16:39:42 +0530 Subject: [PATCH 352/390] Update API V3 usage Signed-off-by: Tushar Goel --- api_v3_usage.rst | 2 +- vulnerabilities/api_v3.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/api_v3_usage.rst b/api_v3_usage.rst index 26ed9377f..0da3fe1af 100644 --- a/api_v3_usage.rst +++ b/api_v3_usage.rst @@ -83,7 +83,7 @@ Parameters: - ``purls`` — list of package URLs to query - ``details`` — boolean (default: ``false``) -- ``approximate`` — boolean (default: ``false``) +- ``ignore_qualifiers_subpath`` — boolean (default: ``false``) The ``approximate`` flag replaces the previous ``plain_purl`` parameter. When set to ``true``, qualifiers and subpaths in PURLs are ignored. 
diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index a15d5a0cd..ffa5bd941 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -43,13 +43,13 @@ class PackageQuerySerializer(serializers.Serializer): default=list, ) details = serializers.BooleanField(default=False) - approximate = serializers.BooleanField(default=False) + ignore_qualifiers_subpath = serializers.BooleanField(default=False) def validate(self, data): if not data["purls"]: - if data["details"] or data["approximate"]: + if data["details"] or data["ignore_qualifiers_subpath"]: raise serializers.ValidationError( - "details and approximate must be false when purls is empty" + "``details`` and ``ignore_qualifiers_subpath`` must be false when purls is empty" ) return data @@ -428,7 +428,7 @@ def create(self, request, *args, **kwargs): purls = serializer.validated_data["purls"] details = serializer.validated_data["details"] - approximate = serializer.validated_data["approximate"] + ignore_qualifiers_subpath = serializer.validated_data["ignore_qualifiers_subpath"] if not purls: impacted = ImpactedPackageAffecting.objects.filter(package_id=OuterRef("id")) @@ -444,7 +444,7 @@ def create(self, request, *args, **kwargs): plain_purls = None - if approximate: + if ignore_qualifiers_subpath: plain_purls = [ str( PackageURL( @@ -458,7 +458,7 @@ def create(self, request, *args, **kwargs): ] if not details: - if approximate: + if ignore_qualifiers_subpath: query = ( PackageV2.objects.filter(plain_package_url__in=plain_purls) .values_list("plain_package_url", flat=True) @@ -476,7 +476,7 @@ def create(self, request, *args, **kwargs): page = self.paginate_queryset(query) return self.get_paginated_response(page) - if approximate: + if ignore_qualifiers_subpath: query = ( PackageV2.objects.filter(plain_package_url__in=plain_purls) .order_by("plain_package_url") From ae1b71b89f657363ad34454ec37e4aacf911f1c0 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 1 Apr 2026 16:45:21 +0530 
Subject: [PATCH 353/390] Change tests Signed-off-by: Tushar Goel --- api_v3_usage.rst | 38 +++++++++++++++++----------- vulnerabilities/tests/test_api_v3.py | 4 +-- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/api_v3_usage.rst b/api_v3_usage.rst index 0da3fe1af..23732703c 100644 --- a/api_v3_usage.rst +++ b/api_v3_usage.rst @@ -85,7 +85,7 @@ Parameters: - ``details`` — boolean (default: ``false``) - ``ignore_qualifiers_subpath`` — boolean (default: ``false``) -The ``approximate`` flag replaces the previous ``plain_purl`` parameter. +The ``ignore_qualifiers_subpath`` flag replaces the previous ``plain_purl`` parameter. When set to ``true``, qualifiers and subpaths in PURLs are ignored. @@ -140,12 +140,16 @@ Example response: "purl": "pkg:npm/atob@2.0.3", "affected_by_vulnerabilities": [ { - "advisory_id": "nodejs_security_wg/npm-403", - "fixed_by_packages": [ - "pkg:npm/atob@2.1.0" - ], - "duplicate_advisory_ids": [] - } + "advisory_id": "GHSA-g5vw-3h65-2q3v", + "aliases": [], + "weighted_severity": null, + "exploitability_score": null, + "risk_score": null, + "summary": "Access control vulnerable to user data", + "fixed_by_packages": [ + "pkg:pypi/accesscontrol@7.2" + ], + }, ], "fixing_vulnerabilities": [], "next_non_vulnerable_version": "2.1.0", @@ -165,7 +169,7 @@ Using Approximate Matching { "purls": ["pkg:npm/atob@2.0.3?foo=bar"], - "approximate": true, + "ignore_qualifiers_subpath": true, "details": true } @@ -181,13 +185,17 @@ Example response: { "purl": "pkg:npm/atob@2.0.3", "affected_by_vulnerabilities": [ - { - "advisory_id": "nodejs_security_wg/npm-403", - "fixed_by_packages": [ - "pkg:npm/atob@2.1.0" - ], - "duplicate_advisory_ids": [] - } + { + "advisory_id": "GHSA-g5vw-3h65-2q3v", + "aliases": [], + "weighted_severity": null, + "exploitability_score": null, + "risk_score": null, + "summary": "Access control vulnerable to user data", + "fixed_by_packages": [ + "pkg:pypi/accesscontrol@7.2" + ], + } ], "fixing_vulnerabilities": [], 
"next_non_vulnerable_version": "2.1.0", diff --git a/vulnerabilities/tests/test_api_v3.py b/vulnerabilities/tests/test_api_v3.py index fa8a08b33..c7201af6a 100644 --- a/vulnerabilities/tests/test_api_v3.py +++ b/vulnerabilities/tests/test_api_v3.py @@ -126,14 +126,14 @@ def test_packages_pagination(self): self.assertIn("results", response.data) self.assertIn("next", response.data) - def test_packages_approximate(self): + def test_packages_ignore_qualifiers_subpath(self): url = reverse("package-v3-list") response = self.client.post( url, data={ "purls": ["pkg:pypi/sample@1.0.0?foo=bar"], - "approximate": True, + "ignore_qualifiers_subpath": True, "details": False, }, format="json", From 0eb2acd49158ce1e2f30ee5bb7a51da394881598 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 1 Apr 2026 16:47:57 +0530 Subject: [PATCH 354/390] Update changelog and prep for release Signed-off-by: Tushar Goel --- CHANGELOG.rst | 6 ++++++ setup.cfg | 2 +- vulnerablecode/__init__.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 294004e08..4c3d9efb4 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,12 @@ Release notes ============= +Version v38.0.0 +--------------------- + +- This is a major version, we have changed our V3 API, refer to ``api_v3_usage.rst`` for details. +- We have started grouping advisories which have aliases or identifiers in common and also affect same set of packages together. 
+ Version v37.0.0 --------------------- diff --git a/setup.cfg b/setup.cfg index 7e11ae621..5c8efc7dd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = vulnerablecode -version = 37.0.0 +version = 38.0.0 license = Apache-2.0 AND CC-BY-SA-4.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 diff --git a/vulnerablecode/__init__.py b/vulnerablecode/__init__.py index 13c70b495..80b725801 100644 --- a/vulnerablecode/__init__.py +++ b/vulnerablecode/__init__.py @@ -14,7 +14,7 @@ import git -__version__ = "37.0.0" +__version__ = "38.0.0" PROJECT_DIR = Path(__file__).resolve().parent From 67cf3645dd37f9c69ac6e0fa96232bd2dae4e347 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 1 Apr 2026 23:27:31 +0530 Subject: [PATCH 355/390] Fix tests Signed-off-by: Tushar Goel --- etc/nginx/conf.d/default.conf | 1 + vulnerabilities/pipes/osv_v2.py | 12 +++++++----- vulnerabilities/tests/test_api_v2.py | 8 ++++---- vulnerabilities/tests/test_api_v3.py | 4 ++-- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/etc/nginx/conf.d/default.conf b/etc/nginx/conf.d/default.conf index 131882479..754f65b76 100644 --- a/etc/nginx/conf.d/default.conf +++ b/etc/nginx/conf.d/default.conf @@ -13,6 +13,7 @@ server { client_max_body_size 10G; proxy_read_timeout 600s; proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; } location /static/ { diff --git a/vulnerabilities/pipes/osv_v2.py b/vulnerabilities/pipes/osv_v2.py index 65b5a5904..0f8a29e78 100644 --- a/vulnerabilities/pipes/osv_v2.py +++ b/vulnerabilities/pipes/osv_v2.py @@ -69,17 +69,19 @@ def parse_advisory_data_v3( Return an AdvisoryData build from a ``raw_data`` mapping of OSV advisory and a ``supported_ecosystem`` string. 
""" - if not advisory_id: - advisory_id = raw_data.get("id") or "" - if not advisory_id: + adv_id = raw_data.get("id") + if not adv_id: logger.error(f"Missing advisory id in OSV data: {raw_data}") return None + aliases = raw_data.get("aliases") or [] + if not advisory_id: + advisory_id = adv_id + else: + aliases.append(adv_id) summary = raw_data.get("summary") or "" details = raw_data.get("details") or "" summary = build_description(summary=summary, description=details) - aliases = raw_data.get("aliases") or [] aliases.extend(raw_data.get("upstream", [])) - date_published = get_published_date(raw_data=raw_data) severities = list(get_severities(raw_data=raw_data, url=advisory_url)) references = get_references_v2(raw_data=raw_data) diff --git a/vulnerabilities/tests/test_api_v2.py b/vulnerabilities/tests/test_api_v2.py index c4abe3b97..be447ab0b 100644 --- a/vulnerabilities/tests/test_api_v2.py +++ b/vulnerabilities/tests/test_api_v2.py @@ -185,8 +185,8 @@ def test_list_vulnerabilities_pagination(self): self.assertIn("previous", response.data) # The 'vulnerabilities' dictionary should contain vulnerabilities up to the page limit self.assertEqual( - len(response.data["results"]["vulnerabilities"]), 10 - ) # Assuming default page size is 10 + len(response.data["results"]["vulnerabilities"]), 14 + ) # Assuming default page size is 100 class PackageV2ViewSetTest(APITestCase): @@ -346,8 +346,8 @@ def test_list_packages_pagination(self): self.assertIn("next", response.data) self.assertIn("previous", response.data) self.assertEqual( - len(response.data["results"]["packages"]), 10 - ) # Assuming default page size is 10 + len(response.data["results"]["packages"]), 14 + ) # Assuming default page size is 100 def test_invalid_vulnerability_filter(self): """ diff --git a/vulnerabilities/tests/test_api_v3.py b/vulnerabilities/tests/test_api_v3.py index c7201af6a..280662f2c 100644 --- a/vulnerabilities/tests/test_api_v3.py +++ b/vulnerabilities/tests/test_api_v3.py @@ -182,7 +182,7 
@@ def test_advisories_post(self): ) self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(len(response.data["results"]), 10) + self.assertEqual(len(response.data["results"]), 100) advisory = response.data["results"][0] self.assertEqual(advisory["advisory_id"], "ghsa_importer/GHSA-12341") @@ -229,5 +229,5 @@ def test_get_all_vulnerable_purls(self): self.assertEqual(response.status_code, status.HTTP_200_OK) results = response.data["results"] - self.assertEqual(len(results), 10) + self.assertEqual(len(results), 100) self.assertIn("next", response.data) From fd5250976e754f7b3f507cb8291c2a21a45e7daa Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 1 Apr 2026 23:41:10 +0530 Subject: [PATCH 356/390] Fix views for ungrouped advisories Signed-off-by: Tushar Goel --- .../templates/package_details_v2.html | 16 +++++++------- vulnerabilities/views.py | 22 +++++++++++++++++-- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/vulnerabilities/templates/package_details_v2.html b/vulnerabilities/templates/package_details_v2.html index a6c07c352..b6aa84009 100644 --- a/vulnerabilities/templates/package_details_v2.html +++ b/vulnerabilities/templates/package_details_v2.html @@ -211,15 +211,15 @@ {% for advisory in affected_by_advisories_v2 %}
Exploitability (E) + {% if package.risk_score is not None %} {{package.risk_score}} + {% else %} + {{""}} + {% endif %}
{{ package.is_vulnerable|yesno:"Yes,No" }}{{ package.risk_score }} + {% if package.risk_score is not None %} + {{ package.risk_score }} + {% else %} + {{ "" }} + {% endif %} +
- {{ advisory.exploitability }} + {% if advisory.exploitability is not None %} + {{ advisory.exploitability }} + {% else %} + {{ "" }} + {% endif %}
- {{ advisory.weighted_severity }} + {% if advisory.weighted_severity is not None %} + {{ advisory.weighted_severity }} + {% else %} + {{ "" }} + {% endif %}
- {{ advisory.risk_score }} + {% if advisory.risk_score is not None %} + {{ advisory.risk_score }} + {% else %} + {{ "" }} + {% endif %}
Advisory Summary Fixed in package versionRisk score
+ {% if advisory.risk_score is not None %} + {{ advisory.risk_score }} + {% else %} + {{ "" }} + {% endif %} +
+ {% if advisory.risk_score is not None %} + {{ advisory.risk_score }} + {% else %} + {{ "" }} + {% endif %} +
Advisory Summary Fixed in package versionRisk score
- {% if advisory.risk_score is not None %} - {{ advisory.risk_score }} - {% else %} - {{ "" }} - {% endif %} -
- {% if advisory.risk_score is not None %} - {{ advisory.risk_score }} - {% else %} - {{ "" }} - {% endif %} -
- + {{advisory.advisory_id }}
- {% if advisory.aliases.all|length != 0 %} + {% if advisory.advisory.aliases.all|length != 0 %} Aliases: {% endif %}
- {% for alias in advisory.aliases.all %} + {% for alias in advisory.advisory.aliases.all %} {% if alias.url %} {{ alias }} @@ -232,10 +232,10 @@
- {{ advisory.summary|truncatewords:20 }} + {{ advisory.advisory.summary|truncatewords:20 }} - {% with fixed=fixed_package_details|get_item:advisory.avid %} + {% with fixed=fixed_package_details|get_item:advisory.advisory.avid %} {% if fixed %} {% for item in fixed %}
@@ -336,16 +336,16 @@ {% for advisory in fixing_advisories_v2 %}
- + {{advisory.advisory_id }}
- {{ advisory.summary|truncatewords:20 }} + {{ advisory.advisory.summary|truncatewords:20 }} - {% for alias in advisory.aliases.all %} + {% for alias in advisory.advisory.aliases.all %} {% if alias.url %} {{ alias }} diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index f9274a18d..2a3d737a4 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -245,7 +245,15 @@ def get_context_data(self, **kwargs): else: fixed_pkg_details = get_fixed_package_details(package) context["fixed_package_details"] = fixed_pkg_details - context["affected_by_advisories_v2"] = affecting_advisories + affecting_advs = [] + for adv in affecting_advisories: + affecting_advs.append( + { + "advisory_id": adv.advisory_id.split("/")[-1], + "advisory": adv, + } + ) + context["affected_by_advisories_v2"] = affecting_advs context["affected_by_advisories_v2_url"] = None fixing_advisories = list(fixing_advisories_qs_ids[:101]) @@ -257,7 +265,17 @@ def get_context_data(self, **kwargs): context["fixing_advisories_v2"] = [] else: - context["fixing_advisories_v2"] = fixed_by_advisories + fixed_by_advisories = fixed_by_advisories.prefetch_related( + "aliases", + ) + fixed_by_advisories = list(fixed_by_advisories) + fix_advs = [] + for fixed_by_adv in fixed_by_advisories: + fix_advs.append( + {"advisory_id": fixed_by_adv.advisory_id.split("/")[-1], "advisory": fixed_by_adv} + ) + + context["fixing_advisories_v2"] = fix_advs return context From 959709cfe7426eaf81c4508192ad79bb4458c4f7 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 2 Apr 2026 11:29:19 +0530 Subject: [PATCH 357/390] Minor fixes Signed-off-by: Tushar Goel --- vulnerabilities/improvers/__init__.py | 2 -- .../v2_improvers/group_advisories_for_packages.py | 2 +- vulnerabilities/tests/test_api_v3.py | 9 +++++++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 3e991d658..d55ecafdb 100644 --- 
a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -20,7 +20,6 @@ from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline from vulnerabilities.pipelines import remove_duplicate_advisories from vulnerabilities.pipelines.v2_improvers import collect_ssvc_trees -from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2 from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2 from vulnerabilities.pipelines.v2_improvers import ( computer_package_version_rank as compute_version_rank_v2, @@ -70,7 +69,6 @@ enhance_with_metasploit_v2.MetasploitImproverPipeline, compute_package_risk_v2.ComputePackageRiskPipeline, compute_version_rank_v2.ComputeVersionRankPipeline, - compute_advisory_todo_v2.ComputeToDo, unfurl_version_range_v2.UnfurlVersionRangePipeline, compute_advisory_todo.ComputeToDo, collect_ssvc_trees.CollectSSVCPipeline, diff --git a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py index db49447ff..b34727078 100644 --- a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py +++ b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py @@ -19,7 +19,7 @@ class GroupAdvisoriesForPackages(VulnerableCodePipeline): - """Detect and flag packages that do not exist upstream.""" + """Group advisories for packages that have multiple importers""" pipeline_id = "group_advisories_for_packages" diff --git a/vulnerabilities/tests/test_api_v3.py b/vulnerabilities/tests/test_api_v3.py index 280662f2c..36dd7fba1 100644 --- a/vulnerabilities/tests/test_api_v3.py +++ b/vulnerabilities/tests/test_api_v3.py @@ -1,3 +1,12 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. 
+# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + from django.urls import reverse from packageurl import PackageURL from rest_framework import status From 78ca5283336a94044c70736f8fd26f226b309e11 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 2 Apr 2026 11:50:42 +0530 Subject: [PATCH 358/390] Compute rank while unfurling Signed-off-by: Tushar Goel --- .../group_advisories_for_packages.py | 4 +- .../v2_improvers/unfurl_version_range.py | 2 + .../templates/package_details_v3.html | 367 ------------------ vulnerabilities/views.py | 109 +----- vulnerablecode/urls.py | 1 - 5 files changed, 8 insertions(+), 475 deletions(-) delete mode 100644 vulnerabilities/templates/package_details_v3.html diff --git a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py index b34727078..ea6fc9185 100644 --- a/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py +++ b/vulnerabilities/pipelines/v2_improvers/group_advisories_for_packages.py @@ -33,7 +33,7 @@ def group_advisories_for_packages(self): def group_advisoris_for_packages(logger=None): for package in PackageV2.objects.filter(type__in=TYPES_WITH_MULTIPLE_IMPORTERS).iterator(): - print(f"Grouping advisories for package {package.purl}") + logger(f"Grouping advisories for package {package.purl}") affecting_advisories = AdvisoryV2.objects.latest_affecting_advisories_for_purl( purl=package.purl ).prefetch_related( @@ -56,5 +56,5 @@ def group_advisoris_for_packages(logger=None): delete_and_save_advisory_set(affected_groups, package, relation="affecting") delete_and_save_advisory_set(fixed_by_groups, package, relation="fixing") except Exception as e: - print(f"Failed rebuilding advisory 
sets for package {package.purl}: {e!r}") + logger(f"Failed rebuilding advisory sets for package {package.purl}: {e!r}") continue diff --git a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py index 48d40e439..1d603b88a 100644 --- a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py +++ b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py @@ -118,6 +118,8 @@ def bulk_create_with_m2m(purls, impact, relation, logger): affected_packages_v2 = PackageV2.objects.bulk_get_or_create_from_purls(purls=purls) + affected_packages_v2[-1].calculate_version_rank + relations = [ relation(impacted_package=impact, package=package) for package in affected_packages_v2 ] diff --git a/vulnerabilities/templates/package_details_v3.html b/vulnerabilities/templates/package_details_v3.html deleted file mode 100644 index 44ec1c297..000000000 --- a/vulnerabilities/templates/package_details_v3.html +++ /dev/null @@ -1,367 +0,0 @@ -{% extends "base.html" %} -{% load humanize %} -{% load widget_tweaks %} -{% load static %} -{% load url_filters %} -{% load utils %} - -{% block title %} -VulnerableCode Package Details - {{ package.purl }} -{% endblock %} - -{% block content %} -
- {% include "package_search_box_v2.html"%} -
- -{% if package %} -
-
-
-
- Package details: - {{ package.purl }} - -
-
- -
- -
- -
-
-
- {% if affected_by_advisories_v2|length != 0 or affected_by_advisories_v2_url %} -
- {% else %} -
- {% endif %} - - - - - - - {% if package.is_ghost %} - - - - - {% endif %} - -
- - purl - - - {{ package.purl }} -
- Tags - - - Ghost - -
-
- {% if affected_by_advisories_v2|length != 0 or affected_by_advisories_v2_url %} - -
- - - - - - - - - - - - - - - -
- Next non-vulnerable version - - {% if next_non_vulnerable.version %} - {{ next_non_vulnerable.version }} - {% else %} - None. - {% endif %} -
- Latest non-vulnerable version - - {% if latest_non_vulnerable.version %} - {{ latest_non_vulnerable.version }} - {% else %} - None. - {% endif %} -
- Risk score - - {{package.risk_score}} -
-
- - {% endif %} - -
- {% if affected_by_advisories_v2|length != 0 %} -
- Vulnerabilities affecting this package ({{ affected_by_advisories_v2|length }}) -
- - - - - - - - - - - - - {% for advisory in affected_by_advisories_v2 %} - - - - - - - - {% empty %} - - - - {% endfor %} - -
AdvisorySourceDate PublishedSummaryFixed in package version
- - {{advisory.primary_advisory.advisory_id }} - -
- {% if advisory.identifiers|length != 0 %} - Aliases: - {% endif %} -
- {% for alias in advisory.identifiers %} - {% if alias.url %} - {{ alias }} -
- {% else %} - {{ alias }} -
- {% endif %} - {% endfor %} -
- {% if advisory.secondary_members|length != 0 %} -

Supporting advisories are listed below the primary advisory.

- {% for secondary in advisory.secondary_members %} - - {{secondary.advisory.avid }}
-
- {% endfor %} - {% endif %} -
- {{advisory.primary_advisory.url}} - - {{advisory.primary_advisory.date_published}} - - {{ advisory.primary_advisory.summary }} - - {% with fixed=fixed_package_details|get_item:advisory.primary_advisory.avid %} - {% if fixed %} - {% for item in fixed %} -
- {{ item.pkg.version }} -
- {% if item.pkg.is_vulnerable %} - - Vulnerable - - {% else %} - - Not vulnerable - - {% endif %} -
- {% endfor %} - {% else %} - There are no reported fixed by versions. - {% endif %} - {% endwith %} -
- This package is not known to be subject of any advisories. -
- {% elif affected_by_advisories_v2_url %} -
- This package is subject to more than 100 advisories. Please refer to the following - URL for vulnerabilities affecting this package: Advisories -
- {% else %} -
- This package is not known to be subject of any advisories. -
- {% endif %} -
- -
- {% if fixing_advisories_v2|length != 0 %} -
- Vulnerabilities fixed by this package ({{ fixing_advisories_v2|length }}) -
- - - - - - - - - - - - - {% for advisory in fixing_advisories_v2 %} - - - - - - - - {% empty %} - - - - {% endfor %} - -
AdvisorySourceDate PublishedSummaryAliases
- - {{advisory.primary_advisory.advisory_id }} - -
- {% if advisory.secondary_members|length != 0 %} -

Supporting advisories are listed below the primary advisory.

- {% for secondary in advisory.secondary_members %} - - {{secondary.advisory.avid }}
-
- {% endfor %} - {% endif %} -
- {{advisory.primary_advisory.url}} - - {{advisory.primary_advisory.date_published}} - - {{ advisory.primary_advisory.summary }} - - {% for alias in advisory.identifiers %} - {% if alias.url %} - {{ alias }} -
- {% else %} - {{ alias }} -
- {% endif %} - {% endfor %} -
- This package is not known to fix any advisories. -
- -
- {% elif fixing_advisories_v2_url %} -
- This package is known to fix more than 100 advisories. Please refer to the following - URL for vulnerabilities fixed by this package: Advisories -
- {% else %} -
- This package is not known to fix any advisories. -
- {% endif %} -
-
-
- - -
-
-
-
- -{% endif %} -{% endblock %} diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 2a3d737a4..4f9f396ea 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -272,7 +272,10 @@ def get_context_data(self, **kwargs): fix_advs = [] for fixed_by_adv in fixed_by_advisories: fix_advs.append( - {"advisory_id": fixed_by_adv.advisory_id.split("/")[-1], "advisory": fixed_by_adv} + { + "advisory_id": fixed_by_adv.advisory_id.split("/")[-1], + "advisory": fixed_by_adv, + } ) context["fixing_advisories_v2"] = fix_advs @@ -399,110 +402,6 @@ def get_object(self, queryset=None): return package -class PackageV3Details(DetailView): - model = models.PackageV2 - template_name = "package_details_v3.html" - slug_url_kwarg = "purl" - slug_field = "purl" - - def get_context_data(self, **kwargs): - context = super().get_context_data(**kwargs) - package = self.object - - next_non_vulnerable, latest_non_vulnerable = package.get_non_vulnerable_versions() - - context["package"] = package - context["next_non_vulnerable"] = next_non_vulnerable - context["latest_non_vulnerable"] = latest_non_vulnerable - context["package_search_form"] = PackageSearchForm(self.request.GET) - - affected_by_advisories_qs = ( - models.AdvisorySet.objects.filter(package=package, relation_type="affecting") - .select_related("primary_advisory") - .prefetch_related( - Prefetch( - "members", - queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( - "advisory" - ), - to_attr="secondary_members", - ) - ) - ) - - fixing_advisories_qs = ( - models.AdvisorySet.objects.filter(package=package, relation_type="fixing") - .select_related("primary_advisory") - .prefetch_related( - Prefetch( - "members", - queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( - "advisory" - ), - to_attr="secondary_members", - ) - ) - ) - - print(affected_by_advisories_qs) - print(fixing_advisories_qs) - - affected_by_advisories_url = None - fixing_advisories_url = None - 
- affected_by_advisories_qs_ids = affected_by_advisories_qs.only("id") - fixing_advisories_qs_ids = fixing_advisories_qs.only("id") - - # affected_by_advisories = list(affected_by_advisories_qs_ids[:101]) - # if len(affected_by_advisories) > 100: - # affected_by_advisories_url = reverse_lazy( - # "affected_by_advisories_v2", kwargs={"purl": package.package_url} - # ) - # context["affected_by_advisories_v2_url"] = affected_by_advisories_url - # context["affected_by_advisories_v2"] = [] - # context["fixed_package_details"] = {} - - # else: - fixed_pkg_details = get_fixed_package_details(package) - - context["affected_by_advisories_v2"] = affected_by_advisories_qs - context["fixed_package_details"] = fixed_pkg_details - context["affected_by_advisories_v2_url"] = None - - # fixing_advisories = list(fixing_advisories_qs_ids[:101]) - # if len(fixing_advisories) > 100: - # fixing_advisories_url = reverse_lazy( - # "fixing_advisories_v2", kwargs={"purl": package.package_url} - # ) - # context["fixing_advisories_v2_url"] = fixing_advisories_url - # context["fixing_advisories_v2"] = [] - - # else: - context["fixing_advisories_v2"] = fixing_advisories_qs - context["fixing_advisories_v2_url"] = None - - return context - - def get_object(self, queryset=None): - if queryset is None: - queryset = self.get_queryset() - - purl = self.kwargs.get(self.slug_url_kwarg) - if purl: - queryset = queryset.for_purl(purl) - else: - cls = self.__class__.__name__ - raise AttributeError( - f"Package details view {cls} must be called with a purl, " f"but got: {purl!r}" - ) - - try: - package = queryset.get() - except queryset.model.DoesNotExist: - raise Http404(f"No Package found for purl: {purl}") - return package - - def get_fixed_package_details(package): rows = package.affected_in_impacts.values_list( "advisory__avid", diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 44cacd9b0..eb1bc006b 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -41,7 +41,6 @@ from 
vulnerabilities.views import PackageSearch from vulnerabilities.views import PackageSearchV2 from vulnerabilities.views import PackageV2Details -from vulnerabilities.views import PackageV3Details from vulnerabilities.views import PipelineRunDetailView from vulnerabilities.views import PipelineRunListView from vulnerabilities.views import PipelineScheduleListView From c341e6b43b43abdfa722d9727ec4ff90211a5e57 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 2 Apr 2026 11:55:43 +0530 Subject: [PATCH 359/390] Adjust precedence of importers Signed-off-by: Tushar Goel --- vulnerabilities/models.py | 6 +++--- .../pipelines/v2_importers/elixir_security_importer.py | 2 +- vulnerabilities/pipelines/v2_importers/npm_importer.py | 2 +- .../pipelines/v2_importers/retiredotnet_importer.py | 2 +- vulnerabilities/pipelines/v2_importers/ruby_importer.py | 2 +- vulnerabilities/pipes/openssl.py | 4 +++- .../tests/pipelines/v2_importers/test_collect_fix_commit.py | 4 +++- vulnerabilities/tests/test_api.py | 6 +++--- 8 files changed, 16 insertions(+), 12 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 45d8acf55..90e7b0287 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1139,9 +1139,9 @@ def get_affecting_vulnerabilities(self): next_fixed_package_vulns = list(fixed_by_pkg.affected_by) fixed_by_package_details["fixed_by_purl"] = fixed_by_purl - fixed_by_package_details["fixed_by_purl_vulnerabilities"] = ( - next_fixed_package_vulns - ) + fixed_by_package_details[ + "fixed_by_purl_vulnerabilities" + ] = next_fixed_package_vulns fixed_by_pkgs.append(fixed_by_package_details) vuln_details["fixed_by_package_details"] = fixed_by_pkgs diff --git a/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py b/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py index 3b9f86d8e..2269d0fbc 100644 --- a/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py +++ 
b/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py @@ -37,7 +37,7 @@ class ElixirSecurityImporterPipeline(VulnerableCodeBaseImporterPipelineV2): repo_url = "git+https://github.com/dependabot/elixir-security-advisories" run_once = True - precedence = 200 + precedence = 400 @classmethod def steps(cls): diff --git a/vulnerabilities/pipelines/v2_importers/npm_importer.py b/vulnerabilities/pipelines/v2_importers/npm_importer.py index 32eec2051..9ec4c16dc 100644 --- a/vulnerabilities/pipelines/v2_importers/npm_importer.py +++ b/vulnerabilities/pipelines/v2_importers/npm_importer.py @@ -41,7 +41,7 @@ class NpmImporterPipeline(VulnerableCodeBaseImporterPipelineV2): license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md" repo_url = "git+https://github.com/nodejs/security-wg" - precedence = 200 + precedence = 500 @classmethod def steps(cls): diff --git a/vulnerabilities/pipelines/v2_importers/retiredotnet_importer.py b/vulnerabilities/pipelines/v2_importers/retiredotnet_importer.py index cb87183e3..de9f131ee 100644 --- a/vulnerabilities/pipelines/v2_importers/retiredotnet_importer.py +++ b/vulnerabilities/pipelines/v2_importers/retiredotnet_importer.py @@ -30,7 +30,7 @@ class RetireDotnetImporterPipeline(VulnerableCodeBaseImporterPipelineV2): pipeline_id = "retiredotnet_importer_v2" run_once = True - precedence = 200 + precedence = 400 @classmethod def steps(cls): diff --git a/vulnerabilities/pipelines/v2_importers/ruby_importer.py b/vulnerabilities/pipelines/v2_importers/ruby_importer.py index fad09a1b5..210f73566 100644 --- a/vulnerabilities/pipelines/v2_importers/ruby_importer.py +++ b/vulnerabilities/pipelines/v2_importers/ruby_importer.py @@ -58,7 +58,7 @@ class RubyImporterPipeline(VulnerableCodeBaseImporterPipelineV2): SOFTWARE. 
""" - precedence = 200 + precedence = 500 @classmethod def steps(cls): diff --git a/vulnerabilities/pipes/openssl.py b/vulnerabilities/pipes/openssl.py index 1dffdedc1..b240f416c 100644 --- a/vulnerabilities/pipes/openssl.py +++ b/vulnerabilities/pipes/openssl.py @@ -89,7 +89,9 @@ def get_reference(reference_name, tag, reference_url): ref_type = ( AdvisoryReference.COMMIT if "commit" in name or tag == "patch" - else AdvisoryReference.ADVISORY if "advisory" in name else AdvisoryReference.OTHER + else AdvisoryReference.ADVISORY + if "advisory" in name + else AdvisoryReference.OTHER ) return ReferenceV2( diff --git a/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py b/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py index 9a687a3b7..dac2c7781 100644 --- a/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py +++ b/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py @@ -52,7 +52,9 @@ def test_collect_fix_commits_groups_by_vuln(mock_repo, pipeline): side_effect=lambda c: ( ["CVE-2021-0001"] if "CVE" in c.message - else ["GHSA-dead-beef-baad"] if "GHSA" in c.message else [] + else ["GHSA-dead-beef-baad"] + if "GHSA" in c.message + else [] ) ) diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index 31f2b7774..9ed647099 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -75,9 +75,9 @@ def cleaned_response(response): reference["scores"] = sorted( reference["scores"], key=lambda x: (x["value"], x["scoring_system"]) ) - package_data["resolved_vulnerabilities"][index]["references"][index2]["scores"] = ( - reference["scores"] - ) + package_data["resolved_vulnerabilities"][index]["references"][index2][ + "scores" + ] = reference["scores"] cleaned_response.append(package_data) From 54b0fc9773a6f19017b0129493a5cd7353818e1a Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 2 Apr 2026 12:01:50 +0530 Subject: [PATCH 360/390] 
Upgrade black Signed-off-by: Tushar Goel --- vulnerabilities/models.py | 6 +++--- vulnerabilities/pipes/openssl.py | 4 +--- .../tests/pipelines/v2_importers/test_collect_fix_commit.py | 4 +--- vulnerabilities/tests/test_api.py | 6 +++--- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 90e7b0287..45d8acf55 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1139,9 +1139,9 @@ def get_affecting_vulnerabilities(self): next_fixed_package_vulns = list(fixed_by_pkg.affected_by) fixed_by_package_details["fixed_by_purl"] = fixed_by_purl - fixed_by_package_details[ - "fixed_by_purl_vulnerabilities" - ] = next_fixed_package_vulns + fixed_by_package_details["fixed_by_purl_vulnerabilities"] = ( + next_fixed_package_vulns + ) fixed_by_pkgs.append(fixed_by_package_details) vuln_details["fixed_by_package_details"] = fixed_by_pkgs diff --git a/vulnerabilities/pipes/openssl.py b/vulnerabilities/pipes/openssl.py index b240f416c..1dffdedc1 100644 --- a/vulnerabilities/pipes/openssl.py +++ b/vulnerabilities/pipes/openssl.py @@ -89,9 +89,7 @@ def get_reference(reference_name, tag, reference_url): ref_type = ( AdvisoryReference.COMMIT if "commit" in name or tag == "patch" - else AdvisoryReference.ADVISORY - if "advisory" in name - else AdvisoryReference.OTHER + else AdvisoryReference.ADVISORY if "advisory" in name else AdvisoryReference.OTHER ) return ReferenceV2( diff --git a/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py b/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py index dac2c7781..9a687a3b7 100644 --- a/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py +++ b/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py @@ -52,9 +52,7 @@ def test_collect_fix_commits_groups_by_vuln(mock_repo, pipeline): side_effect=lambda c: ( ["CVE-2021-0001"] if "CVE" in c.message - else ["GHSA-dead-beef-baad"] - if 
"GHSA" in c.message - else [] + else ["GHSA-dead-beef-baad"] if "GHSA" in c.message else [] ) ) diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index 9ed647099..31f2b7774 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -75,9 +75,9 @@ def cleaned_response(response): reference["scores"] = sorted( reference["scores"], key=lambda x: (x["value"], x["scoring_system"]) ) - package_data["resolved_vulnerabilities"][index]["references"][index2][ - "scores" - ] = reference["scores"] + package_data["resolved_vulnerabilities"][index]["references"][index2]["scores"] = ( + reference["scores"] + ) cleaned_response.append(package_data) From 2c3d35401ebb05d97a25cba187e6964d84ebcb43 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 2 Apr 2026 19:03:01 +0530 Subject: [PATCH 361/390] Prepare v38.1.0 for release Signed-off-by: Tushar Goel --- CHANGELOG.rst | 8 ++ setup.cfg | 2 +- vulnerabilities/api_v3.py | 5 +- vulnerabilities/importers/__init__.py | 4 - vulnerabilities/improvers/__init__.py | 2 - .../v2_improvers/unfurl_version_range.py | 10 ++- vulnerabilities/tests/test_view.py | 32 ++++++++ vulnerabilities/throttling.py | 13 ++++ vulnerabilities/views.py | 76 ++++++++++++++----- vulnerablecode/__init__.py | 2 +- vulnerablecode/settings.py | 2 + 11 files changed, 127 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4c3d9efb4..f52bf437a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,14 @@ Release notes ============= +Version v38.1.0 +--------------------- + +- Throttle UI to 15 requests per minute to avoid abuse and improve performance. +- Handle errors in unfurl_version_range pipeline. +- Remove Todo pipeline from v1 pipelines. +- Add openAPI documentation for Package and Advisory viewset. 
+ Version v38.0.0 --------------------- diff --git a/setup.cfg b/setup.cfg index 5c8efc7dd..16dbe9b9a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = vulnerablecode -version = 38.0.0 +version = 38.1.0 license = Apache-2.0 AND CC-BY-SA-4.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index ffa5bd941..c17202f25 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -14,6 +14,7 @@ from django.db.models import OuterRef from django.db.models import Prefetch from django_filters import rest_framework as filters +from drf_spectacular.utils import extend_schema from packageurl import PackageURL from rest_framework import serializers from rest_framework import viewsets @@ -422,6 +423,7 @@ class PackageV3ViewSet(viewsets.GenericViewSet): filter_backends = [filters.DjangoFilterBackend] throttle_classes = [AnonRateThrottle, PermissionBasedUserRateThrottle] + @extend_schema(request=PackageQuerySerializer) def create(self, request, *args, **kwargs): serializer = PackageQuerySerializer(data=request.data) serializer.is_valid(raise_exception=True) @@ -528,8 +530,9 @@ class AdvisoryV3ViewSet(viewsets.GenericViewSet): filter_backends = [filters.DjangoFilterBackend] throttle_classes = [AnonRateThrottle, PermissionBasedUserRateThrottle] + @extend_schema(request=AdvisoryQuerySerializer) def create(self, request, *args, **kwargs): - serializer = PackageQuerySerializer(data=request.data) + serializer = AdvisoryQuerySerializer(data=request.data) serializer.is_valid(raise_exception=True) purls = serializer.validated_data["purls"] diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 594021092..439e69731 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -21,7 +21,6 @@ from vulnerabilities.importers import github_osv from 
vulnerabilities.importers import istio from vulnerabilities.importers import mozilla -from vulnerabilities.importers import openssl from vulnerabilities.importers import oss_fuzz from vulnerabilities.importers import postgresql from vulnerabilities.importers import project_kb_msr2019 @@ -38,7 +37,6 @@ from vulnerabilities.pipelines import gitlab_importer from vulnerabilities.pipelines import nginx_importer from vulnerabilities.pipelines import npm_importer -from vulnerabilities.pipelines import nvd_importer from vulnerabilities.pipelines import pypa_importer from vulnerabilities.pipelines import pysec_importer from vulnerabilities.pipelines.v2_importers import alpine_linux_importer as alpine_linux_importer_v2 @@ -118,7 +116,6 @@ retiredotnet_importer_v2.RetireDotnetImporterPipeline, ubuntu_osv_importer_v2.UbuntuOSVImporterPipeline, alpine_linux_importer_v2.AlpineLinuxImporterPipeline, - nvd_importer.NVDImporterPipeline, github_importer.GitHubAPIImporterPipeline, gitlab_importer.GitLabImporterPipeline, github_osv.GithubOSVImporter, @@ -136,7 +133,6 @@ alpine_linux_importer.AlpineLinuxImporterPipeline, ruby.RubyImporter, apache_kafka.ApacheKafkaImporter, - openssl.OpensslImporter, openssl_importer_v2.OpenSSLImporterPipeline, redhat.RedhatImporter, archlinux.ArchlinuxImporter, diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index d55ecafdb..11fa5126a 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -10,7 +10,6 @@ from vulnerabilities.improvers import valid_versions from vulnerabilities.improvers import vulnerability_status from vulnerabilities.pipelines import add_cvss31_to_CVEs -from vulnerabilities.pipelines import compute_advisory_todo from vulnerabilities.pipelines import compute_package_risk from vulnerabilities.pipelines import compute_package_version_rank from vulnerabilities.pipelines import enhance_with_exploitdb @@ -70,7 +69,6 @@ 
compute_package_risk_v2.ComputePackageRiskPipeline, compute_version_rank_v2.ComputeVersionRankPipeline, unfurl_version_range_v2.UnfurlVersionRangePipeline, - compute_advisory_todo.ComputeToDo, collect_ssvc_trees.CollectSSVCPipeline, relate_severities.RelateSeveritiesPipeline, group_advisories_for_packages.GroupAdvisoriesForPackages, diff --git a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py index 1d603b88a..f18f43fbf 100644 --- a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py +++ b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py @@ -52,7 +52,7 @@ def unfurl_version_range(self): if purl.type not in RANGE_CLASS_BY_SCHEMES: continue - versions = get_purl_versions(purl, cached_versions) + versions = get_purl_versions(purl, cached_versions) or [] affected_purls = get_affected_purls( versions=versions, affecting_vers=impact.affecting_vers, @@ -79,6 +79,8 @@ def get_affected_purls(versions, affecting_vers, base_purl, logger): version_class = affecting_version_range.version_class try: + if not versions: + return [] versions = [version_class(v) for v in versions] except Exception as e: logger( @@ -107,8 +109,10 @@ def get_affected_purls(versions, affecting_vers, base_purl, logger): def get_purl_versions(purl, cached_versions): if not purl in cached_versions: - cached_versions[purl] = get_versions(purl) - return cached_versions[purl] + purls = get_versions(purl) + if purls is not None: + cached_versions[purl] = purls + return cached_versions.get(purl) or [] def bulk_create_with_m2m(purls, impact, relation, logger): diff --git a/vulnerabilities/tests/test_view.py b/vulnerabilities/tests/test_view.py index 471e0bf43..3111ef738 100644 --- a/vulnerabilities/tests/test_view.py +++ b/vulnerabilities/tests/test_view.py @@ -11,8 +11,10 @@ import time import pytest +from django.core.cache import cache from django.test import Client from django.test import TestCase +from 
django.urls import reverse from packageurl import PackageURL from univers import versions @@ -330,3 +332,33 @@ def test_aggregate_fixed_and_affected_packages(self): end_time = time.time() assert end_time - start_time < 0.05 self.assertEqual(response.status_code, 200) + + +class ThrottleTestCase(TestCase): + def setUp(self): + self.client = Client() + cache.clear() + + def test_throttle_after_15_requests(self): + url = reverse("home") + + responses = [] + + for i in range(16): + response = self.client.get( + url, + HTTP_USER_AGENT="test-agent", + ) + responses.append(response.status_code) + + assert all(code == 200 for code in responses[:15]) + + assert responses[15] == 429 + + url = reverse("package_search") + + response = self.client.get( + url, + HTTP_USER_AGENT="test-agent", + ) + assert response.status_code == 429 diff --git a/vulnerabilities/throttling.py b/vulnerabilities/throttling.py index e14c1a1c0..c97b2c89f 100644 --- a/vulnerabilities/throttling.py +++ b/vulnerabilities/throttling.py @@ -51,6 +51,19 @@ def get_throttle_rate(self, tier): raise ImproperlyConfigured(msg) +class AnonUserUIThrottle(UserRateThrottle): + scope = "ui" + + def allow_request(self, request, view): + self.rate = self.THROTTLE_RATES.get("ui") + self.num_requests, self.duration = self.parse_rate(self.rate) + return super().allow_request(request, view) + + def get_cache_key(self, request, view): + ident = self.get_ident(request) + return f"throttle_ui_{ident}" + + def throttled_exception_handler(exception, context): """ Return this response whenever a request has been throttled diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 4f9f396ea..b984fbb51 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -20,6 +20,7 @@ from django.db.models import Exists from django.db.models import OuterRef from django.db.models import Prefetch +from django.http import HttpResponse from django.http.response import Http404 from django.shortcuts import 
get_object_or_404 from django.shortcuts import redirect @@ -47,6 +48,7 @@ from vulnerabilities.pipelines.v2_importers.epss_importer_v2 import EPSSImporterPipeline from vulnerabilities.severity_systems import EPSS from vulnerabilities.severity_systems import SCORING_SYSTEMS +from vulnerabilities.throttling import AnonUserUIThrottle from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS from vulnerabilities.utils import get_advisories_from_groups from vulnerabilities.utils import merge_and_save_grouped_advisories @@ -56,7 +58,47 @@ PAGE_SIZE = 10 -class PackageSearch(ListView): +class VulnerableCodeView(View): + """ + Base View for VulnerableCode views that includes throttling. + """ + + throttle_classes = [AnonUserUIThrottle] + + def dispatch(self, request, *args, **kwargs): + throttle = AnonUserUIThrottle() + + if not throttle.allow_request(request, self): + return HttpResponse("Rate limit exceeded", status=429) + + return super().dispatch(request, *args, **kwargs) + + +class VulnerableCodeDetailView(DetailView, VulnerableCodeView): + """ + Base DetailView for VulnerableCode views that includes throttling. + """ + + pass + + +class VulnerableCodeListView(ListView, VulnerableCodeView): + """ + Base ListView for VulnerableCode views that includes throttling. + """ + + pass + + +class VulnerableCodeCreateView(generic.CreateView, VulnerableCodeView): + """ + Base CreateView for VulnerableCode views that includes throttling.
+ """ + + pass + + +class PackageSearch(VulnerableCodeListView): model = models.Package template_name = "packages.html" ordering = ["type", "namespace", "name", "version"] @@ -84,7 +126,7 @@ def get_queryset(self, query=None): ) -class VulnerabilitySearch(ListView): +class VulnerabilitySearch(VulnerableCodeListView): model = models.Vulnerability template_name = "vulnerabilities.html" ordering = ["vulnerability_id"] @@ -102,7 +144,7 @@ def get_queryset(self, query=None): return self.model.objects.search(query=query).with_package_counts() -class PackageDetails(DetailView): +class PackageDetails(VulnerableCodeDetailView): model = models.Package template_name = "package_details.html" slug_url_kwarg = "purl" @@ -143,7 +185,7 @@ def get_object(self, queryset=None): return package -class PackageSearchV2(ListView): +class PackageSearchV2(VulnerableCodeListView): model = models.PackageV2 template_name = "packages_v2.html" ordering = ["type", "namespace", "name", "version"] @@ -166,7 +208,7 @@ def get_queryset(self, query=None): return self.model.objects.search(query).prefetch_related().with_is_vulnerable() -class AffectedByAdvisoriesListView(ListView): +class AffectedByAdvisoriesListView(VulnerableCodeListView): model = models.AdvisoryV2 template_name = "affected_by_advisories.html" paginate_by = PAGE_SIZE @@ -187,7 +229,7 @@ def get_queryset(self): ) -class FixingAdvisoriesListView(ListView): +class FixingAdvisoriesListView(VulnerableCodeListView): model = models.AdvisoryV2 template_name = "fixing_advisories.html" paginate_by = PAGE_SIZE @@ -201,7 +243,7 @@ def get_queryset(self): ) -class PackageV2Details(DetailView): +class PackageV2Details(VulnerableCodeDetailView): model = models.PackageV2 template_name = "package_details_v2.html" slug_url_kwarg = "purl" @@ -439,7 +481,7 @@ def get_fixed_package_details(package): return fixed_pkg_details -class VulnerabilityDetails(DetailView): +class VulnerabilityDetails(VulnerableCodeDetailView): model = models.Vulnerability 
template_name = "vulnerability_details.html" slug_url_kwarg = "vulnerability_id" @@ -543,7 +585,7 @@ def get_context_data(self, **kwargs): return context -class AdvisoryDetails(DetailView): +class AdvisoryDetails(VulnerableCodeDetailView): model = models.AdvisoryV2 template_name = "advisory_detail.html" slug_url_kwarg = "avid" @@ -717,7 +759,7 @@ def add_ssvc(ssvc): return context -class HomePage(View): +class HomePage(VulnerableCodeView): template_name = "index.html" def get(self, request): @@ -730,7 +772,7 @@ def get(self, request): return render(request=request, template_name=self.template_name, context=context) -class HomePageV2(View): +class HomePageV2(VulnerableCodeView): template_name = "index_v2.html" def get(self, request): @@ -770,7 +812,7 @@ def get(self, request): """ -class ApiUserCreateView(generic.CreateView): +class ApiUserCreateView(VulnerableCodeCreateView): model = models.ApiUser form_class = ApiUserCreationForm template_name = "api_user_creation_form.html" @@ -800,7 +842,7 @@ def get_success_url(self): return reverse_lazy("api_user_request") -class VulnerabilityPackagesDetails(DetailView): +class VulnerabilityPackagesDetails(VulnerableCodeDetailView): """ View to display all packages affected by or fixing a specific vulnerability. URL: /vulnerabilities/{vulnerability_id}/packages @@ -851,7 +893,7 @@ def get_context_data(self, **kwargs): return context -class AdvisoryPackagesDetails(DetailView): +class AdvisoryPackagesDetails(VulnerableCodeDetailView): """ View to display all packages affected by or fixing a specific vulnerability. 
URL: /advisories/{id}/packages @@ -902,7 +944,7 @@ def get_queryset(self): ) -class PipelineScheduleListView(ListView, FormMixin): +class PipelineScheduleListView(VulnerableCodeListView, FormMixin): model = PipelineSchedule context_object_name = "schedule_list" template_name = "pipeline_dashboard.html" @@ -926,7 +968,7 @@ def get_context_data(self, **kwargs): return context -class PipelineRunListView(ListView): +class PipelineRunListView(VulnerableCodeListView): model = PipelineRun context_object_name = "run_list" template_name = "pipeline_run_list.html" @@ -952,7 +994,7 @@ def get_context_data(self, **kwargs): return context -class PipelineRunDetailView(DetailView): +class PipelineRunDetailView(VulnerableCodeDetailView): model = PipelineRun template_name = "pipeline_run_details.html" context_object_name = "run" diff --git a/vulnerablecode/__init__.py b/vulnerablecode/__init__.py index 80b725801..4966e4c04 100644 --- a/vulnerablecode/__init__.py +++ b/vulnerablecode/__init__.py @@ -14,7 +14,7 @@ import git -__version__ = "38.0.0" +__version__ = "38.1.0" PROJECT_DIR = Path(__file__).resolve().parent diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index eaf2c1276..4c480cbc8 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -192,12 +192,14 @@ LOGOUT_REDIRECT_URL = "/" THROTTLE_RATE_ANON = env.str("THROTTLE_RATE_ANON", default="3600/hour") +THROTTLE_RATE_UI = env.str("THROTTLE_RATE_UI", default="15/minute") THROTTLE_RATE_USER_HIGH = env.str("THROTTLE_RATE_USER_HIGH", default="18000/hour") THROTTLE_RATE_USER_MEDIUM = env.str("THROTTLE_RATE_USER_MEDIUM", default="14400/hour") THROTTLE_RATE_USER_LOW = env.str("THROTTLE_RATE_USER_LOW", default="10800/hour") REST_FRAMEWORK_DEFAULT_THROTTLE_RATES = { "anon": THROTTLE_RATE_ANON, + "ui": THROTTLE_RATE_UI, "low": THROTTLE_RATE_USER_LOW, "medium": THROTTLE_RATE_USER_MEDIUM, "high": THROTTLE_RATE_USER_HIGH, From c4f3af6784ae619c95e992892eb12b5e12e7c3bd Mon Sep 17 00:00:00 2001 From: 
Keshav Priyadarshi Date: Sat, 4 Apr 2026 00:36:26 +0530 Subject: [PATCH 362/390] style: format code Signed-off-by: Keshav Priyadarshi --- vulnerabilities/models.py | 6 +++--- vulnerabilities/pipes/openssl.py | 4 +++- .../tests/pipelines/v2_importers/test_collect_fix_commit.py | 4 +++- vulnerabilities/tests/test_api.py | 6 +++--- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 45d8acf55..90e7b0287 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1139,9 +1139,9 @@ def get_affecting_vulnerabilities(self): next_fixed_package_vulns = list(fixed_by_pkg.affected_by) fixed_by_package_details["fixed_by_purl"] = fixed_by_purl - fixed_by_package_details["fixed_by_purl_vulnerabilities"] = ( - next_fixed_package_vulns - ) + fixed_by_package_details[ + "fixed_by_purl_vulnerabilities" + ] = next_fixed_package_vulns fixed_by_pkgs.append(fixed_by_package_details) vuln_details["fixed_by_package_details"] = fixed_by_pkgs diff --git a/vulnerabilities/pipes/openssl.py b/vulnerabilities/pipes/openssl.py index 1dffdedc1..b240f416c 100644 --- a/vulnerabilities/pipes/openssl.py +++ b/vulnerabilities/pipes/openssl.py @@ -89,7 +89,9 @@ def get_reference(reference_name, tag, reference_url): ref_type = ( AdvisoryReference.COMMIT if "commit" in name or tag == "patch" - else AdvisoryReference.ADVISORY if "advisory" in name else AdvisoryReference.OTHER + else AdvisoryReference.ADVISORY + if "advisory" in name + else AdvisoryReference.OTHER ) return ReferenceV2( diff --git a/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py b/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py index 9a687a3b7..dac2c7781 100644 --- a/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py +++ b/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py @@ -52,7 +52,9 @@ def test_collect_fix_commits_groups_by_vuln(mock_repo, pipeline): 
side_effect=lambda c: ( ["CVE-2021-0001"] if "CVE" in c.message - else ["GHSA-dead-beef-baad"] if "GHSA" in c.message else [] + else ["GHSA-dead-beef-baad"] + if "GHSA" in c.message + else [] ) ) diff --git a/vulnerabilities/tests/test_api.py b/vulnerabilities/tests/test_api.py index 31f2b7774..9ed647099 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -75,9 +75,9 @@ def cleaned_response(response): reference["scores"] = sorted( reference["scores"], key=lambda x: (x["value"], x["scoring_system"]) ) - package_data["resolved_vulnerabilities"][index]["references"][index2]["scores"] = ( - reference["scores"] - ) + package_data["resolved_vulnerabilities"][index]["references"][index2][ + "scores" + ] = reference["scores"] cleaned_response.append(package_data) From 1c67f3ef193b96490b234dad513dbe807c14f4dd Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 7 Apr 2026 01:07:25 +0530 Subject: [PATCH 363/390] feat: track pipeline priority in model Signed-off-by: Keshav Priyadarshi --- vulnerabilities/models.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 90e7b0287..0837d2500 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2262,6 +2262,10 @@ def requeue(self): class PipelineSchedule(models.Model): """The Database representation of a pipeline schedule.""" + class ExecutionPriority(models.IntegerChoices): + HIGH = 1, "high" + DEFAULT = 2, "default" + pipeline_id = models.CharField( max_length=600, help_text=("Identify a registered Pipeline class."), @@ -2306,6 +2310,14 @@ class PipelineSchedule(models.Model): help_text=("Number of hours to wait between run of this pipeline."), ) + run_priority = models.IntegerField( + null=False, + blank=False, + choices=ExecutionPriority.choices, + default=ExecutionPriority.DEFAULT, + help_text=("Select the pipeline execution priority"), + ) + schedule_work_id = models.CharField( 
max_length=255, unique=True, From c1c775951e97a2c59f71b09feea45a07df1020e2 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 7 Apr 2026 01:14:00 +0530 Subject: [PATCH 364/390] feat: enqueue pipelines based on priority Signed-off-by: Keshav Priyadarshi --- vulnerabilities/schedules.py | 13 ++++++++++++- vulnerabilities/tasks.py | 16 +++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/vulnerabilities/schedules.py b/vulnerabilities/schedules.py index e6443e5ab..215129e10 100644 --- a/vulnerabilities/schedules.py +++ b/vulnerabilities/schedules.py @@ -95,10 +95,21 @@ def update_pipeline_schedule(): PipelineSchedule.objects.exclude(pipeline_id__in=pipelines.keys()).delete() for id, pipeline_class in pipelines.items(): run_once = getattr(pipeline_class, "run_once", False) + run_interval = getattr(pipeline_class, "run_interval", 24) + run_priority = getattr( + pipeline_class, "run_priority", PipelineSchedule.ExecutionPriority.DEFAULT + ) - PipelineSchedule.objects.get_or_create( + pipeline, created = PipelineSchedule.objects.get_or_create( pipeline_id=id, defaults={ "is_run_once": run_once, + "run_interval": run_interval, + "run_priority": run_priority, }, ) + + if not created: + pipeline.run_priority = run_priority + pipeline.run_interval = run_interval + pipeline.save() diff --git a/vulnerabilities/tasks.py b/vulnerabilities/tasks.py index 2e7ac2b10..5a5968831 100644 --- a/vulnerabilities/tasks.py +++ b/vulnerabilities/tasks.py @@ -20,7 +20,13 @@ logger = logging.getLogger(__name__) -queue = django_rq.get_queue("default") +default_queue = django_rq.get_queue("default") +high_queue = django_rq.get_queue("high") + +queues = { + "default": django_rq.get_queue("default"), + "high": django_rq.get_queue("high"), +} def execute_pipeline(pipeline_id, run_id): @@ -112,6 +118,8 @@ def set_run_failure(job, connection, type, value, traceback): def enqueue_pipeline(pipeline_id): pipeline_schedule =
models.PipelineSchedule.objects.get(pipeline_id=pipeline_id) + queue = queues.get(pipeline_schedule.get_priority_display()) + if pipeline_schedule.status in [ models.PipelineRun.Status.RUNNING, models.PipelineRun.Status.QUEUED, @@ -139,5 +147,7 @@ def enqueue_pipeline(pipeline_id): def dequeue_job(job_id): """Remove a job from queue if it hasn't been executed yet.""" - if job_id in queue.jobs: - queue.remove(job_id) + + for queue in queues.values(): + if job_id in queue.jobs: + queue.remove(job_id) From bfcc0c88b96d3f4b67ecefefa564618ce0b16ddc Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 7 Apr 2026 01:14:43 +0530 Subject: [PATCH 365/390] feat: add high priority rq worker Signed-off-by: Keshav Priyadarshi --- docker-compose.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 76b645560..45ee678cf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -57,6 +57,17 @@ services: - db - vulnerablecode + vulnerablecode_rqworker_high: + build: . 
+ command: wait-for-it web:8000 -- python ./manage.py rqworker high + env_file: + - docker.env + volumes: + - /etc/vulnerablecode/:/etc/vulnerablecode/ + depends_on: + - vulnerablecode_redis + - db + - vulnerablecode nginx: image: nginx From 974bbb506f517735c170104d27d8b84f296b199a Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 7 Apr 2026 18:02:09 +0530 Subject: [PATCH 366/390] feat: add config for high priority redis queue Signed-off-by: Keshav Priyadarshi --- vulnerablecode/settings.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/vulnerablecode/settings.py b/vulnerablecode/settings.py index 4c480cbc8..8ec5f6e31 100644 --- a/vulnerablecode/settings.py +++ b/vulnerablecode/settings.py @@ -392,7 +392,13 @@ "PORT": env.str("VULNERABLECODE_REDIS_PORT", default="6379"), "PASSWORD": env.str("VULNERABLECODE_REDIS_PASSWORD", default=""), "DEFAULT_TIMEOUT": env.int("VULNERABLECODE_REDIS_DEFAULT_TIMEOUT", default=3600), - } + }, + "high": { + "HOST": env.str("VULNERABLECODE_REDIS_HOST", default="localhost"), + "PORT": env.str("VULNERABLECODE_REDIS_PORT", default="6379"), + "PASSWORD": env.str("VULNERABLECODE_REDIS_PASSWORD", default=""), + "DEFAULT_TIMEOUT": env.int("VULNERABLECODE_REDIS_DEFAULT_TIMEOUT", default=3600), + }, } From d420b97fbe1898985410c6cdd8d05b6087a62ea9 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 7 Apr 2026 18:21:54 +0530 Subject: [PATCH 367/390] refactor: process never unfurled version ranges first - re-unfurl version ranges every 2 days - run unfurl pipeline every 2 hours Signed-off-by: Keshav Priyadarshi --- ...edpackage_last_range_unfurl_at_and_more.py | 32 +++++++++ vulnerabilities/models.py | 7 ++ vulnerabilities/pipelines/__init__.py | 10 +++ .../v2_improvers/unfurl_version_range.py | 72 +++++++++++++++---- 4 files changed, 107 insertions(+), 14 deletions(-) create mode 100644 vulnerabilities/migrations/0120_impactedpackage_last_range_unfurl_at_and_more.py diff --git 
a/vulnerabilities/migrations/0120_impactedpackage_last_range_unfurl_at_and_more.py b/vulnerabilities/migrations/0120_impactedpackage_last_range_unfurl_at_and_more.py new file mode 100644 index 000000000..f6b5d8a93 --- /dev/null +++ b/vulnerabilities/migrations/0120_impactedpackage_last_range_unfurl_at_and_more.py @@ -0,0 +1,32 @@ +# Generated by Django 5.2.11 on 2026-04-06 20:51 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0119_remove_advisoryset_identifiers_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="impactedpackage", + name="last_range_unfurl_at", + field=models.DateTimeField( + blank=True, + db_index=True, + help_text="Timestamp of the last vers range unfurl.", + null=True, + ), + ), + migrations.AddField( + model_name="pipelineschedule", + name="run_priority", + field=models.IntegerField( + choices=[(1, "high"), (2, "default")], + default=2, + help_text="Select the pipeline execution priority", + ), + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 0837d2500..bb4390f2a 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -3252,6 +3252,13 @@ class ImpactedPackage(models.Model): help_text="Timestamp indicating when this impact was added.", ) + last_range_unfurl_at = models.DateTimeField( + blank=True, + null=True, + db_index=True, + help_text="Timestamp of the last vers range unfurl.", + ) + def to_dict(self): from vulnerabilities.utils import purl_to_dict diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 632fd95f6..499f53331 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -24,6 +24,7 @@ from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory from vulnerabilities.models import PipelineRun +from vulnerabilities.models import PipelineSchedule from 
vulnerabilities.pipes.advisory import import_advisory from vulnerabilities.pipes.advisory import insert_advisory from vulnerabilities.pipes.advisory import insert_advisory_v2 @@ -144,6 +145,9 @@ class VulnerableCodePipeline(PipelineDefinition, BasePipelineRun): # When set to true pipeline is run only once. # To rerun onetime pipeline reset is_active field to True via migration. run_once = False + # Interval between runs in hour. + run_interval = 24 + run_priority = PipelineSchedule.ExecutionPriority.DEFAULT def on_failure(self): """ @@ -176,6 +180,9 @@ class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline): # When set to true pipeline is run only once. # To rerun onetime pipeline reset is_active field to True via migration. run_once = False + # Interval between runs in hour. + run_interval = 24 + run_priority = PipelineSchedule.ExecutionPriority.DEFAULT @classmethod def steps(cls): @@ -277,6 +284,9 @@ class VulnerableCodeBaseImporterPipelineV2(VulnerableCodePipeline): # When set to true pipeline is run only once. # To rerun onetime pipeline reset is_active field to True via migration. run_once = False + # Interval between runs in hour. 
+ run_interval = 24 + run_priority = PipelineSchedule.ExecutionPriority.DEFAULT @classmethod def steps(cls): diff --git a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py index f18f43fbf..cb65f5f93 100644 --- a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py +++ b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py @@ -8,9 +8,13 @@ # import logging +from datetime import timedelta from traceback import format_exc as traceback_format_exc from aboutcode.pipeline import LoopProgress +from django.db.models import F +from django.db.models import Q +from django.utils import timezone from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS from packageurl import PackageURL from univers.version_range import RANGE_CLASS_BY_SCHEMES @@ -19,29 +23,45 @@ from vulnerabilities.models import ImpactedPackage from vulnerabilities.models import ImpactedPackageAffecting from vulnerabilities.models import PackageV2 +from vulnerabilities.models import PipelineSchedule from vulnerabilities.pipelines import VulnerableCodePipeline from vulnerabilities.pipes.fetchcode_utils import get_versions from vulnerabilities.utils import update_purl_version class UnfurlVersionRangePipeline(VulnerableCodePipeline): + """ + Unfurl affected version ranges by first processing those that have + never been unfurled and then handling ranges that were last unfurled + two or more days ago. 
+ """ pipeline_id = "unfurl_version_range_v2" + run_interval = 2 + run_priority = PipelineSchedule.ExecutionPriority.HIGH + + # Days elapsed before version range is re-unfurled + reunfurl_after_days = 2 + @classmethod def steps(cls): return (cls.unfurl_version_range,) def unfurl_version_range(self): - impacted_packages = ImpactedPackage.objects.all().order_by("-created_at") - impacted_packages_count = impacted_packages.count() - processed_impacted_packages_count = 0 processed_affected_packages_count = 0 cached_versions = {} + impacts_to_update = [] + update_batch_size = 5 + + impacted_packages = impacted_package_qs(cutoff_day=self.reunfurl_after_days) + impacted_packages_count = impacted_packages.count() self.log(f"Unfurl affected vers range for {impacted_packages_count:,d} ImpactedPackage.") + progress = LoopProgress(total_iterations=impacted_packages_count, logger=self.log) - for impact in progress.iter(impacted_packages): + for impact in progress.iter(impacted_packages.iterator(chunk_size=5000)): + impacts_to_update.append(impact.pk) purl = PackageURL.from_string(impact.base_purl) if not impact.affecting_vers or not any( c in impact.affecting_vers for c in ("<", ">", "!") @@ -52,11 +72,10 @@ def unfurl_version_range(self): if purl.type not in RANGE_CLASS_BY_SCHEMES: continue - versions = get_purl_versions(purl, cached_versions) or [] + versions = get_purl_versions(purl, cached_versions, self.log) or [] affected_purls = get_affected_purls( versions=versions, - affecting_vers=impact.affecting_vers, - base_purl=purl, + impact=impact, logger=self.log, ) if not affected_purls: @@ -70,12 +89,21 @@ def unfurl_version_range(self): ) processed_impacted_packages_count += 1 + if len(impacts_to_update) > update_batch_size: + ImpactedPackage.objects.filter(pk__in=impacts_to_update).update( + last_range_unfurl_at=timezone.now() + ) + impacts_to_update.clear() + + ImpactedPackage.objects.filter(pk__in=impacts_to_update).update( + last_range_unfurl_at=timezone.now() + ) 
self.log(f"Successfully processed {processed_impacted_packages_count:,d} ImpactedPackage.") self.log(f"{processed_affected_packages_count:,d} new Impact-Package relation created.") -def get_affected_purls(versions, affecting_vers, base_purl, logger): - affecting_version_range = VersionRange.from_string(affecting_vers) +def get_affected_purls(versions, impact, logger): + affecting_version_range = VersionRange.from_string(impact.affecting_vers) version_class = affecting_version_range.version_class try: @@ -84,7 +112,7 @@ def get_affected_purls(versions, affecting_vers, base_purl, logger): versions = [version_class(v) for v in versions] except Exception as e: logger( - f"Error while parsing versions for {base_purl!s}: {e!r} \n {traceback_format_exc()}", + f"Error while parsing versions for {impact.base_purl!s}: {e!r} \n {traceback_format_exc()}", level=logging.ERROR, ) return @@ -95,21 +123,24 @@ def get_affected_purls(versions, affecting_vers, base_purl, logger): if version in affecting_version_range: affected_purls.append( update_purl_version( - purl=base_purl, + purl=impact.base_purl, version=str(version), ) ) except Exception as e: logger( - f"Error while checking {version!s} in {affecting_version_range!s}: {e!r} \n {traceback_format_exc()}", + ( + f"Error while checking {version!s} in {affecting_version_range!s} for " + f"advisory {impact.advisory.avid}: {e!r} \n {traceback_format_exc()}" + ), level=logging.ERROR, ) return affected_purls -def get_purl_versions(purl, cached_versions): +def get_purl_versions(purl, cached_versions, logger): if not purl in cached_versions: - purls = get_versions(purl) + purls = get_versions(purl, logger) if purls is not None: cached_versions[purl] = purls return cached_versions.get(purl) or [] @@ -135,3 +166,16 @@ def bulk_create_with_m2m(purls, impact, relation, logger): return 0 return len(relations) + + +def impacted_package_qs(cutoff_day=2): + cutoff = timezone.now() - timedelta(days=cutoff_day) + return ( + 
ImpactedPackage.objects.filter( + (Q(last_range_unfurl_at__isnull=True) | Q(last_range_unfurl_at__lte=cutoff)) + & Q(affecting_vers__isnull=False) + & ~Q(affecting_vers="") + ) + .order_by(F("last_range_unfurl_at").asc(nulls_first=True)) + .only("pk", "affecting_vers", "advisory", "base_purl") + ) From 5c8f7db29e182e1d05b5b76d188d925e98bf1bef Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 7 Apr 2026 18:38:46 +0530 Subject: [PATCH 368/390] test: test order of vers range processing Signed-off-by: Keshav Priyadarshi --- .../v2_improvers/test_unfurl_version_range.py | 102 +++++++++++++++++- 1 file changed, 97 insertions(+), 5 deletions(-) diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_unfurl_version_range.py b/vulnerabilities/tests/pipelines/v2_improvers/test_unfurl_version_range.py index a1927a426..ae23930f6 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_unfurl_version_range.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_unfurl_version_range.py @@ -13,14 +13,18 @@ from unittest.mock import patch from django.test import TestCase +from django.utils import timezone from packageurl import PackageURL from univers.version_range import VersionRange from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.importer import AffectedPackageV2 +from vulnerabilities.importer import PackageCommitPatchData from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import ImpactedPackage from vulnerabilities.models import PackageV2 from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import UnfurlVersionRangePipeline +from vulnerabilities.pipelines.v2_improvers.unfurl_version_range import impacted_package_qs from vulnerabilities.pipes.advisory import insert_advisory_v2 from vulnerabilities.tests.pipelines import TestLogger @@ -28,7 +32,7 @@ class TestUnfurlVersionRangePipeline(TestCase): def setUp(self): self.logger = TestLogger() - advisory1 = AdvisoryDataV2( + self.advisory1 = 
AdvisoryDataV2( summary="Test advisory", aliases=["CVE-2025-0001"], references=[], @@ -48,14 +52,54 @@ def setUp(self): date_published=datetime.now() - timedelta(days=10), url="https://example.com/advisory", ) - insert_advisory_v2( - advisory=advisory1, - pipeline_id="test_pipeline_v2", - logger=self.logger.write, + + self.advisory2 = AdvisoryDataV2( + summary="Test advisory", + aliases=["CVE-2025-0001"], + references=[], + severities=[], + weaknesses=[], + affected_packages=[ + AffectedPackageV2( + package=PackageURL.from_string("pkg:npm/foobar"), + affected_version_range=VersionRange.from_string("vers:npm/>3.2.1|<4.0.0"), + fixed_version_range=VersionRange.from_string("vers:npm/4.0.0"), + introduced_by_commit_patches=[], + fixed_by_commit_patches=[], + ), + AffectedPackageV2( + package=PackageURL.from_string("pkg:npm/foobar"), + affected_version_range=VersionRange.from_string("vers:npm/>4.2.1|<5.0.0"), + fixed_version_range=VersionRange.from_string("vers:npm/5.0.0"), + introduced_by_commit_patches=[], + fixed_by_commit_patches=[], + ), + AffectedPackageV2( + package=PackageURL.from_string("pkg:npm/foobar"), + affected_version_range=None, + fixed_version_range=None, + introduced_by_commit_patches=[], + fixed_by_commit_patches=[ + PackageCommitPatchData( + vcs_url="https://foobar.vcs/", + commit_hash="982f801f", + ), + ], + ), + ], + patches=[], + advisory_id="GHSA-1234", + date_published=datetime.now() - timedelta(days=10), + url="https://example.com/advisory", ) @patch("vulnerabilities.pipelines.v2_improvers.unfurl_version_range.get_purl_versions") def test_affecting_version_range_unfurl(self, mock_fetch): + insert_advisory_v2( + advisory=self.advisory1, + pipeline_id="test_pipeline_v2", + logger=self.logger.write, + ) self.assertEqual(1, PackageV2.objects.count()) mock_fetch.return_value = {"3.4.1", "3.9.0", "2.1.0", "4.0.0", "4.1.0"} pipeline = UnfurlVersionRangePipeline() @@ -67,3 +111,51 @@ def test_affecting_version_range_unfurl(self, mock_fetch): 
self.assertEqual(3, PackageV2.objects.count()) self.assertEqual(1, impact.fixed_by_packages.count()) self.assertEqual(2, impact.affecting_packages.count()) + + def test_impacted_package_qs_dont_process_empty_vers(self): + insert_advisory_v2( + advisory=self.advisory2, + pipeline_id="test_pipeline_v2", + logger=self.logger.write, + ) + + self.assertEqual(3, ImpactedPackage.objects.count()) + self.assertEqual(2, impacted_package_qs().count()) + + def test_impacted_package_qs_dont_process_empty_vers(self): + insert_advisory_v2( + advisory=self.advisory2, + pipeline_id="test_pipeline_v2", + logger=self.logger.write, + ) + impact = ImpactedPackage.objects.filter(affecting_vers__isnull=False).first() + impact.last_range_unfurl_at = timezone.now() + impact.save() + + self.assertEqual(1, impacted_package_qs().count()) + + def test_impacted_package_qs_prioritize_never_unfurled_impact_first(self): + insert_advisory_v2( + advisory=self.advisory2, + pipeline_id="test_pipeline_v2", + logger=self.logger.write, + ) + impact = ImpactedPackage.objects.filter(affecting_vers__isnull=False).first() + impact.last_range_unfurl_at = timezone.now() - timedelta(days=4) + impact.save() + + self.assertEqual(2, impacted_package_qs().count()) + first_impact_to_process = impacted_package_qs().first() + self.assertEqual(None, first_impact_to_process.last_range_unfurl_at) + + def test_impacted_package_reunfurl_vers(self): + insert_advisory_v2( + advisory=self.advisory2, + pipeline_id="test_pipeline_v2", + logger=self.logger.write, + ) + impact = ImpactedPackage.objects.filter(affecting_vers__isnull=False).first() + impact.last_range_unfurl_at = timezone.now() + impact.save() + + self.assertEqual(1, impacted_package_qs().count()) From 58b7873ea11b7ae00461773ab65199b21e142137 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 7 Apr 2026 18:49:20 +0530 Subject: [PATCH 369/390] feat: indicate pipeline priority in dashboard Signed-off-by: Keshav Priyadarshi --- vulnerabilities/tasks.py | 2 
+- vulnerabilities/templates/pipeline_dashboard.html | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/vulnerabilities/tasks.py b/vulnerabilities/tasks.py index 5a5968831..6c2be3fac 100644 --- a/vulnerabilities/tasks.py +++ b/vulnerabilities/tasks.py @@ -118,7 +118,7 @@ def set_run_failure(job, connection, type, value, traceback): def enqueue_pipeline(pipeline_id): pipeline_schedule = models.PipelineSchedule.objects.get(pipeline_id=pipeline_id) - queue = queues.get(pipeline_schedule.get_priority_display()) + queue = queues.get(pipeline_schedule.get_run_priority_display()) if pipeline_schedule.status in [ models.PipelineRun.Status.RUNNING, diff --git a/vulnerabilities/templates/pipeline_dashboard.html b/vulnerabilities/templates/pipeline_dashboard.html index a7f4139a4..fc474efe7 100644 --- a/vulnerabilities/templates/pipeline_dashboard.html +++ b/vulnerabilities/templates/pipeline_dashboard.html @@ -62,6 +62,7 @@

Pipeline Dashboard

Pipeline ID
Active
+
Priority
Interval
Status
Last Run End Time
@@ -79,6 +80,7 @@

Pipeline Dashboard

{{ schedule.pipeline_id }}
{{ schedule.is_active|yesno:"Yes,No" }}
+
{{ schedule.get_run_priority_display|capfirst}}
{% if schedule.is_run_once %} Once From 7f30e3a343f94748fce6fef805c11abb0465ca64 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 8 Apr 2026 15:02:21 +0530 Subject: [PATCH 370/390] feat: track last successful unfurl date Signed-off-by: Keshav Priyadarshi --- ...edpackage_last_range_unfurl_at_and_more.py | 2 +- ...package_last_successful_range_unfurl_at.py | 23 ++++++++++++++ vulnerabilities/models.py | 13 ++++++-- .../v2_improvers/unfurl_version_range.py | 31 +++++++++++++------ vulnerabilities/pipes/openssl.py | 4 +-- .../v2_importers/test_collect_fix_commit.py | 4 +-- .../v2_improvers/test_unfurl_version_range.py | 2 ++ vulnerabilities/tests/test_api.py | 6 ++-- 8 files changed, 63 insertions(+), 22 deletions(-) create mode 100644 vulnerabilities/migrations/0121_impactedpackage_last_successful_range_unfurl_at.py diff --git a/vulnerabilities/migrations/0120_impactedpackage_last_range_unfurl_at_and_more.py b/vulnerabilities/migrations/0120_impactedpackage_last_range_unfurl_at_and_more.py index f6b5d8a93..1cd5d7828 100644 --- a/vulnerabilities/migrations/0120_impactedpackage_last_range_unfurl_at_and_more.py +++ b/vulnerabilities/migrations/0120_impactedpackage_last_range_unfurl_at_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 5.2.11 on 2026-04-06 20:51 +# Generated by Django 5.2.11 on 2026-04-08 09:28 from django.db import migrations, models diff --git a/vulnerabilities/migrations/0121_impactedpackage_last_successful_range_unfurl_at.py b/vulnerabilities/migrations/0121_impactedpackage_last_successful_range_unfurl_at.py new file mode 100644 index 000000000..f9277183b --- /dev/null +++ b/vulnerabilities/migrations/0121_impactedpackage_last_successful_range_unfurl_at.py @@ -0,0 +1,23 @@ +# Generated by Django 5.2.11 on 2026-04-08 09:28 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0120_impactedpackage_last_range_unfurl_at_and_more"), + ] + + operations = [ + 
migrations.AddField( + model_name="impactedpackage", + name="last_successful_range_unfurl_at", + field=models.DateTimeField( + blank=True, + db_index=True, + help_text="Timestamp of the last successful vers range unfurl.", + null=True, + ), + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index bb4390f2a..a802f7011 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1139,9 +1139,9 @@ def get_affecting_vulnerabilities(self): next_fixed_package_vulns = list(fixed_by_pkg.affected_by) fixed_by_package_details["fixed_by_purl"] = fixed_by_purl - fixed_by_package_details[ - "fixed_by_purl_vulnerabilities" - ] = next_fixed_package_vulns + fixed_by_package_details["fixed_by_purl_vulnerabilities"] = ( + next_fixed_package_vulns + ) fixed_by_pkgs.append(fixed_by_package_details) vuln_details["fixed_by_package_details"] = fixed_by_pkgs @@ -3259,6 +3259,13 @@ class ImpactedPackage(models.Model): help_text="Timestamp of the last vers range unfurl.", ) + last_successful_range_unfurl_at = models.DateTimeField( + blank=True, + null=True, + db_index=True, + help_text="Timestamp of the last successful vers range unfurl.", + ) + def to_dict(self): from vulnerabilities.utils import purl_to_dict diff --git a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py index cb65f5f93..48d691fe0 100644 --- a/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py +++ b/vulnerabilities/pipelines/v2_improvers/unfurl_version_range.py @@ -52,20 +52,25 @@ def unfurl_version_range(self): processed_impacted_packages_count = 0 processed_affected_packages_count = 0 cached_versions = {} - impacts_to_update = [] - update_batch_size = 5 + update_unfurl_date = [] + update_successful_unfurl_date = [] + update_batch_size = 5000 + chunk_size = 5000 impacted_packages = impacted_package_qs(cutoff_day=self.reunfurl_after_days) impacted_packages_count = impacted_packages.count() 
self.log(f"Unfurl affected vers range for {impacted_packages_count:,d} ImpactedPackage.") - progress = LoopProgress(total_iterations=impacted_packages_count, logger=self.log) - for impact in progress.iter(impacted_packages.iterator(chunk_size=5000)): - impacts_to_update.append(impact.pk) + progress = LoopProgress( + total_iterations=impacted_packages_count, progress_step=5, logger=self.log + ) + for impact in progress.iter(impacted_packages.iterator(chunk_size=chunk_size)): + update_unfurl_date.append(impact.pk) purl = PackageURL.from_string(impact.base_purl) if not impact.affecting_vers or not any( c in impact.affecting_vers for c in ("<", ">", "!") ): + update_successful_unfurl_date.append(impact.pk) continue if purl.type not in FETCHCODE_SUPPORTED_ECOSYSTEMS: continue @@ -87,17 +92,25 @@ def unfurl_version_range(self): relation=ImpactedPackageAffecting, logger=self.log, ) + update_successful_unfurl_date.append(impact.pk) processed_impacted_packages_count += 1 - if len(impacts_to_update) > update_batch_size: - ImpactedPackage.objects.filter(pk__in=impacts_to_update).update( + if len(update_unfurl_date) > update_batch_size: + ImpactedPackage.objects.filter(pk__in=update_unfurl_date).update( last_range_unfurl_at=timezone.now() ) - impacts_to_update.clear() + ImpactedPackage.objects.filter(pk__in=update_successful_unfurl_date).update( + last_successful_range_unfurl_at=timezone.now() + ) + update_unfurl_date.clear() + update_successful_unfurl_date.clear() - ImpactedPackage.objects.filter(pk__in=impacts_to_update).update( + ImpactedPackage.objects.filter(pk__in=update_unfurl_date).update( last_range_unfurl_at=timezone.now() ) + ImpactedPackage.objects.filter(pk__in=update_successful_unfurl_date).update( + last_successful_range_unfurl_at=timezone.now() + ) self.log(f"Successfully processed {processed_impacted_packages_count:,d} ImpactedPackage.") self.log(f"{processed_affected_packages_count:,d} new Impact-Package relation created.") diff --git 
a/vulnerabilities/pipes/openssl.py b/vulnerabilities/pipes/openssl.py index b240f416c..1dffdedc1 100644 --- a/vulnerabilities/pipes/openssl.py +++ b/vulnerabilities/pipes/openssl.py @@ -89,9 +89,7 @@ def get_reference(reference_name, tag, reference_url): ref_type = ( AdvisoryReference.COMMIT if "commit" in name or tag == "patch" - else AdvisoryReference.ADVISORY - if "advisory" in name - else AdvisoryReference.OTHER + else AdvisoryReference.ADVISORY if "advisory" in name else AdvisoryReference.OTHER ) return ReferenceV2( diff --git a/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py b/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py index dac2c7781..9a687a3b7 100644 --- a/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py +++ b/vulnerabilities/tests/pipelines/v2_importers/test_collect_fix_commit.py @@ -52,9 +52,7 @@ def test_collect_fix_commits_groups_by_vuln(mock_repo, pipeline): side_effect=lambda c: ( ["CVE-2021-0001"] if "CVE" in c.message - else ["GHSA-dead-beef-baad"] - if "GHSA" in c.message - else [] + else ["GHSA-dead-beef-baad"] if "GHSA" in c.message else [] ) ) diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_unfurl_version_range.py b/vulnerabilities/tests/pipelines/v2_improvers/test_unfurl_version_range.py index ae23930f6..3d73c6884 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_unfurl_version_range.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_unfurl_version_range.py @@ -111,6 +111,8 @@ def test_affecting_version_range_unfurl(self, mock_fetch): self.assertEqual(3, PackageV2.objects.count()) self.assertEqual(1, impact.fixed_by_packages.count()) self.assertEqual(2, impact.affecting_packages.count()) + self.assertNotEqual(None, impact.last_range_unfurl_at) + self.assertNotEqual(None, impact.last_successful_range_unfurl_at) def test_impacted_package_qs_dont_process_empty_vers(self): insert_advisory_v2( diff --git a/vulnerabilities/tests/test_api.py 
b/vulnerabilities/tests/test_api.py index 9ed647099..31f2b7774 100644 --- a/vulnerabilities/tests/test_api.py +++ b/vulnerabilities/tests/test_api.py @@ -75,9 +75,9 @@ def cleaned_response(response): reference["scores"] = sorted( reference["scores"], key=lambda x: (x["value"], x["scoring_system"]) ) - package_data["resolved_vulnerabilities"][index]["references"][index2][ - "scores" - ] = reference["scores"] + package_data["resolved_vulnerabilities"][index]["references"][index2]["scores"] = ( + reference["scores"] + ) cleaned_response.append(package_data) From 41484a582936db313a3a846f00216190895d9026 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 8 Apr 2026 16:18:56 +0530 Subject: [PATCH 371/390] refactor: squash migrations Signed-off-by: Keshav Priyadarshi --- ...edpackage_last_range_unfurl_at_and_more.py | 12 +++++++++- ...package_last_successful_range_unfurl_at.py | 23 ------------------- 2 files changed, 11 insertions(+), 24 deletions(-) delete mode 100644 vulnerabilities/migrations/0121_impactedpackage_last_successful_range_unfurl_at.py diff --git a/vulnerabilities/migrations/0120_impactedpackage_last_range_unfurl_at_and_more.py b/vulnerabilities/migrations/0120_impactedpackage_last_range_unfurl_at_and_more.py index 1cd5d7828..6e070bde4 100644 --- a/vulnerabilities/migrations/0120_impactedpackage_last_range_unfurl_at_and_more.py +++ b/vulnerabilities/migrations/0120_impactedpackage_last_range_unfurl_at_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 5.2.11 on 2026-04-08 09:28 +# Generated by Django 5.2.11 on 2026-04-08 10:48 from django.db import migrations, models @@ -20,6 +20,16 @@ class Migration(migrations.Migration): null=True, ), ), + migrations.AddField( + model_name="impactedpackage", + name="last_successful_range_unfurl_at", + field=models.DateTimeField( + blank=True, + db_index=True, + help_text="Timestamp of the last successful vers range unfurl.", + null=True, + ), + ), migrations.AddField( model_name="pipelineschedule", 
name="run_priority", diff --git a/vulnerabilities/migrations/0121_impactedpackage_last_successful_range_unfurl_at.py b/vulnerabilities/migrations/0121_impactedpackage_last_successful_range_unfurl_at.py deleted file mode 100644 index f9277183b..000000000 --- a/vulnerabilities/migrations/0121_impactedpackage_last_successful_range_unfurl_at.py +++ /dev/null @@ -1,23 +0,0 @@ -# Generated by Django 5.2.11 on 2026-04-08 09:28 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0120_impactedpackage_last_range_unfurl_at_and_more"), - ] - - operations = [ - migrations.AddField( - model_name="impactedpackage", - name="last_successful_range_unfurl_at", - field=models.DateTimeField( - blank=True, - db_index=True, - help_text="Timestamp of the last successful vers range unfurl.", - null=True, - ), - ), - ] From e7bbd898ab4efcc2326744eb7c4fc30bcfa3a552 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 8 Apr 2026 18:46:45 +0530 Subject: [PATCH 372/390] chore: prepare v38.2.0 release Signed-off-by: Keshav Priyadarshi --- CHANGELOG.rst | 5 +++++ setup.cfg | 2 +- vulnerablecode/__init__.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f52bf437a..3aaed5212 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,11 @@ Release notes ============= +Version v38.2.0 +--------------------- + +- feat: add high priority queue and run version range unfurling pipeline more frequently (https://github.com/aboutcode-org/vulnerablecode/pull/2256) + Version v38.1.0 --------------------- diff --git a/setup.cfg b/setup.cfg index 16dbe9b9a..b91fd55a3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = vulnerablecode -version = 38.1.0 +version = 38.2.0 license = Apache-2.0 AND CC-BY-SA-4.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 diff --git 
a/vulnerablecode/__init__.py b/vulnerablecode/__init__.py index 4966e4c04..86794a0b9 100644 --- a/vulnerablecode/__init__.py +++ b/vulnerablecode/__init__.py @@ -14,7 +14,7 @@ import git -__version__ = "38.1.0" +__version__ = "38.2.0" PROJECT_DIR = Path(__file__).resolve().parent From 8b90efbc11ad0d3f889fdfb0ad39b64a73408ae9 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 8 Apr 2026 19:34:57 +0530 Subject: [PATCH 373/390] chore: enable verbose logging release ci and prepare v38.3.0 release Signed-off-by: Keshav Priyadarshi --- .github/workflows/pypi-release.yml | 2 ++ CHANGELOG.rst | 2 +- setup.cfg | 2 +- vulnerablecode/__init__.py | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 9fa9534c5..c56392a1e 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -85,3 +85,5 @@ jobs: - name: Publish to PyPI if: startsWith(github.ref, 'refs/tags/') uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 + with: + verbose: true diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3aaed5212..a517b4561 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,7 +1,7 @@ Release notes ============= -Version v38.2.0 +Version v38.3.0 --------------------- - feat: add high priority queue and run version range unfurling pipeline more frequently (https://github.com/aboutcode-org/vulnerablecode/pull/2256) diff --git a/setup.cfg b/setup.cfg index b91fd55a3..00b785eea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = vulnerablecode -version = 38.2.0 +version = 38.3.0 license = Apache-2.0 AND CC-BY-SA-4.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 diff --git a/vulnerablecode/__init__.py b/vulnerablecode/__init__.py index 86794a0b9..7027b67c2 100644 --- a/vulnerablecode/__init__.py +++ b/vulnerablecode/__init__.py @@ -14,7 +14,7 @@ 
import git -__version__ = "38.2.0" +__version__ = "38.3.0" PROJECT_DIR = Path(__file__).resolve().parent From c6dca58011218c1b41fa140dcdc8f52ae10dcf81 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 10 Apr 2026 17:45:32 +0530 Subject: [PATCH 374/390] fix: run pipeline scheduling jobs in respective queues - Instead of running all scheduling jobs in default queue, use each pipeline's assigned queue for scheduling. Signed-off-by: Keshav Priyadarshi --- vulnerabilities/models.py | 6 +++++- vulnerabilities/schedules.py | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index a802f7011..497f35a69 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2351,7 +2351,11 @@ def save(self, *args, **kwargs): if not self.pk: self.schedule_work_id = self.create_new_job(execute_now=True) elif self.pk and (existing := PipelineSchedule.objects.get(pk=self.pk)): - if existing.is_active != self.is_active or existing.run_interval != self.run_interval: + if ( + existing.is_active != self.is_active + or existing.run_interval != self.run_interval + or existing.run_priority != self.run_priority + ): self.schedule_work_id = self.create_new_job() self.full_clean() return super().save(*args, **kwargs) diff --git a/vulnerabilities/schedules.py b/vulnerabilities/schedules.py index 215129e10..27fb7e09a 100644 --- a/vulnerabilities/schedules.py +++ b/vulnerabilities/schedules.py @@ -24,6 +24,7 @@ def schedule_execution(pipeline_schedule, execute_now=False): Takes a `PackageSchedule` object as input and schedule a recurring job using `rq_scheduler` to execute the pipeline. 
""" + queue_name = pipeline_schedule.get_run_priority_display() first_execution = datetime.datetime.now(tz=datetime.timezone.utc) if not execute_now: first_execution = pipeline_schedule.next_run_date @@ -36,6 +37,7 @@ def schedule_execution(pipeline_schedule, execute_now=False): args=[pipeline_schedule.pipeline_id], interval=interval_in_seconds, repeat=None, + queue_name=queue_name, ) return job._id From 5962ce66abb90cee3f1df7455aabd3e5306afe0e Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 10 Apr 2026 19:54:05 +0530 Subject: [PATCH 375/390] feat: add function to compute queue load factor Signed-off-by: Keshav Priyadarshi --- vulnerabilities/models.py | 5 +++ vulnerabilities/tasks.py | 67 +++++++++++++++++++++++++++++++++++---- 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 497f35a69..e00f067c5 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2387,6 +2387,11 @@ def all_runs(self): def latest_run(self): return self.pipelineruns.first() if self.pipelineruns.exists() else None + @property + def latest_successful_run(self): + successful_runs = self.pipelineruns.filter(run_end_date__isnull=False, run_exitcode=0) + return successful_runs.first() if successful_runs.exists() else None + @property def earliest_run(self): return self.pipelineruns.earliest("run_start_date") if self.pipelineruns.exists() else None diff --git a/vulnerabilities/tasks.py b/vulnerabilities/tasks.py index 6c2be3fac..2454e041b 100644 --- a/vulnerabilities/tasks.py +++ b/vulnerabilities/tasks.py @@ -9,24 +9,23 @@ import logging +from collections import Counter +from contextlib import suppress from io import StringIO from traceback import format_exc as traceback_format_exc import django_rq +from redis.exceptions import ConnectionError +from rq import Worker from vulnerabilities import models from vulnerabilities.importer import Importer from vulnerabilities.improver import Improver +from 
vulnerablecode.settings import RQ_QUEUES logger = logging.getLogger(__name__) -default_queue = django_rq.get_queue("default") -high_queue = django_rq.get_queue("high") - -queues = { - "default": django_rq.get_queue("default"), - "high": django_rq.get_queue("high"), -} +queues = {queue: django_rq.get_queue(queue) for queue in RQ_QUEUES.keys()} def execute_pipeline(pipeline_id, run_id): @@ -151,3 +150,57 @@ def dequeue_job(job_id): for queue in queues.values(): if job_id in queue.jobs: queue.remove(job_id) + + +def compute_queue_load_factor(): + """ + Compute worker load per queue. + + Load factor is the ratio of the total compute required to run all active pipelines + in a queue to the available worker capacity for that queue over a 24-hour period. + A value greater than 1 indicates that the number of workers is insufficient to + run all pipelines within the schedule. + + Also compute the additional workers needed to balance each queue + """ + field = models.PipelineSchedule._meta.get_field("run_priority") + label_to_value = {label: value for value, label in field.choices} + total_compute_seconds_per_queue = {} + worker_per_queue = {} + load_per_queue = {} + seconds_in_24_hr = 86400 + + for queue in RQ_QUEUES.keys(): + total_compute_seconds_per_queue[queue] = sum( + (p.latest_successful_run.runtime / (p.run_interval / 24)) + for p in models.PipelineSchedule.objects.filter( + is_active=True, run_priority=label_to_value[queue] + ) + if p.latest_successful_run + ) + + with suppress(ConnectionError): + redis_conn = django_rq.get_connection() + queue_names = [ + w.queue_names()[0] for w in Worker.all(connection=redis_conn) if w.queue_names() + ] + worker_per_queue = dict(Counter(queue_names)) + + for queue_name, worker_count in worker_per_queue.items(): + total_compute = total_compute_seconds_per_queue.get(queue_name, 0) + if worker_count == 0 or total_compute == 0: + continue + + unit_load_on_queue = total_compute / seconds_in_24_hr + + num_of_worker_for_balanced_queue 
= round(unit_load_on_queue) + addition_worker_needed = max(num_of_worker_for_balanced_queue - worker_count, 0) + + net_load_on_queue = unit_load_on_queue / worker_count + + load_per_queue[queue_name] = { + "load_factor": net_load_on_queue, + "additional_worker": addition_worker_needed, + } + + return dict(sorted(load_per_queue.items(), key=lambda x: x[0], reverse=True)) From 4240d36a21c2301a5719f7f64916f9b1141457c2 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 10 Apr 2026 20:42:42 +0530 Subject: [PATCH 376/390] fix: track queues with no workers in load factor Signed-off-by: Keshav Priyadarshi --- vulnerabilities/tasks.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/vulnerabilities/tasks.py b/vulnerabilities/tasks.py index 2454e041b..a78c13551 100644 --- a/vulnerabilities/tasks.py +++ b/vulnerabilities/tasks.py @@ -170,6 +170,13 @@ def compute_queue_load_factor(): load_per_queue = {} seconds_in_24_hr = 86400 + with suppress(ConnectionError): + redis_conn = django_rq.get_connection() + queue_names = [ + w.queue_names()[0] for w in Worker.all(connection=redis_conn) if w.queue_names() + ] + worker_per_queue = dict(Counter(queue_names)) + for queue in RQ_QUEUES.keys(): total_compute_seconds_per_queue[queue] = sum( (p.latest_successful_run.runtime / (p.run_interval / 24)) @@ -178,17 +185,13 @@ def compute_queue_load_factor(): ) if p.latest_successful_run ) - - with suppress(ConnectionError): - redis_conn = django_rq.get_connection() - queue_names = [ - w.queue_names()[0] for w in Worker.all(connection=redis_conn) if w.queue_names() - ] - worker_per_queue = dict(Counter(queue_names)) + if queue not in worker_per_queue: + worker_per_queue[queue] = 0 for queue_name, worker_count in worker_per_queue.items(): + net_load_on_queue = "no_worker" total_compute = total_compute_seconds_per_queue.get(queue_name, 0) - if worker_count == 0 or total_compute == 0: + if total_compute == 0: continue unit_load_on_queue = 
total_compute / seconds_in_24_hr @@ -196,7 +199,8 @@ def compute_queue_load_factor(): num_of_worker_for_balanced_queue = round(unit_load_on_queue) addition_worker_needed = max(num_of_worker_for_balanced_queue - worker_count, 0) - net_load_on_queue = unit_load_on_queue / worker_count + if worker_count > 0: + net_load_on_queue = unit_load_on_queue / worker_count load_per_queue[queue_name] = { "load_factor": net_load_on_queue, From 6742ba61154ab5182d4a0eeb22c02129b3fe531e Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 10 Apr 2026 20:44:11 +0530 Subject: [PATCH 377/390] feat: cache computed load factor for 5 minutes Signed-off-by: Keshav Priyadarshi --- vulnerabilities/views.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index b984fbb51..5b9406f87 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -15,6 +15,7 @@ from cvss.exceptions import CVSS4MalformedError from django.contrib import messages from django.contrib.auth.views import LoginView +from django.core.cache import cache from django.core.exceptions import ValidationError from django.core.mail import send_mail from django.db.models import Exists @@ -48,6 +49,7 @@ from vulnerabilities.pipelines.v2_importers.epss_importer_v2 import EPSSImporterPipeline from vulnerabilities.severity_systems import EPSS from vulnerabilities.severity_systems import SCORING_SYSTEMS +from vulnerabilities.tasks import compute_queue_load_factor from vulnerabilities.throttling import AnonUserUIThrottle from vulnerabilities.utils import TYPES_WITH_MULTIPLE_IMPORTERS from vulnerabilities.utils import get_advisories_from_groups @@ -57,6 +59,8 @@ PAGE_SIZE = 10 +CACHE_TIMEOUT = 60 * 5 + class VulnerableCodeView(View): """ @@ -961,6 +965,13 @@ def get_queryset(self): def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) + load_per_queue = cache.get("load_per_queue") + + if load_per_queue is None: + 
load_per_queue = compute_queue_load_factor() + cache.set("load_per_queue", load_per_queue, CACHE_TIMEOUT) + + context["load_per_queue"] = load_per_queue context["active_pipeline_count"] = PipelineSchedule.objects.filter(is_active=True).count() context["disabled_pipeline_count"] = PipelineSchedule.objects.filter( is_active=False From 065d5a738114dc88a23f049dbcfb2f04334ddd57 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 10 Apr 2026 20:45:18 +0530 Subject: [PATCH 378/390] feat: show load factor on pipeline dashboard Signed-off-by: Keshav Priyadarshi --- .../templates/pipeline_dashboard.html | 76 +++++++++++++++++-- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/vulnerabilities/templates/pipeline_dashboard.html b/vulnerabilities/templates/pipeline_dashboard.html index fc474efe7..826c614ea 100644 --- a/vulnerabilities/templates/pipeline_dashboard.html +++ b/vulnerabilities/templates/pipeline_dashboard.html @@ -1,5 +1,7 @@ {% extends "base.html" %} +{% load utils %} + {% block title %} Pipeline Dashboard {% endblock %} @@ -22,6 +24,18 @@ .column { word-break: break-word; } + + .has-text-orange { + color: #ff8c42 !important; + } + + .has-tooltip-orange::before { + background-color: #ff8c42 !important; + } + + .has-tooltip-orange::after { + border-top-color: #ff8c42 !important; + } {% endblock %} @@ -48,11 +62,63 @@

Pipeline Dashboard

-
-

- {{ active_pipeline_count|default:0 }} active pipeline{{ active_pipeline_count|default:0|pluralize }}, - {{ disabled_pipeline_count|default:0 }} disabled pipeline{{ disabled_pipeline_count|default:0|pluralize }} -

+
+
+ {% if load_per_queue %} +

+ + Load Factor: + + {% for queue_name, values in load_per_queue.items %} + + + {{ queue_name| capfirst }} + + {% with load_factor=values|get_item:"load_factor" additional=values|get_item:"additional_worker" %} + {% if load_factor == "no_worker" %} + + + + {% elif load_factor < 1 %} + + {{ load_factor|floatformat:2 }} + + + {% elif load_factor < 1.6 %} + + {{ load_factor|floatformat:2 }} + + + {% else %} + + {{ load_factor|floatformat:2 }} + + + {% endif %} + {% endwith %} + + {% if not forloop.last %} • {% endif %} + + {% endfor %} +

+ {% endif %} +
+
+

+ {{ active_pipeline_count|default:0 }} active pipeline{{ active_pipeline_count|default:0|pluralize }}, + {{ disabled_pipeline_count|default:0 }} disabled pipeline{{ disabled_pipeline_count|default:0|pluralize }} +

+
From 104a6115d4fba05a0c7ed94f1cfa17ba452ac308 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 10 Apr 2026 21:21:09 +0530 Subject: [PATCH 379/390] chore: prepare for v38.4.0 release Signed-off-by: Keshav Priyadarshi --- CHANGELOG.rst | 6 ++++++ setup.cfg | 2 +- vulnerablecode/__init__.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a517b4561..85d04ed9c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,12 @@ Release notes ============= +Version v38.4.0 +--------------------- + +- fix: run pipeline scheduling jobs in respective queues (https://github.com/aboutcode-org/vulnerablecode/pull/2263) +- feat: show queue load factors on the pipeline dashboard (https://github.com/aboutcode-org/vulnerablecode/pull/2264) + Version v38.3.0 --------------------- diff --git a/setup.cfg b/setup.cfg index 00b785eea..e1275dae2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = vulnerablecode -version = 38.3.0 +version = 38.4.0 license = Apache-2.0 AND CC-BY-SA-4.0 # description must be on ONE line https://github.com/pypa/setuptools/issues/1390 diff --git a/vulnerablecode/__init__.py b/vulnerablecode/__init__.py index 7027b67c2..f8263d4c5 100644 --- a/vulnerablecode/__init__.py +++ b/vulnerablecode/__init__.py @@ -14,7 +14,7 @@ import git -__version__ = "38.3.0" +__version__ = "38.4.0" PROJECT_DIR = Path(__file__).resolve().parent From 11d718e3db6401062e6afaeb516c6e16c153d3d1 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 8 Apr 2026 20:20:36 +0530 Subject: [PATCH 380/390] feat: add field to track the latest advisory for an avid Signed-off-by: Keshav Priyadarshi --- vulnerabilities/models.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index e00f067c5..0be2cf515 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ 
-2886,11 +2886,7 @@ def latest_for_avid(self, avid: str): ) def latest_per_avid(self): - return self.order_by( - "avid", - F("date_collected").desc(nulls_last=True), - "-id", - ).distinct("avid") + return self.filter(is_latest=True) def latest_for_avids(self, avids): return self.filter(avid__in=avids).latest_per_avid() @@ -3007,6 +3003,7 @@ class AdvisoryV2(models.Model): max_length=200, blank=False, null=False, + db_index=True, help_text="Unique ID for the datasource used for this advisory ." "e.g.: nginx_importer_v2", ) @@ -3090,6 +3087,14 @@ class AdvisoryV2(models.Model): help_text="UTC Date on which the advisory was collected", ) + is_latest = models.BooleanField( + default=False, + blank=False, + null=False, + db_index=True, + help_text="Indicates whether this is the latest version of the advisory identified by its AVID.", + ) + original_advisory_text = models.TextField( blank=True, null=True, @@ -3142,6 +3147,11 @@ class AdvisoryV2(models.Model): class Meta: unique_together = ["datasource_id", "advisory_id", "unique_content_id"] ordering = ["datasource_id", "advisory_id", "date_published", "unique_content_id"] + constraints = [ + models.UniqueConstraint( + fields=["avid"], condition=Q(is_latest=True), name="unique_latest_per_avid" + ) + ] indexes = [ models.Index( fields=["avid", "-date_collected", "-id"], From 43f634c7d0f18323bfd2757bfafaa34dd0f95d1d Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 8 Apr 2026 20:25:32 +0530 Subject: [PATCH 381/390] feat: mark most recent avid as latest when inserting advisory Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipes/advisory.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index bcdd95075..9250f2679 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -334,6 +334,13 @@ def insert_advisory_v2( if not created: return advisory_obj + AdvisoryV2.objects.filter( + 
avid=f"{pipeline_id}/{advisory.advisory_id}", + is_latest=True, + ).update(is_latest=False) + advisory_obj.is_latest = True + advisory_obj.save() + aliases = get_or_create_advisory_aliases(aliases=advisory.aliases) references = get_or_create_advisory_references(references=advisory.references) severities = get_or_create_advisory_severities(severities=advisory.severities) From 7798b964a0a6776ac26a602574a9fc71f98c3691 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 14 Apr 2026 02:21:53 +0530 Subject: [PATCH 382/390] feat: enable db index on advisory_id field Signed-off-by: Keshav Priyadarshi --- vulnerabilities/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 0be2cf515..896da7c76 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -3013,6 +3013,7 @@ class AdvisoryV2(models.Model): blank=False, null=False, unique=False, + db_index=True, help_text="An advisory is a unique vulnerability identifier in some database, " "such as PYSEC-2020-2233", ) From ea463ce0a124fd21a19934a204dfde6946544d62 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 14 Apr 2026 02:23:07 +0530 Subject: [PATCH 383/390] feat: backfill latest advisory for existing v2 advisories Signed-off-by: Keshav Priyadarshi --- ...t_alter_advisoryv2_advisory_id_and_more.py | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 vulnerabilities/migrations/0121_advisoryv2_is_latest_alter_advisoryv2_advisory_id_and_more.py diff --git a/vulnerabilities/migrations/0121_advisoryv2_is_latest_alter_advisoryv2_advisory_id_and_more.py b/vulnerabilities/migrations/0121_advisoryv2_is_latest_alter_advisoryv2_advisory_id_and_more.py new file mode 100644 index 000000000..be1db5016 --- /dev/null +++ b/vulnerabilities/migrations/0121_advisoryv2_is_latest_alter_advisoryv2_advisory_id_and_more.py @@ -0,0 +1,65 @@ +# Generated by Django 5.2.11 on 2026-04-13 19:05 + +from django.db import migrations 
+from django.db import models +from django.db.models import F + + +class Migration(migrations.Migration): + def add_is_latest_on_existing_advisory(apps, schema_editor): + Advisory = apps.get_model("vulnerabilities", "AdvisoryV2") + + print(f"\nUpdating is_latest on existing V2 Advisory.") + latest_qs = Advisory.objects.order_by( + "avid", + F("date_collected").desc(nulls_last=True), + "-id", + ).distinct("avid") + + Advisory.objects.filter(id__in=latest_qs.values("id")).update(is_latest=True) + + dependencies = [ + ("vulnerabilities", "0120_impactedpackage_last_range_unfurl_at_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="advisoryv2", + name="is_latest", + field=models.BooleanField( + db_index=True, + default=False, + help_text="Indicates whether this is the latest version of the advisory identified by its AVID.", + ), + ), + migrations.AlterField( + model_name="advisoryv2", + name="advisory_id", + field=models.CharField( + db_index=True, + help_text="An advisory is a unique vulnerability identifier in some database, such as PYSEC-2020-2233", + max_length=500, + ), + ), + migrations.AlterField( + model_name="advisoryv2", + name="datasource_id", + field=models.CharField( + db_index=True, + help_text="Unique ID for the datasource used for this advisory .e.g.: nginx_importer_v2", + max_length=200, + ), + ), + migrations.AddConstraint( + model_name="advisoryv2", + constraint=models.UniqueConstraint( + condition=models.Q(("is_latest", True)), + fields=("avid",), + name="unique_latest_per_avid", + ), + ), + migrations.RunPython( + code=add_is_latest_on_existing_advisory, + reverse_code=migrations.RunPython.noop, + ), + ] From d671ebd9a35ebbcc89f6e3cacd65042507da60a6 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 14 Apr 2026 02:26:51 +0530 Subject: [PATCH 384/390] test: add test for latest advisory data migration Signed-off-by: Keshav Priyadarshi --- vulnerabilities/tests/test_data_migrations.py | 57 +++++++++++++++++++ 1 file 
changed, 57 insertions(+) diff --git a/vulnerabilities/tests/test_data_migrations.py b/vulnerabilities/tests/test_data_migrations.py index 8303c4003..c32abb83f 100644 --- a/vulnerabilities/tests/test_data_migrations.py +++ b/vulnerabilities/tests/test_data_migrations.py @@ -12,6 +12,7 @@ from django.db import IntegrityError from django.db import connection from django.db.migrations.executor import MigrationExecutor +from django.db.models import Count from django.test import TestCase from django.utils import timezone from packageurl import PackageURL @@ -1031,3 +1032,59 @@ def test_m2m_relationships_work(self): self.assertIn(commit1, impacted.affecting_commits.all()) self.assertIn(commit2, impacted.fixed_by_commits.all()) + + +class TestLatestAdvisoryV2Migration(TestMigrations): + """Tests is_latest field population on existing v2 advisory.""" + + app_name = "vulnerabilities" + migrate_from = "0120_impactedpackage_last_range_unfurl_at_and_more" + migrate_to = "0121_advisoryv2_is_latest_alter_advisoryv2_advisory_id_and_more" + + def setUpBeforeMigration(self, apps): + AdvisoryV2 = apps.get_model("vulnerabilities", "AdvisoryV2") + + AdvisoryV2.objects.create( + unique_content_id="content_id_old", + url="https://old.example.com", + summary="Old advisory", + advisory_id="test_adv", + avid="test_pipeline/test_adv", + datasource_id="test_pipeline", + ) + + AdvisoryV2.objects.create( + unique_content_id="content_id_old2", + url="https://old.example.com", + summary="Old 2 advisory", + advisory_id="test_adv", + avid="test_pipeline/test_adv", + datasource_id="test_pipeline", + ) + + AdvisoryV2.objects.create( + unique_content_id="content_id_new", + url="https://old.example.com", + summary="New advisory", + advisory_id="test_adv", + avid="test_pipeline/test_adv", + datasource_id="test_pipeline", + ) + + def test_no_duplicate_is_latest_for_avid(self): + AdvisoryV2 = apps.get_model("vulnerabilities", "AdvisoryV2") + + duplicate = ( + AdvisoryV2.objects.filter(is_latest=True) + 
.values("avid") + .annotate(cnt=Count("id")) + .filter(cnt__gt=1) + ) + + self.assertFalse(duplicate.exists()) + + def test_latest_is_actually_recent(self): + AdvisoryV2 = apps.get_model("vulnerabilities", "AdvisoryV2") + + latest = AdvisoryV2.objects.get(avid="test_pipeline/test_adv", is_latest=True) + self.assertEqual("New advisory", latest.summary) From 55b87aee28c56e885419069eb82d4b943dab9fa2 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 14 Apr 2026 02:32:47 +0530 Subject: [PATCH 385/390] test: verify latest advisory is updated on new advisory insertion Signed-off-by: Keshav Priyadarshi --- vulnerabilities/tests/pipes/test_advisory.py | 78 ++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/vulnerabilities/tests/pipes/test_advisory.py b/vulnerabilities/tests/pipes/test_advisory.py index 8710b2ea4..67c073b61 100644 --- a/vulnerabilities/tests/pipes/test_advisory.py +++ b/vulnerabilities/tests/pipes/test_advisory.py @@ -18,12 +18,15 @@ from vulnerabilities import models from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import AffectedPackageV2 from vulnerabilities.importer import PackageCommitPatchData from vulnerabilities.importer import Reference from vulnerabilities.models import AdvisoryAlias from vulnerabilities.models import AdvisoryReference from vulnerabilities.models import AdvisorySeverity +from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import AdvisoryWeakness from vulnerabilities.models import PackageCommitPatch from vulnerabilities.pipes.advisory import get_or_create_advisory_aliases @@ -33,6 +36,8 @@ from vulnerabilities.pipes.advisory import get_or_create_advisory_weaknesses from vulnerabilities.pipes.advisory import get_or_create_aliases from vulnerabilities.pipes.advisory import import_advisory +from vulnerabilities.pipes.advisory import insert_advisory_v2 
+from vulnerabilities.tests.pipelines import TestLogger from vulnerabilities.utils import compute_content_id @@ -257,3 +262,76 @@ def test_get_or_create_advisory_commit(advisory_commit): assert isinstance(commit, PackageCommitPatch) assert commit.commit_hash in [c.commit_hash for c in advisory_commit] assert commit.vcs_url in [c.vcs_url for c in advisory_commit] + + +class TestLatestAdvisoryV2(TestCase): + def setUp(self): + self.logger = TestLogger() + self.advisory1 = AdvisoryDataV2( + summary="Test advisory old", + aliases=["CVE-2025-0001"], + references=[], + severities=[], + weaknesses=[], + affected_packages=[ + AffectedPackageV2( + package=PackageURL.from_string("pkg:npm/foobar"), + affected_version_range=VersionRange.from_string("vers:npm/>3.2.1|<4.0.0"), + fixed_version_range=VersionRange.from_string("vers:npm/4.0.0"), + introduced_by_commit_patches=[], + fixed_by_commit_patches=[], + ), + ], + patches=[], + advisory_id="GHSA-1234", + url="https://example.com/advisory", + ) + + self.advisory2 = AdvisoryDataV2( + summary="Test advisory new", + aliases=["CVE-2025-0001"], + references=[], + severities=[], + weaknesses=[], + affected_packages=[ + AffectedPackageV2( + package=PackageURL.from_string("pkg:npm/foobar"), + affected_version_range=VersionRange.from_string("vers:npm/>3.2.1|<4.0.0"), + fixed_version_range=VersionRange.from_string("vers:npm/4.0.0"), + introduced_by_commit_patches=[], + fixed_by_commit_patches=[], + ), + AffectedPackageV2( + package=PackageURL.from_string("pkg:npm/foobar"), + affected_version_range=None, + fixed_version_range=None, + introduced_by_commit_patches=[], + fixed_by_commit_patches=[ + PackageCommitPatchData( + vcs_url="https://foobar.vcs/", + commit_hash="982f801f", + ), + ], + ), + ], + patches=[], + advisory_id="GHSA-1234", + url="https://example.com/advisory", + ) + + insert_advisory_v2( + advisory=self.advisory1, + pipeline_id="test_pipeline_v2", + logger=self.logger.write, + ) + + def 
test_latest_advisory_update_on_advisory_insert(self): + adv_old = AdvisoryV2.objects.get(avid="test_pipeline_v2/GHSA-1234", is_latest=True) + insert_advisory_v2( + advisory=self.advisory2, + pipeline_id="test_pipeline_v2", + logger=self.logger.write, + ) + adv_new = AdvisoryV2.objects.get(avid="test_pipeline_v2/GHSA-1234", is_latest=True) + self.assertEqual("Test advisory old", adv_old.summary) + self.assertEqual("Test advisory new", adv_new.summary) From afd9d53232cbfaf669cd13ec2f119e6b2087e6ca Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 14 Apr 2026 03:38:49 +0530 Subject: [PATCH 386/390] test: populate is_latest in old advisory fixture Signed-off-by: Keshav Priyadarshi --- .../v2_improvers/test_collect_ssvc_trees.py | 2 + .../test_compute_package_risk_v2.py | 1 + .../v2_improvers/test_relate_severities.py | 9 ++ vulnerabilities/tests/test_api_v3.py | 18 ++- .../test_same_avid_different_content_id.py | 136 +++++------------- 5 files changed, 60 insertions(+), 106 deletions(-) diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_collect_ssvc_trees.py b/vulnerabilities/tests/pipelines/v2_improvers/test_collect_ssvc_trees.py index fa6719311..ad4a6bcb6 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_collect_ssvc_trees.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_collect_ssvc_trees.py @@ -36,6 +36,7 @@ def vulnrichment_advisory(db): url="https://example.com/advisory/TEST-2024-0001", unique_content_id="unique-1234", date_collected=datetime.now(), + is_latest=True, ) @@ -59,6 +60,7 @@ def related_advisory(db): url="https://example.com/related/TEST-2024-0001", unique_content_id="unique-5678", date_collected=datetime.now(), + is_latest=True, ) diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_package_risk_v2.py b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_package_risk_v2.py index db6ffd5d3..305abf429 100644 --- 
a/vulnerabilities/tests/pipelines/v2_improvers/test_compute_package_risk_v2.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_compute_package_risk_v2.py @@ -34,6 +34,7 @@ def test_simple_risk_pipeline(): unique_content_id="ajkef", url="https://test.com", date_collected=datetime.now(), + is_latest=True, ) adv.save() diff --git a/vulnerabilities/tests/pipelines/v2_improvers/test_relate_severities.py b/vulnerabilities/tests/pipelines/v2_improvers/test_relate_severities.py index 2dadbc679..27cf1f849 100644 --- a/vulnerabilities/tests/pipelines/v2_improvers/test_relate_severities.py +++ b/vulnerabilities/tests/pipelines/v2_improvers/test_relate_severities.py @@ -25,6 +25,7 @@ def test_relate_severities_by_advisory_id(): unique_content_id="ab1", url="https://example.com/advisory/CVE-2024-0001", date_collected="2024-01-01", + is_latest=True, ) severity_advisory = AdvisoryV2.objects.create( @@ -34,6 +35,7 @@ def test_relate_severities_by_advisory_id(): unique_content_id="ab2", url="https://example.com/epss/CVE-2024-0001", date_collected="2024-01-02", + is_latest=True, ) severity_advisory.severities.create( scoring_system=EPSS.identifier, @@ -59,6 +61,7 @@ def test_relate_severities_via_alias(): unique_content_id="ab3", url="https://example.com/advisory/CVE-2024-0002", date_collected="2024-01-01", + is_latest=True, ) base.aliases.create(alias="CVE-2024-ALIAS") @@ -70,6 +73,7 @@ def test_relate_severities_via_alias(): unique_content_id="ab4", url="https://example.com/epss/CVE-2024-ALIAS", date_collected="2024-01-02", + is_latest=True, ) severity_advisory.severities.create( scoring_system=EPSS.identifier, @@ -91,6 +95,7 @@ def test_no_self_relation_created(): url="https://example.com/advisory/CVE-2024-0003", date_collected="2024-01-03", avid="epss/CVE-2024-0003", + is_latest=True, ) advisory.severities.create( scoring_system=EPSS.identifier, @@ -112,6 +117,7 @@ def test_unsupported_severity_system_is_ignored(): url="https://example.com/advisory/CVE-2024-0004", 
date_collected="2024-01-01", avid="nvd/CVE-2024-0004", + is_latest=True, ) severity_advisory = AdvisoryV2.objects.create( @@ -121,6 +127,7 @@ def test_unsupported_severity_system_is_ignored(): url="https://example.com/epss/CVE-2024-0004", date_collected="2024-01-02", avid="epss/CVE-2024-0004", + is_latest=True, ) severity_advisory.severities.create( scoring_system="UNKNOWN_SYSTEM", @@ -142,6 +149,7 @@ def test_pipeline_is_idempotent(): url="https://example.com/advisory/CVE-2024-0005", date_collected="2024-01-01", avid="nvd/CVE-2024-0005", + is_latest=True, ) severity = AdvisoryV2.objects.create( @@ -150,6 +158,7 @@ def test_pipeline_is_idempotent(): unique_content_id="ab9", url="https://example.com/epss/CVE-2024-0005", date_collected="2024-01-02", + is_latest=True, avid="epss/CVE-2024-0005", ) severity.severities.create( diff --git a/vulnerabilities/tests/test_api_v3.py b/vulnerabilities/tests/test_api_v3.py index 36dd7fba1..137692abf 100644 --- a/vulnerabilities/tests/test_api_v3.py +++ b/vulnerabilities/tests/test_api_v3.py @@ -14,22 +14,26 @@ from rest_framework.test import APITestCase from univers.version_range import PypiVersionRange +from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import PackageV2 from vulnerabilities.pipes.advisory import insert_advisory_v2 +from vulnerabilities.tests.pipelines import TestLogger class APIV3TestCase(APITestCase): def setUp(self): from vulnerabilities.models import ImpactedPackage - self.advisory = AdvisoryV2.objects.create( - datasource_id="ghsa", - advisory_id="GHSA-1234", - avid="ghsa/GHSA-1234", - unique_content_id="f" * 64, - url="https://example.com/advisory", - date_collected="2025-07-01T00:00:00Z", + self.logger = TestLogger() + self.advisory = insert_advisory_v2( + advisory=AdvisoryDataV2( + summary="summary", + advisory_id="GHSA-1234", + url="https://example.com/advisory", + ), + pipeline_id="ghsa", + logger=self.logger.write, ) 
self.package = PackageV2.objects.from_purl(purl="pkg:pypi/sample@1.0.0") diff --git a/vulnerabilities/tests/test_same_avid_different_content_id.py b/vulnerabilities/tests/test_same_avid_different_content_id.py index a366d1872..1dc6dd686 100644 --- a/vulnerabilities/tests/test_same_avid_different_content_id.py +++ b/vulnerabilities/tests/test_same_avid_different_content_id.py @@ -7,13 +7,14 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -import uuid -from datetime import timedelta - import pytest -from django.utils.timezone import now +from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.pipes.advisory import insert_advisory_v2 +from vulnerabilities.tests.pipelines import TestLogger + +logger = TestLogger() @pytest.fixture @@ -22,45 +23,29 @@ def advisory_factory(db): Factory to create AdvisoryV2 objects with minimal required fields. """ - def _create(*, avid, advisory_id, collected_at): - return AdvisoryV2.objects.create( - datasource_id="test_source", - advisory_id=advisory_id, - avid=avid, - unique_content_id=str(uuid.uuid4()), - url="https://example.com/advisory", - date_collected=collected_at, + def _create(*, advisory_id, summary): + + return insert_advisory_v2( + advisory=AdvisoryDataV2( + summary=summary, + advisory_id=advisory_id, + url="https://example.com/advisory", + ), + pipeline_id="source", + logger=logger.write, ) return _create -@pytest.fixture -def timestamps(): - now_ts = now() - return { - "old": now_ts - timedelta(days=3), - "mid": now_ts - timedelta(days=1), - "new": now_ts, - } - - @pytest.mark.django_db def test_latest_for_avid_returns_latest_by_date_collected( - advisory_factory, timestamps, django_assert_num_queries + advisory_factory, django_assert_num_queries ): avid = "source/ADV-1" - older = advisory_factory( - avid=avid, - advisory_id="ADV-1", - collected_at=timestamps["old"], - ) - newer = advisory_factory( - avid=avid, - 
advisory_id="ADV-1", - collected_at=timestamps["new"], - ) + older = advisory_factory(advisory_id="ADV-1", summary="old advisory") + newer = advisory_factory(advisory_id="ADV-1", summary="new advisory") with django_assert_num_queries(1): result = AdvisoryV2.objects.latest_for_avid(avid) @@ -70,20 +55,11 @@ def test_latest_for_avid_returns_latest_by_date_collected( @pytest.mark.django_db -def test_latest_for_avid_tie_breaks_by_id(advisory_factory, timestamps, django_assert_num_queries): +def test_latest_for_avid_tie_breaks_by_id(advisory_factory, django_assert_num_queries): avid = "source/ADV-2" - ts = timestamps["mid"] - - first = advisory_factory( - avid=avid, - advisory_id="ADV-2", - collected_at=ts, - ) - second = advisory_factory( - avid=avid, - advisory_id="ADV-2", - collected_at=ts, - ) + + first = advisory_factory(advisory_id="ADV-2", summary="old advisory") + second = advisory_factory(advisory_id="ADV-2", summary="new advisory") with django_assert_num_queries(1): result = AdvisoryV2.objects.latest_for_avid(avid) @@ -92,25 +68,11 @@ def test_latest_for_avid_tie_breaks_by_id(advisory_factory, timestamps, django_a @pytest.mark.django_db -def test_latest_per_avid_returns_one_row_per_avid( - advisory_factory, timestamps, django_assert_num_queries -): - advisory_factory( - avid="source/A", - advisory_id="A", - collected_at=timestamps["old"], - ) - latest_a = advisory_factory( - avid="source/A", - advisory_id="A", - collected_at=timestamps["new"], - ) - - latest_b = advisory_factory( - avid="source/B", - advisory_id="B", - collected_at=timestamps["mid"], - ) +def test_latest_per_avid_returns_one_row_per_avid(advisory_factory, django_assert_num_queries): + advisory_factory(advisory_id="A", summary="old advisory") + latest_a = advisory_factory(advisory_id="A", summary="new advisory") + + latest_b = advisory_factory(advisory_id="B", summary="new advisory") with django_assert_num_queries(1): qs = AdvisoryV2.objects.latest_per_avid() @@ -122,19 +84,11 @@ def 
test_latest_per_avid_returns_one_row_per_avid( @pytest.mark.django_db -def test_latest_per_avid_excludes_older_versions(advisory_factory, timestamps): +def test_latest_per_avid_excludes_older_versions(advisory_factory): avid = "source/C" - older = advisory_factory( - avid=avid, - advisory_id="C", - collected_at=timestamps["old"], - ) - latest = advisory_factory( - avid=avid, - advisory_id="C", - collected_at=timestamps["new"], - ) + older = advisory_factory(advisory_id="C", summary="old advisory") + latest = advisory_factory(advisory_id="C", summary="new advisory") results = list(AdvisoryV2.objects.latest_per_avid()) @@ -144,30 +98,14 @@ def test_latest_per_avid_excludes_older_versions(advisory_factory, timestamps): @pytest.mark.django_db def test_latest_for_avids_filters_and_collapses_correctly( - advisory_factory, timestamps, django_assert_num_queries + advisory_factory, django_assert_num_queries ): - advisory_factory( - avid="source/A", - advisory_id="A", - collected_at=timestamps["old"], - ) - latest_a = advisory_factory( - avid="source/A", - advisory_id="A", - collected_at=timestamps["new"], - ) - - latest_b = advisory_factory( - avid="source/B", - advisory_id="B", - collected_at=timestamps["mid"], - ) - - advisory_factory( - avid="source/C", - advisory_id="C", - collected_at=timestamps["new"], - ) + + advisory_factory(advisory_id="A", summary="old advisory") + latest_a = advisory_factory(advisory_id="A", summary="new advisory") + + advisory_factory(advisory_id="B", summary="old advisory") + latest_b = advisory_factory(advisory_id="B", summary="new advisory") with django_assert_num_queries(1): qs = AdvisoryV2.objects.latest_for_avids({"source/A", "source/B"}) From 3d413f252052c00eee311e2dc3edfc9079958203 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 14 Apr 2026 17:40:59 +0530 Subject: [PATCH 387/390] Optimize V3 API Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 253 ++++++++++++++++++--------- vulnerabilities/models.py | 42 +++-- 
vulnerabilities/tests/test_api_v3.py | 2 +- 3 files changed, 202 insertions(+), 95 deletions(-) diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index c17202f25..a0cb24c91 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -7,6 +7,7 @@ # See https://aboutcode.org for more information about nexB OSS projects. # +from collections import defaultdict from typing import List from urllib.parse import urlencode @@ -21,6 +22,7 @@ from rest_framework.reverse import reverse from rest_framework.throttling import AnonRateThrottle +from vulnerabilities.models import AdvisoryAlias from vulnerabilities.models import AdvisoryReference from vulnerabilities.models import AdvisorySet from vulnerabilities.models import AdvisorySetMember @@ -216,6 +218,26 @@ def get_fixing_vulnerabilities_url(self, obj): def get_affected_by_vulnerabilities(self, package): """Return a dictionary with advisory as keys and their details, including fixed_by_packages.""" + advisories = self.context["advisory_map"].get(package.id, []) + impact_map = self.context["impact_map"].get(package.id, {}) + + if advisories: + result = [] + + for adv in advisories: + fixed = impact_map.get(adv["avid"]) + if not fixed: + continue + + result.append( + { + **adv, + "fixed_by_packages": fixed, + } + ) + + return result + advisories_qs = AdvisoryV2.objects.latest_affecting_advisories_for_purl(package.package_url) advisories = [] @@ -250,56 +272,35 @@ def get_affected_by_vulnerabilities(self, package): "advisory_id": advisory.advisory_id.split("/")[-1], "aliases": [alias.alias for alias in advisory.aliases.all()], "summary": advisory.summary, - "fixed_by_packages": [pkg.purl for pkg in impact.fixed_by_packages.all()], "severity": advisory.weighted_severity, "exploitability": advisory.exploitability, "risk_score": advisory.risk_score, + "fixed_by_packages": [pkg.purl for pkg in impact.fixed_by_packages.all()], } ) return result - is_grouped = AdvisorySet.objects.filter(package=package, 
relation_type="affecting").exists() - - if is_grouped: - affected_by_advisories_qs = ( - AdvisorySet.objects.filter(package=package, relation_type="affecting") - .select_related("primary_advisory") - .prefetch_related( - Prefetch( - "members", - queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( - "advisory" - ), - to_attr="secondary_members", - ) + if not advisories: + if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: + advisories_qs = advisories_qs.prefetch_related( + "aliases", + "impacted_packages__affecting_packages", + "impacted_packages__fixed_by_packages", ) - ) - - affected_groups = [ - Group( - aliases=list(adv.aliases.all()), - primary=adv.primary_advisory, - secondaries=[member.advisory for member in adv.secondary_members], + advisories: List[GroupedAdvisory] = merge_and_save_grouped_advisories( + package, advisories_qs, "affecting" ) - for adv in affected_by_advisories_qs - ] + return self.return_advisories_data(package, advisories_qs, advisories) - advisories: List[GroupedAdvisory] = get_advisories_from_groups(affected_groups) - return self.return_advisories_data(package, advisories_qs, advisories) + def get_fixing_vulnerabilities(self, package): + fixing_advisories = AdvisorySet.objects.filter( + package=package, relation_type="fixing" + ).values_list("primary_advisory__advisory_id", flat=True) - if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: - advisories_qs = advisories_qs.prefetch_related( - "aliases", - "impacted_packages__affecting_packages", - "impacted_packages__fixed_by_packages", - ) - advisories: List[GroupedAdvisory] = merge_and_save_grouped_advisories( - package, advisories_qs, "affecting" - ) - return self.return_advisories_data(package, advisories_qs, advisories) + if fixing_advisories: + return [{"advisory_id": adv_id.split("/")[-1]} for adv_id in fixing_advisories] - def get_fixing_vulnerabilities(self, package): advisories_qs = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl(package.package_url) if 
not package.type in TYPES_WITH_MULTIPLE_IMPORTERS: @@ -319,37 +320,6 @@ def get_fixing_vulnerabilities(self, package): ) return results - advisories = [] - - is_grouped = AdvisorySet.objects.filter(package=package, relation_type="fixing").exists() - - if is_grouped: - fixing_advisories_qs = ( - AdvisorySet.objects.filter(package=package, relation_type="fixing") - .select_related("primary_advisory") - .prefetch_related( - Prefetch( - "members", - queryset=AdvisorySetMember.objects.filter(is_primary=False).select_related( - "advisory" - ), - to_attr="secondary_members", - ) - ) - ) - - fixing_groups = [ - Group( - aliases=list(adv.aliases.all()), - primary=adv.primary_advisory, - secondaries=[member.advisory for member in adv.secondary_members], - ) - for adv in fixing_advisories_qs - ] - - advisories: List[GroupedAdvisory] = get_advisories_from_groups(fixing_groups) - return self.return_fixing_advisories_data(advisories) - if package.type in TYPES_WITH_MULTIPLE_IMPORTERS: advisories_qs = advisories_qs.prefetch_related( "aliases", @@ -409,11 +379,11 @@ def return_advisories_data(self, package, advisories_qs, advisories): return result def get_next_non_vulnerable_version(self, package): - if next_non_vulnerable := package.get_non_vulnerable_versions()[0]: + if next_non_vulnerable := package.next_non_vulnerable_version: return next_non_vulnerable.version def get_latest_non_vulnerable_version(self, package): - if latest_non_vulnerable := package.get_non_vulnerable_versions()[-1]: + if latest_non_vulnerable := package.latest_non_vulnerable_version: return latest_non_vulnerable.version @@ -464,13 +434,11 @@ def create(self, request, *args, **kwargs): query = ( PackageV2.objects.filter(plain_package_url__in=plain_purls) .values_list("plain_package_url", flat=True) - .distinct() .order_by("plain_package_url") ) else: query = ( PackageV2.objects.filter(package_url__in=purls) - .distinct() .order_by("package_url") .values_list("package_url", flat=True) ) @@ -479,20 +447,20 @@ 
def create(self, request, *args, **kwargs): return self.get_paginated_response(page) if ignore_qualifiers_subpath: - query = ( - PackageV2.objects.filter(plain_package_url__in=plain_purls) - .order_by("plain_package_url") - .distinct("plain_package_url") + query = PackageV2.objects.filter(plain_package_url__in=plain_purls).order_by( + "plain_package_url" ) else: - query = ( - PackageV2.objects.filter(package_url__in=purls) - .order_by("package_url") - .distinct("package_url") - ) + query = PackageV2.objects.filter(package_url__in=purls).order_by("package_url") page = self.paginate_queryset(query) - serializer = self.get_serializer(page, many=True, context={"request": request}) + advisory_map = get_grouped_advisories_bulk(page) + impact_map = get_impacts_bulk(page) + serializer = self.get_serializer( + page, + many=True, + context={"request": request, "advisory_map": advisory_map, "impact_map": impact_map}, + ) return self.get_paginated_response(serializer.data) @@ -592,3 +560,124 @@ class FixingAdvisoriesViewSet(PackageAdvisoriesViewSet): class AffectedByAdvisoriesViewSet(PackageAdvisoriesViewSet): relation = "impacted_packages__affecting_packages__package_url" serializer_class = AffectedByAdvisoryV3Serializer + + +def get_grouped_advisories_bulk(packages): + package_ids = [p.id for p in packages] + + advisory_sets = list( + AdvisorySet.objects.filter( + package_id__in=package_ids, + relation_type="affecting", + ) + .select_related("primary_advisory", "package") + .prefetch_related( + Prefetch("aliases", queryset=AdvisoryAlias.objects.only("alias")), + Prefetch( + "members", + queryset=AdvisorySetMember.objects.filter(is_primary=False) + .select_related("advisory") + .only( + "advisory__avid", + "advisory__weighted_severity", + "advisory__exploitability", + ), + to_attr="secondary_members", + ), + ) + .only( + "id", + "package_id", + "primary_advisory__avid", + "primary_advisory__summary", + "primary_advisory__weighted_severity", + 
"primary_advisory__exploitability", + "primary_advisory__advisory_id", + ) + ) + + package_map = defaultdict(list) + for adv in advisory_sets: + adv._aliases_cache = [a.alias for a in adv.aliases.all()] + package_map[adv.package_id].append(adv) + + result = {} + + for package in packages: + groups = package_map.get(package.id, []) + grouped = [] + + for adv in groups: + primary = adv.primary_advisory + secondaries = [m.advisory for m in adv.secondary_members] + + max_sev = primary.weighted_severity or 0.0 + max_exp = primary.exploitability or 0.0 + + for sec in secondaries: + if sec.weighted_severity: + max_sev = max(max_sev, sec.weighted_severity) + if sec.exploitability: + max_exp = max(max_exp, sec.exploitability) + + weighted_severity = round(max_sev, 1) if max_sev else None + exploitability = max_exp or None + + risk_score = None + if exploitability and weighted_severity: + risk_score = round(min(exploitability * weighted_severity, 10.0), 1) + + identifier = primary.advisory_id.split("/")[-1] + + aliases = [a for a in adv._aliases_cache if a != identifier] + + grouped.append( + { + "avid": primary.avid, + "advisory_id": identifier, + "aliases": aliases, + "weighted_severity": weighted_severity, + "exploitability": exploitability, + "risk_score": risk_score, + "summary": primary.summary, + } + ) + + result[package.id] = grouped + + return result + + +def get_impacts_bulk(packages): + package_ids = [p.id for p in packages] + + impacts = ( + ImpactedPackageAffecting.objects.filter(package_id__in=package_ids) + .select_related("impacted_package__advisory") + .prefetch_related( + Prefetch( + "impacted_package__fixed_by_packages", + queryset=PackageV2.objects.only("package_url"), + ) + ) + .only( + "package_id", + "impacted_package_id", + "impacted_package__advisory_id", + "impacted_package__advisory__avid", + ) + ) + + impact_map = defaultdict(dict) + fixed_cache = {} + + for impact in impacts: + ip = impact.impacted_package + avid = ip.advisory.avid + + if ip.id 
not in fixed_cache: + fixed_cache[ip.id] = list({pkg.purl for pkg in ip.fixed_by_packages.all()}) + + impact_map[impact.package_id][avid] = fixed_cache[ip.id] + + return impact_map diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 896da7c76..f7b6f75ee 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2907,6 +2907,13 @@ def latest_affecting_advisories_for_purls(self, purls): ) return self.filter(id__in=Subquery(adv_ids)).latest_per_avid() + def latest_affecting_advisories_for_packages(self, purls): + adv_ids = ImpactedPackageAffecting.objects.filter(package__in=purls).values_list( + "impacted_package__advisory_id", + flat=True, + ) + return self.filter(id__in=Subquery(adv_ids)).latest_per_avid() + def latest_fixed_by_advisories_for_purl(self, purl): adv_ids = ImpactedPackageFixedBy.objects.filter(package__package_url=purl).values_list( "impacted_package__advisory_id", @@ -3577,25 +3584,36 @@ def calculate_version_rank(self): PackageV2.objects.bulk_update(sorted_packages, fields=["version_rank"]) return self.version_rank - def get_non_vulnerable_versions(self): + @cached_property + def _non_vulnerable_versions(self): """ - Return a tuple of the next and latest non-vulnerable versions as Package instance. - Return a tuple of (None, None) if there is no non-vulnerable version. + Cached computation to avoid duplicate queries. 
+ Returns (next, latest) """ if self.version_rank == 0: self.calculate_version_rank - non_vulnerable_versions = PackageV2.objects.get_fixed_by_package_versions( - self, fix=False - ).only_non_vulnerable() - later_non_vulnerable = non_vulnerable_versions.filter( - version_rank__gte=self.version_rank - ).order_by("version_rank") + qs = ( + PackageV2.objects.get_fixed_by_package_versions(self, fix=False) + .only_non_vulnerable() + .filter(version_rank__gt=self.version_rank) + .order_by("version_rank") + ) - if later_non_vulnerable.exists(): - return later_non_vulnerable.first(), later_non_vulnerable.last() + next_non_vulnerable = qs.first() + latest_non_vulnerable = qs.last() - return None, None + return next_non_vulnerable, latest_non_vulnerable + + @property + def next_non_vulnerable_version(self): + next_nv, _ = self._non_vulnerable_versions + return next_nv if next_nv else None + + @property + def latest_non_vulnerable_version(self): + _, latest_nv = self._non_vulnerable_versions + return latest_nv if latest_nv else None @cached_property def version_class(self): diff --git a/vulnerabilities/tests/test_api_v3.py b/vulnerabilities/tests/test_api_v3.py index 137692abf..84e1cf94c 100644 --- a/vulnerabilities/tests/test_api_v3.py +++ b/vulnerabilities/tests/test_api_v3.py @@ -66,7 +66,7 @@ def test_packages_post_without_details(self): def test_packages_post_with_details(self): url = reverse("package-v3-list") - with self.assertNumQueries(33): + with self.assertNumQueries(34): response = self.client.post( url, data={ From c9552b433ce28afd4868470a4f360b7f2114f4d8 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 14 Apr 2026 18:31:47 +0530 Subject: [PATCH 388/390] Optimize get non vulnerable versions Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 97 ++++++++++++++++++++++++--------------- vulnerabilities/models.py | 6 +-- vulnerabilities/views.py | 2 +- 3 files changed, 63 insertions(+), 42 deletions(-) diff --git a/vulnerabilities/api_v3.py 
b/vulnerabilities/api_v3.py index a0cb24c91..ee62ea1d5 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -12,6 +12,7 @@ from urllib.parse import urlencode from django.db.models import Exists +from django.db.models import Max from django.db.models import OuterRef from django.db.models import Prefetch from django_filters import rest_framework as filters @@ -226,8 +227,7 @@ def get_affected_by_vulnerabilities(self, package): for adv in advisories: fixed = impact_map.get(adv["avid"]) - if not fixed: - continue + adv.pop("avid", None) result.append( { @@ -294,12 +294,9 @@ def get_affected_by_vulnerabilities(self, package): return self.return_advisories_data(package, advisories_qs, advisories) def get_fixing_vulnerabilities(self, package): - fixing_advisories = AdvisorySet.objects.filter( - package=package, relation_type="fixing" - ).values_list("primary_advisory__advisory_id", flat=True) - - if fixing_advisories: - return [{"advisory_id": adv_id.split("/")[-1]} for adv_id in fixing_advisories] + advisories = self.context["fixing_advisory_map"].get(package.id, []) + if advisories: + return advisories advisories_qs = AdvisoryV2.objects.latest_fixed_by_advisories_for_purl(package.package_url) @@ -326,6 +323,8 @@ def get_fixing_vulnerabilities(self, package): "impacted_packages__affecting_packages", "impacted_packages__fixed_by_packages", ) + if not advisories_qs.exists(): + return [] advisories: List[GroupedAdvisory] = merge_and_save_grouped_advisories( package, advisories_qs, "fixing" ) @@ -454,12 +453,13 @@ def create(self, request, *args, **kwargs): query = PackageV2.objects.filter(package_url__in=purls).order_by("package_url") page = self.paginate_queryset(query) - advisory_map = get_grouped_advisories_bulk(page) + affected_advisory_map = get_affected_advisories_bulk(page) + fixing_advisory_map = get_fixing_advisories_bulk(page) impact_map = get_impacts_bulk(page) serializer = self.get_serializer( page, many=True, - context={"request": request, 
"advisory_map": advisory_map, "impact_map": impact_map}, + context={"request": request, "advisory_map": affected_advisory_map, "impact_map": impact_map, "fixing_advisory_map": fixing_advisory_map}, ) return self.get_paginated_response(serializer.data) @@ -562,7 +562,7 @@ class AffectedByAdvisoriesViewSet(PackageAdvisoriesViewSet): serializer_class = AffectedByAdvisoryV3Serializer -def get_grouped_advisories_bulk(packages): +def get_affected_advisories_bulk(packages): package_ids = [p.id for p in packages] advisory_sets = list( @@ -570,19 +570,14 @@ def get_grouped_advisories_bulk(packages): package_id__in=package_ids, relation_type="affecting", ) - .select_related("primary_advisory", "package") - .prefetch_related( - Prefetch("aliases", queryset=AdvisoryAlias.objects.only("alias")), - Prefetch( - "members", - queryset=AdvisorySetMember.objects.filter(is_primary=False) - .select_related("advisory") - .only( - "advisory__avid", - "advisory__weighted_severity", - "advisory__exploitability", - ), - to_attr="secondary_members", + .select_related("primary_advisory") + .prefetch_related(Prefetch("aliases", queryset=AdvisoryAlias.objects.only("alias"))) + .annotate( + max_severity=Max( + "members__advisory__weighted_severity", + ), + max_exploitability=Max( + "members__advisory__exploitability", ), ) .only( @@ -590,13 +585,12 @@ def get_grouped_advisories_bulk(packages): "package_id", "primary_advisory__avid", "primary_advisory__summary", - "primary_advisory__weighted_severity", - "primary_advisory__exploitability", "primary_advisory__advisory_id", ) ) package_map = defaultdict(list) + for adv in advisory_sets: adv._aliases_cache = [a.alias for a in adv.aliases.all()] package_map[adv.package_id].append(adv) @@ -609,23 +603,14 @@ def get_grouped_advisories_bulk(packages): for adv in groups: primary = adv.primary_advisory - secondaries = [m.advisory for m in adv.secondary_members] - max_sev = primary.weighted_severity or 0.0 - max_exp = primary.exploitability or 0.0 - - for 
sec in secondaries: - if sec.weighted_severity: - max_sev = max(max_sev, sec.weighted_severity) - if sec.exploitability: - max_exp = max(max_exp, sec.exploitability) + max_sev = adv.max_severity or 0.0 + max_exp = adv.max_exploitability or 0.0 weighted_severity = round(max_sev, 1) if max_sev else None exploitability = max_exp or None - risk_score = None - if exploitability and weighted_severity: - risk_score = round(min(exploitability * weighted_severity, 10.0), 1) + risk_score = round(min(max_exp * max_sev, 10.0), 1) if max_exp and max_sev else None identifier = primary.advisory_id.split("/")[-1] @@ -681,3 +666,39 @@ def get_impacts_bulk(packages): impact_map[impact.package_id][avid] = fixed_cache[ip.id] return impact_map + + +def get_fixing_advisories_bulk(packages): + package_ids = [p.id for p in packages] + + advisory_sets = list( + AdvisorySet.objects.filter( + package_id__in=package_ids, + relation_type="fixing", + ) + .only( + "id", + "package_id", + "primary_advisory__advisory_id", + ) + ) + + package_map = defaultdict(list) + + for adv in advisory_sets: + package_map[adv.package_id].append(adv.primary_advisory.advisory_id) + + result = {} + + for package in packages: + groups = package_map.get(package.id, []) + grouped = [] + + for adv_id in groups: + grouped.append( + {"advisory_id": adv_id.split("/")[-1]} + ) + + result[package.id] = grouped + + return result diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index f7b6f75ee..4b6c17627 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -3585,7 +3585,7 @@ def calculate_version_rank(self): return self.version_rank @cached_property - def _non_vulnerable_versions(self): + def get_non_vulnerable_versions(self): """ Cached computation to avoid duplicate queries. 
Returns (next, latest) @@ -3607,12 +3607,12 @@ def _non_vulnerable_versions(self): @property def next_non_vulnerable_version(self): - next_nv, _ = self._non_vulnerable_versions + next_nv, _ = self.get_non_vulnerable_versions return next_nv if next_nv else None @property def latest_non_vulnerable_version(self): - _, latest_nv = self._non_vulnerable_versions + _, latest_nv = self.get_non_vulnerable_versions return latest_nv if latest_nv else None @cached_property diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 5b9406f87..371dcd217 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -257,7 +257,7 @@ def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) package = self.object - next_non_vulnerable, latest_non_vulnerable = package.get_non_vulnerable_versions() + next_non_vulnerable, latest_non_vulnerable = package.get_non_vulnerable_versions context["package"] = package context["next_non_vulnerable"] = next_non_vulnerable From d83fce384e6de4103d7e45970007ac306099a447 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 14 Apr 2026 18:33:05 +0530 Subject: [PATCH 389/390] Fix tests Signed-off-by: Tushar Goel --- vulnerabilities/api_v3.py | 14 ++++++++------ vulnerabilities/tests/test_api_v3.py | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/vulnerabilities/api_v3.py b/vulnerabilities/api_v3.py index ee62ea1d5..12f10ed1c 100644 --- a/vulnerabilities/api_v3.py +++ b/vulnerabilities/api_v3.py @@ -459,7 +459,12 @@ def create(self, request, *args, **kwargs): serializer = self.get_serializer( page, many=True, - context={"request": request, "advisory_map": affected_advisory_map, "impact_map": impact_map, "fixing_advisory_map": fixing_advisory_map}, + context={ + "request": request, + "advisory_map": affected_advisory_map, + "impact_map": impact_map, + "fixing_advisory_map": fixing_advisory_map, + }, ) return self.get_paginated_response(serializer.data) @@ -675,8 +680,7 @@ def 
get_fixing_advisories_bulk(packages): AdvisorySet.objects.filter( package_id__in=package_ids, relation_type="fixing", - ) - .only( + ).only( "id", "package_id", "primary_advisory__advisory_id", @@ -695,9 +699,7 @@ def get_fixing_advisories_bulk(packages): grouped = [] for adv_id in groups: - grouped.append( - {"advisory_id": adv_id.split("/")[-1]} - ) + grouped.append({"advisory_id": adv_id.split("/")[-1]}) result[package.id] = grouped diff --git a/vulnerabilities/tests/test_api_v3.py b/vulnerabilities/tests/test_api_v3.py index 84e1cf94c..be4b1d923 100644 --- a/vulnerabilities/tests/test_api_v3.py +++ b/vulnerabilities/tests/test_api_v3.py @@ -66,7 +66,7 @@ def test_packages_post_without_details(self): def test_packages_post_with_details(self): url = reverse("package-v3-list") - with self.assertNumQueries(34): + with self.assertNumQueries(31): response = self.client.post( url, data={ From bb7d3a21844ba969281b1263ae6a1ebebaeb1b62 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Wed, 15 Apr 2026 00:10:11 +0530 Subject: [PATCH 390/390] fix: use is_latest field to get latest advisory for an avid Signed-off-by: Keshav Priyadarshi --- vulnerabilities/models.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 896da7c76..6ce9f29df 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2876,14 +2876,7 @@ def to_dict(self): class AdvisoryV2QuerySet(BaseQuerySet): def latest_for_avid(self, avid: str): - return ( - self.filter(avid=avid) - .order_by( - F("date_collected").desc(nulls_last=True), - "-id", - ) - .first() - ) + return self.get(avid=avid, is_latest=True) def latest_per_avid(self): return self.filter(is_latest=True)