diff --git a/aboutcode/federated/__init__.py b/aboutcode/federated/__init__.py index 321013710..ff3f07e84 100644 --- a/aboutcode/federated/__init__.py +++ b/aboutcode/federated/__init__.py @@ -1028,7 +1028,7 @@ def large_size_configs(cls): "mlflow": 16, "pub": 16, "rpm": 16, - # Small Ecosystem all use the defaul + # Small Ecosystem all use the default "default": 1, } return [ @@ -1069,7 +1069,7 @@ def medium_size_configs(cls): "mlflow": 8, "pub": 8, "rpm": 8, - # Small Ecosystem all use the defaul + # Small Ecosystem all use the default "default": 1, } return [ @@ -1110,7 +1110,7 @@ def small_size_configs(cls): "mlflow": 4, "pub": 4, "rpm": 4, - # Small Ecosystem all use the defaul + # Small Ecosystem all use the default "default": 1, } return [ @@ -1181,7 +1181,7 @@ def cluster_preset(): DataCluster( data_kind="security_advisories", description="VulnerableCode security advisories for each package version.", - datafile_path_template="{/namespace}/{name}/{version}/advisories.json", + datafile_path_template="{/namespace}/{name}/{version}/advisories.yml", purl_type_configs=[PurlTypeConfig.default_config()], data_schema_url="", documentation_url="", diff --git a/aboutcode/federated/tests/test_data/all-presets/foo/aboutcode-federated-config.yml b/aboutcode/federated/tests/test_data/all-presets/foo/aboutcode-federated-config.yml index f7a7b89c6..ca4204bb7 100644 --- a/aboutcode/federated/tests/test_data/all-presets/foo/aboutcode-federated-config.yml +++ b/aboutcode/federated/tests/test_data/all-presets/foo/aboutcode-federated-config.yml @@ -933,7 +933,7 @@ data_clusters: data_license: CC-BY-4.0 data_maintainers: [] - data_kind: security_advisories - datafile_path_template: '{/namespace}/{name}/{version}/advisories.json' + datafile_path_template: '{/namespace}/{name}/{version}/advisories.yml' purl_type_configs: - purl_type: default number_of_repos: 1 diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 067f64a7b..594021092 
100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -55,6 +55,7 @@ ) from vulnerabilities.pipelines.v2_importers import epss_importer_v2 from vulnerabilities.pipelines.v2_importers import fireeye_importer_v2 +from vulnerabilities.pipelines.v2_importers import gentoo_importer as gentoo_importer_v2 from vulnerabilities.pipelines.v2_importers import github_osv_importer as github_osv_importer_v2 from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2 from vulnerabilities.pipelines.v2_importers import istio_importer as istio_importer_v2 @@ -108,6 +109,7 @@ project_kb_msr2019_importer_v2.ProjectKBMSR2019Pipeline, ruby_importer_v2.RubyImporterPipeline, epss_importer_v2.EPSSImporterPipeline, + gentoo_importer_v2.GentooImporterPipeline, nginx_importer_v2.NginxImporterPipeline, debian_importer_v2.DebianImporterPipeline, mattermost_importer_v2.MattermostImporterPipeline, diff --git a/vulnerabilities/importers/fireeye.py b/vulnerabilities/importers/fireeye.py index 03fb3a8d5..404f2d367 100644 --- a/vulnerabilities/importers/fireeye.py +++ b/vulnerabilities/importers/fireeye.py @@ -112,7 +112,7 @@ def matcher_url(ref) -> str: """ Returns URL of the reference markup from reference url in Markdown format """ - markup_regex = "\[([^\[]+)]\(\s*(http[s]?://.+)\s*\)" + markup_regex = r"\[([^\[]+)]\(\s*(http[s]?://.+)\s*\)" matched_markup = re.findall(markup_regex, ref) if matched_markup: return matched_markup[0][1] diff --git a/vulnerabilities/importers/gentoo.py b/vulnerabilities/importers/gentoo.py index 2f569cdf1..0f3be4431 100644 --- a/vulnerabilities/importers/gentoo.py +++ b/vulnerabilities/importers/gentoo.py @@ -6,8 +6,7 @@ # See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# - - +import logging import re import xml.etree.ElementTree as ET from pathlib import Path @@ -17,12 +16,15 @@ from univers.version_constraint import VersionConstraint from univers.version_range import EbuildVersionRange from univers.versions import GentooVersion +from univers.versions import InvalidVersion from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference +logger = logging.getLogger(__name__) + class GentooImporter(Importer): repo_url = "git+https://anongit.gentoo.org/git/data/glsa.git" @@ -104,14 +106,20 @@ def affected_and_safe_purls(affected_elem): safe_versions, affected_versions = GentooImporter.get_safe_and_affected_versions(pkg) for version in safe_versions: - constraints.append( - VersionConstraint(version=GentooVersion(version), comparator="=").invert() - ) + try: + constraints.append( + VersionConstraint(version=GentooVersion(version), comparator="=").invert() + ) + except InvalidVersion as e: + logger.error(f"Invalid safe_version {version} - error: {e}") for version in affected_versions: - constraints.append( - VersionConstraint(version=GentooVersion(version), comparator="=") - ) + try: + constraints.append( + VersionConstraint(version=GentooVersion(version), comparator="=") + ) + except InvalidVersion as e: + logger.error(f"Invalid affected_version {version} - error: {e}") if not constraints: continue diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index c102a697a..a83db0ad6 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2344,13 +2344,14 @@ def save(self, *args, **kwargs): @property def pipeline_class(self): """Return the pipeline class.""" + from vulnerabilities.importers import IMPORTERS_REGISTRY from vulnerabilities.improvers import IMPROVERS_REGISTRY + from vulnerabilities.pipelines.exporters import EXPORTERS_REGISTRY + + pipeline_registry = 
IMPORTERS_REGISTRY | IMPROVERS_REGISTRY | EXPORTERS_REGISTRY - if self.pipeline_id in IMPROVERS_REGISTRY: - return IMPROVERS_REGISTRY.get(self.pipeline_id) - if self.pipeline_id in IMPORTERS_REGISTRY: - return IMPORTERS_REGISTRY.get(self.pipeline_id) + return pipeline_registry[self.pipeline_id] @property def description(self): diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index e563846fe..632fd95f6 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -141,6 +141,10 @@ def log(self, message, level=logging.INFO): class VulnerableCodePipeline(PipelineDefinition, BasePipelineRun): pipeline_id = None # Unique Pipeline ID + # When set to true pipeline is run only once. + # To rerun onetime pipeline reset is_active field to True via migration. + run_once = False + def on_failure(self): """ Tasks to run in the event that pipeline execution fails. diff --git a/vulnerabilities/pipelines/exporters/__init__.py b/vulnerabilities/pipelines/exporters/__init__.py new file mode 100644 index 000000000..d158a8967 --- /dev/null +++ b/vulnerabilities/pipelines/exporters/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from vulnerabilities.pipelines.exporters import federate_vulnerabilities +from vulnerabilities.utils import create_registry + +EXPORTERS_REGISTRY = create_registry( + [ + federate_vulnerabilities.FederatePackageVulnerabilities, + ] +) diff --git a/vulnerabilities/pipelines/exporters/federate_vulnerabilities.py b/vulnerabilities/pipelines/exporters/federate_vulnerabilities.py new file mode 100644 index 000000000..27c6546ff --- /dev/null +++ b/vulnerabilities/pipelines/exporters/federate_vulnerabilities.py @@ -0,0 +1,307 @@ +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + + +import itertools +import shutil +from operator import attrgetter +from pathlib import Path + +import saneyaml +from aboutcode.pipeline import LoopProgress +from django.conf import settings +from django.db.models import Prefetch + +from aboutcode.federated import DataFederation +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import ImpactedPackage +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.pipes import federatedcode + + +class FederatePackageVulnerabilities(VulnerableCodePipeline): + """Export package vulnerabilities and advisory to FederatedCode.""" + + pipeline_id = "federate_vulnerabilities_v2" + + @classmethod + def steps(cls): + return ( + cls.check_federatedcode_eligibility, + cls.create_federatedcode_working_dir, + cls.fetch_federation_config, + cls.clone_federation_repository, + cls.publish_package_related_advisories, + cls.publish_advisories, + cls.delete_working_dir, + ) + + def check_federatedcode_eligibility(self): + """Check if FederatedCode is 
configured.""" + federatedcode.check_federatedcode_configured_and_available(self.log) + + def create_federatedcode_working_dir(self): + """Create temporary working dir.""" + self.working_path = federatedcode.create_federatedcode_working_dir() + + def fetch_federation_config(self): + """Fetch config for PackageURL Federation.""" + data_federation = DataFederation.from_url( + name="aboutcode-data", + remote_root_url="https://github.com/aboutcode-data", + ) + self.data_cluster = data_federation.get_cluster("security_advisories") + + def clone_federation_repository(self): + self.repo = federatedcode.clone_repository( + repo_url=settings.FEDERATEDCODE_VULNERABILITIES_REPO, + clone_path=self.working_path / "advisories-data", + logger=self.log, + ) + + def publish_package_related_advisories(self): + """Publish package advisories relations to FederatedCode""" + repo_path = Path(self.repo.working_dir) + commit_count = 1 + batch_size = 2000 + chunk_size = 500 + files_to_commit = set() + + distinct_packages_count = ( + PackageV2.objects.values("type", "namespace", "name", "version") + .distinct("type", "namespace", "name", "version") + .count() + ) + package_qs = package_prefetched_qs() + grouped_packages = itertools.groupby( + package_qs.iterator(chunk_size=chunk_size), + key=attrgetter("type", "namespace", "name", "version"), + ) + + self.log(f"Exporting advisory relation for {distinct_packages_count} packages.") + progress = LoopProgress( + total_iterations=distinct_packages_count, + progress_step=5, + logger=self.log, + ) + for _, packages in progress.iter(grouped_packages): + purl, package_vulnerabilities = get_package_related_advisory(packages) + package_repo, datafile_path = self.data_cluster.get_datafile_repo_and_path(purl) + package_vulnerability_path = f"packages/{package_repo}/{datafile_path}" + + write_file( + repo_path=repo_path, + file_path=package_vulnerability_path, + data=package_vulnerabilities, + ) + files_to_commit.add(package_vulnerability_path) + + if 
len(files_to_commit) > batch_size: + if federatedcode.commit_and_push_changes( + commit_message=self.commit_message("package advisory relations", commit_count), + repo=self.repo, + files_to_commit=files_to_commit, + logger=self.log, + ): + commit_count += 1 + files_to_commit.clear() + + if files_to_commit: + federatedcode.commit_and_push_changes( + commit_message=self.commit_message( + "package advisory relations", + commit_count, + commit_count, + ), + repo=self.repo, + files_to_commit=files_to_commit, + logger=self.log, + ) + + self.log(f"Federated {distinct_packages_count} package advisories.") + + def publish_advisories(self): + """Publish advisory to FederatedCode""" + repo_path = Path(self.repo.working_dir) + commit_count = 1 + batch_size = 2000 + chunk_size = 1000 + files_to_commit = set() + advisory_qs = advisory_prefetched_qs() + advisory_count = advisory_qs.count() + + self.log(f"Exporting {advisory_count} advisory.") + progress = LoopProgress( + total_iterations=advisory_count, + progress_step=5, + logger=self.log, + ) + for advisory in progress.iter(advisory_qs.iterator(chunk_size=chunk_size)): + advisory_data = serialize_advisory(advisory) + adv_file = f"advisories/{advisory.avid}.yml" + write_file( + repo_path=repo_path, + file_path=adv_file, + data=advisory_data, + ) + files_to_commit.add(adv_file) + + if len(files_to_commit) > batch_size: + if federatedcode.commit_and_push_changes( + commit_message=self.commit_message("advisories", commit_count), + repo=self.repo, + files_to_commit=files_to_commit, + logger=self.log, + ): + commit_count += 1 + files_to_commit.clear() + + if files_to_commit: + federatedcode.commit_and_push_changes( + commit_message=self.commit_message( + "advisories", + commit_count, + commit_count, + ), + repo=self.repo, + files_to_commit=files_to_commit, + logger=self.log, + ) + + self.log(f"Successfully federated {advisory_count} advisories.") + + def delete_working_dir(self): + """Remove temporary working dir.""" + if 
def get_package_related_advisory(packages):
    """
    Return a ``(purl, package_vulnerabilities)`` tuple for ``packages``.

    ``packages`` is an iterable of package objects exposing ``package_url``,
    ``affected_in_impacts`` and ``fixed_in_impacts``; the two relations are
    read through ``.all()`` and each impact through ``impact.advisory.avid``.

    ``package_vulnerabilities`` holds one mapping per package with the keys
    ``purl``, ``affected_by_advisories`` and ``fixing_advisories``; both
    advisory lists are sorted so the serialized output is stable.

    ``purl`` is the ``package_url`` of the last package consumed from
    ``packages``. For an empty iterable ``(None, [])`` is returned instead of
    failing on an unbound loop variable.
    """
    # Track the purl explicitly rather than relying on the loop variable
    # still being bound after the loop: it is not when ``packages`` is empty.
    purl = None
    package_vulnerabilities = []

    for package in packages:
        purl = package.package_url
        package_vulnerabilities.append(
            {
                "purl": purl,
                "affected_by_advisories": sorted(
                    impact.advisory.avid for impact in package.affected_in_impacts.all()
                ),
                "fixing_advisories": sorted(
                    impact.advisory.avid for impact in package.fixed_in_impacts.all()
                ),
            }
        )

    return purl, package_vulnerabilities
def serialize_advisory(advisory):
    """
    Return a plain mapping built from the ``advisory`` object.

    The related ``aliases``, ``severities``, ``weaknesses``, ``references``
    and ``impacted_packages`` are each read through ``.all()``. Aliases are
    sorted; the other collections keep the order in which ``.all()`` yields
    them. Severities and references are converted with ``serialize_severity``
    and ``serialize_references``.
    """
    alias_values = [entry.alias for entry in advisory.aliases.all()]
    alias_values.sort()

    severity_data = [serialize_severity(item) for item in advisory.severities.all()]
    cwe_values = [weakness.cwe for weakness in advisory.weaknesses.all()]
    reference_data = [serialize_references(item) for item in advisory.references.all()]

    impacted_packages = []
    for impacted in advisory.impacted_packages.all():
        impacted_packages.append(
            {
                "purl": impacted.base_purl,
                "affected_versions": impacted.affecting_vers,
                "fixed_versions": impacted.fixed_vers,
            }
        )

    # Key insertion order is kept as-is: it is the order the mapping is
    # iterated in by whatever serializes it afterwards.
    serialized = {}
    serialized["advisory_id"] = advisory.advisory_id
    # NOTE(review): "datasource_id" is filled from ``advisory.avid``, not from a
    # datasource-specific attribute - confirm this is the intended value.
    serialized["datasource_id"] = advisory.avid
    serialized["datasource_url"] = advisory.url
    serialized["aliases"] = alias_values
    serialized["summary"] = advisory.summary
    serialized["impacted_packages"] = impacted_packages
    serialized["severities"] = severity_data
    serialized["weaknesses"] = cwe_values
    serialized["references"] = reference_data
    return serialized
a/vulnerabilities/pipelines/v2_importers/gentoo_importer.py b/vulnerabilities/pipelines/v2_importers/gentoo_importer.py new file mode 100644 index 000000000..5db00a649 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/gentoo_importer.py @@ -0,0 +1,187 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import re +import xml.etree.ElementTree as ET +from pathlib import Path +from typing import Iterable + +from fetchcode.vcs import fetch_via_vcs +from packageurl import PackageURL +from univers.version_constraint import VersionConstraint +from univers.version_range import EbuildVersionRange +from univers.versions import GentooVersion +from univers.versions import InvalidVersion + +from vulnerabilities.importer import AdvisoryDataV2 +from vulnerabilities.importer import AffectedPackageV2 +from vulnerabilities.importer import ReferenceV2 +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.severity_systems import GENERIC + + +class GentooImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + repo_url = "git+https://anongit.gentoo.org/git/data/glsa.git" + spdx_license_expression = "CC-BY-SA-4.0" + # the license notice is at this url https://anongit.gentoo.org/ says: + # The contents of this document, unless otherwise expressly stated, are licensed + # under the [CC-BY-SA-4.0](https://creativecommons.org/licenses/by-sa/4.0/) license. 
def advisories_count(self):
    """
    Return the number of ``*.xml`` files found recursively under the
    directory referenced by ``self.vcs_response.dest_dir``.
    """
    checkout_root = Path(self.vcs_response.dest_dir)
    xml_files = list(checkout_root.rglob("*.xml"))
    return len(xml_files)
severities=severities, + affected_packages=affected_packages, + url=f"https://security.gentoo.org/glsa/{id}", + original_advisory_text=file, + ) + + def clean_downloads(self): + if self.vcs_response: + self.log("Removing cloned repository") + self.vcs_response.delete() + + def on_failure(self): + self.clean_downloads() + + @staticmethod + def cves_from_reference(reference): + cves = [] + for child in reference: + txt = child.text.strip() + match = re.match(r"CVE-\d{4}-\d{4,}", txt) + if match: + cves.append(match.group()) + return cves + + +def build_constraints(constraint_pairs, logger): + """ + Build a list of VersionConstraint objects from comparators, versions pairs. + """ + constraints = [] + for comparator, version in constraint_pairs: + try: + constraint = VersionConstraint(version=GentooVersion(version), comparator=comparator) + constraints.append(constraint) + except InvalidVersion as e: + logger(f"InvalidVersion constraints version: {version} error:{e}") + return constraints + + +def get_affected_and_fixed_purls(affected_elem, logger): + """ + Parses XML elements to extract PURLs associated with affected and fixed versions. 
+ """ + + for pkg in affected_elem: + name = pkg.attrib.get("name") + if not name: + continue + + pkg_ns, _, pkg_name = name.rpartition("/") + for info in pkg: + # All possible values of info.attrib['range'] = + # {'gt', 'lt', 'rle', 'rge', 'rgt', 'le', 'ge', 'eq'} + # rge means revision greater than equals and rgt means revision greater than + # TODO Revisit issue: https://github.com/aboutcode-org/vulnerablecode/issues/2180 + range_value = info.attrib.get("range") + slot_value = info.attrib.get("slot") + comparator_dict = { + "gt": ">", + "lt": "<", + "ge": ">=", + "le": "<=", + "eq": "=", + "rle": "<=", + "rge": ">=", + "rgt": ">", + } + comparator = comparator_dict.get(range_value) + if not comparator: + logger(f"Unsupported range value {range_value}:{info.text}") + continue + + qualifiers = {"slot": slot_value} if slot_value else {} + purl = PackageURL(type="ebuild", name=pkg_name, namespace=pkg_ns, qualifiers=qualifiers) + yield purl, (comparator, info.text), (info.tag == "unaffected") diff --git a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py index ac7caa49d..9caaaeb95 100644 --- a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py +++ b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py @@ -7,7 +7,10 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# from aboutcode.pipeline import LoopProgress +from django.db.models import Prefetch +from django.db.models import Q +from vulnerabilities.models import AdvisorySeverity from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import PackageV2 from vulnerabilities.pipelines import VulnerableCodePipeline @@ -35,7 +38,15 @@ def steps(cls): def compute_and_store_vulnerability_risk_score(self): affected_advisories = ( AdvisoryV2.objects.filter(impacted_packages__affecting_packages__isnull=False) - .prefetch_related("references", "severities", "exploits") + .prefetch_related( + "references", + "severities", + "exploits", + Prefetch( + "related_advisory_severities", + queryset=AdvisoryV2.objects.prefetch_related("severities"), + ), + ) .distinct() ) @@ -50,10 +61,13 @@ def compute_and_store_vulnerability_risk_score(self): batch_size = 5000 for advisory in progress.iter(affected_advisories.iterator(chunk_size=batch_size)): - severities = advisory.severities.all() references = advisory.references.all() exploits = advisory.exploits.all() + severities = AdvisorySeverity.objects.filter( + Q(advisories=advisory) | Q(advisories__related_to_advisory_severities=advisory) + ).distinct() + weighted_severity, exploitability = compute_vulnerability_risk_factors( references=references, severities=severities, diff --git a/vulnerabilities/pipes/federatedcode.py b/vulnerabilities/pipes/federatedcode.py new file mode 100644 index 000000000..560519c8d --- /dev/null +++ b/vulnerabilities/pipes/federatedcode.py @@ -0,0 +1,175 @@ +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
def is_configured():
    """
    Return True if all the required FederatedCode settings have been set.

    A setting counts as set when it has a truthy value. A setting that is not
    defined on ``settings`` at all is treated as not set, so this returns
    False rather than raising ``AttributeError``.
    """
    required_settings = (
        "FEDERATEDCODE_VULNERABILITIES_REPO",
        "FEDERATEDCODE_GIT_SERVICE_TOKEN",
        "FEDERATEDCODE_GIT_SERVICE_EMAIL",
        "FEDERATEDCODE_GIT_SERVICE_NAME",
    )
    return all(getattr(settings, name, None) for name in required_settings)
def clone_repository(repo_url, clone_path, logger, shallow_clone=True):
    """
    Clone ``repo_url`` to ``clone_path`` and return the resulting ``Repo``.

    The ``FEDERATEDCODE_GIT_SERVICE_TOKEN`` setting is injected as credentials
    into the leading ``https://`` of ``repo_url``; a URL without ``https://``
    is cloned unchanged. Only the unauthenticated URL is passed to ``logger``.
    When ``shallow_clone`` is true the clone is limited to a depth of 1.

    After cloning, ``user.name`` and ``user.email`` are written at the
    repository config level from the ``FEDERATEDCODE_GIT_SERVICE_NAME`` and
    ``FEDERATEDCODE_GIT_SERVICE_EMAIL`` settings.
    """
    logger(f"Cloning repository {repo_url}")

    # Replace only the first occurrence so that an "https://" appearing later
    # in the URL is never rewritten with the token.
    authenticated_repo_url = repo_url.replace(
        "https://",
        f"https://{settings.FEDERATEDCODE_GIT_SERVICE_TOKEN}@",
        1,
    )
    clone_args = {
        "url": authenticated_repo_url,
        "to_path": clone_path,
    }
    if shallow_clone:
        clone_args["depth"] = 1

    repo = Repo.clone_from(**clone_args)

    # A single writer used as a context manager: the config is written and its
    # lock released on exit, even if one of the ``set_value`` calls raises.
    with repo.config_writer(config_level="repository") as config:
        config.set_value("user", "name", settings.FEDERATEDCODE_GIT_SERVICE_NAME)
        config.set_value("user", "email", settings.FEDERATEDCODE_GIT_SERVICE_EMAIL)

    return repo
def commit_changes(repo, files_to_commit, commit_message):
    """
    Stage ``files_to_commit`` in ``repo`` and create a commit from them.

    Nothing is staged or committed when ``files_to_commit`` is empty.
    ``commit_message`` is dedented with ``textwrap.dedent`` before use and the
    commit is created with ``no_verify=True``. This function only commits; it
    does not push.
    """
    if files_to_commit:
        repo.index.add(files_to_commit)
        message = textwrap.dedent(commit_message)
        commit_options = {
            "m": message,
            "allow_empty": False,
            "no_verify": True,
        }
        repo.git.commit(**commit_options)
a/vulnerabilities/templates/package_details_v2.html +++ b/vulnerabilities/templates/package_details_v2.html @@ -45,7 +45,7 @@