Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
109 commits
Select commit Hold shift + click to select a range
29995d5
Bump commons-io:commons-io from 2.10.0 to 2.14.0 in /tests
dependabot[bot] Nov 19, 2024
bbf1bf4
Bump requests from 2.12.1 to 2.32.2
dependabot[bot] Nov 19, 2024
a2b5958
Merge pull request #1 from cognitivegears/dependabot/maven/tests/comm…
cognitivegears Nov 19, 2024
3fe4f10
Merge pull request #2 from cognitivegears/dependabot/pip/requests-2.32.2
cognitivegears Nov 19, 2024
109788d
Fix multiline string formatting in main function
cognitivegears Nov 19, 2024
4a89de0
Handle 404 status code and improve error handling in recv_pkg_info fu…
cognitivegears Nov 19, 2024
ccff951
Add PyPI scanner and update requirements for requirements-parser
cognitivegears Nov 19, 2024
0f9b594
Refactor file path handling in Maven and NPM scanners to use os.path.…
cognitivegears Nov 19, 2024
1fdb1fc
Fix argument parsing and improve error handling in package functions
cognitivegears Nov 20, 2024
8ddbc6a
Refactor package manager handling and error codes; introduce constant…
cognitivegears Nov 20, 2024
a81e6c9
Add logging functionality and improve error handling across package r…
cognitivegears Nov 20, 2024
9a82db0
Add recursive scanning option for package managers and enhance error …
cognitivegears Nov 20, 2024
8d100be
Remove duplicate entries in dependency lists across Maven, NPM, and P…
cognitivegears Nov 20, 2024
bebd71f
Changed packages not exist to a warning
cognitivegears Nov 20, 2024
98aa05d
Add error handling for warnings and new exit code for package not found
cognitivegears Nov 20, 2024
3455502
Update project configuration and dependencies
cognitivegears Nov 20, 2024
05a0c23
Enhance package analysis with detailed docstrings and logging improve…
cognitivegears Nov 20, 2024
b277998
Add request timeout handling and constant for HTTP requests
cognitivegears Nov 20, 2024
11460d6
Moved argument parsing for Combobulator
cognitivegears Nov 20, 2024
f8a39b2
Remove currently unused GitHub token argument
cognitivegears Nov 20, 2024
60dc060
Update README to include 'pypi' as a supported package manager type a…
cognitivegears Nov 20, 2024
5a52358
Possible fix/workaround for old scan issue
cognitivegears Nov 22, 2024
6bdbfcb
Version count conditional backwards
cognitivegears Nov 22, 2024
5593b01
Added rate limiting, added additional npm info for heuristics, and ad…
cognitivegears Nov 22, 2024
2976da0
Remove unused imports from combobulator.py
cognitivegears Nov 22, 2024
ae5c7d7
Add JSON export functionality and update README with new argument
cognitivegears Nov 22, 2024
e12bfa2
Refactor heuristics scoring logic to use default thresholds from Defa…
cognitivegears Nov 22, 2024
e665ccb
Add risk assessment properties and update heuristics logic for packag…
cognitivegears Nov 22, 2024
68ec38d
Add risk assessment check and update export functions to include risk…
cognitivegears Nov 22, 2024
f9a2ea9
Update risk handling in combobulator.py to log identified risks and a…
cognitivegears Nov 22, 2024
65e6d1d
Add quiet mode option to suppress console output and adjust logging c…
cognitivegears Nov 22, 2024
d220fbd
Added CONTRIBUTERS.md file
cognitivegears Nov 24, 2024
676c0a5
Bump requests from 2.32.2 to 2.32.4
dependabot[bot] Jun 10, 2025
e61e19d
Bump org.apache.commons:commons-lang3 from 3.10 to 3.18.0 in /tests
dependabot[bot] Jul 12, 2025
6a8b963
Merge pull request #3 from cognitivegears/dependabot/pip/requests-2.32.4
cognitivegears Sep 3, 2025
37b7f70
Merge pull request #4 from cognitivegears/dependabot/maven/tests/org.…
cognitivegears Sep 3, 2025
ea0a833
Moved to uv
cognitivegears Sep 3, 2025
da2cf7c
Renamed to depgate
cognitivegears Sep 3, 2025
cf8709e
Updates for release
cognitivegears Sep 3, 2025
2d3c6a1
Updated README
cognitivegears Sep 4, 2025
19b3bc3
Bump actions/checkout from 4 to 5
dependabot[bot] Sep 4, 2025
f8e04b1
Update requests requirement from <2.32.5,>=2.32.4 to >=2.32.4,<2.32.6
dependabot[bot] Sep 4, 2025
904f163
Bump actions/download-artifact from 4 to 5
dependabot[bot] Sep 4, 2025
b6729ea
Merge pull request #5 from cognitivegears/dependabot/github_actions/a…
cognitivegears Sep 4, 2025
5e3ebaa
Merge pull request #6 from cognitivegears/dependabot/pip/requests-gte…
cognitivegears Sep 4, 2025
eb81f98
Merge pull request #7 from cognitivegears/dependabot/github_actions/a…
cognitivegears Sep 4, 2025
5a63e28
Small visual improvements
cognitivegears Sep 4, 2025
15e569a
Fixed some pylint warnings
cognitivegears Sep 7, 2025
1c6dd78
Added e2e tests
cognitivegears Sep 7, 2025
993168a
Added github action
cognitivegears Sep 7, 2025
70f259d
refactor(cli): extract helpers to reduce branches; keep lazy imports …
cognitivegears Sep 8, 2025
065f1d1
lint: add targeted pylint disables for data-holder classes; document …
cognitivegears Sep 8, 2025
bbac403
Changes for gitignore
cognitivegears Sep 8, 2025
38ff25d
Initial version of source code repository integration
cognitivegears Sep 8, 2025
346cb27
Modified to reduce duplicate code
cognitivegears Sep 8, 2025
f16666c
Extracted common code and moved
cognitivegears Sep 8, 2025
0b93293
Bump actions/setup-python from 5 to 6
dependabot[bot] Sep 8, 2025
e91dbe8
Bump actions/checkout from 4 to 5
dependabot[bot] Sep 8, 2025
be2b4a1
Added logging
cognitivegears Sep 9, 2025
9e56fc3
Added debug logging
cognitivegears Sep 9, 2025
73acbf2
Fixed lookup of npm repo information
cognitivegears Sep 9, 2025
bf9f95d
Matched version checking fixed
cognitivegears Sep 9, 2025
0d582af
Small change to wording
cognitivegears Sep 9, 2025
12e4657
Setting version information
cognitivegears Sep 9, 2025
95ae5c8
Fixed version lookup
cognitivegears Sep 9, 2025
6d27239
Fixed npm resolution for latest
cognitivegears Sep 9, 2025
abd8b82
Improved release and tag comparisons
cognitivegears Sep 10, 2025
3dffc03
Added tests for recent changes
cognitivegears Sep 10, 2025
ac013f3
General cleanup
cognitivegears Sep 10, 2025
4545380
Enhanced config file
cognitivegears Sep 10, 2025
c9d36a4
Added http rate limiting and retry support
cognitivegears Sep 10, 2025
7bec27f
updated example
cognitivegears Sep 10, 2025
ffadbf1
Initial version of policy based scans
cognitivegears Sep 10, 2025
6d6086e
Changed command line arguments
cognitivegears Sep 10, 2025
25a352d
Fixed pypi license checking
cognitivegears Sep 11, 2025
affbe69
Fixed npm license checking
cognitivegears Sep 11, 2025
02ffa8f
Fixed small bug with maven lookup
cognitivegears Sep 11, 2025
a5abc74
Added depsdev for further enrichment
cognitivegears Sep 11, 2025
5cd1532
Fixes scanning lock files
cognitivegears Sep 11, 2025
7f7735f
Added dev and test dep detection, transitive and direct
cognitivegears Sep 11, 2025
d49ca6f
Fixed regression in npm
cognitivegears Sep 11, 2025
851f8b4
Fixed bug with npm naming
cognitivegears Sep 12, 2025
0ac25b6
Refactoring
cognitivegears Sep 12, 2025
fd8eb39
Added new linked mode to validate package linkage
cognitivegears Sep 12, 2025
a916748
Updated to use scan semantics
cognitivegears Sep 12, 2025
4bdbe42
Updated version
cognitivegears Sep 12, 2025
c5a94fe
Added linked policy type
cognitivegears Sep 12, 2025
93a23b1
Added partial match support
cognitivegears Sep 16, 2025
1ba0c19
Merge pull request #8 from cognitivegears/dependabot/github_actions/a…
cognitivegears Sep 16, 2025
5815d8b
Merge pull request #9 from cognitivegears/dependabot/github_actions/a…
cognitivegears Sep 16, 2025
362e6b5
Initial version of MCP
cognitivegears Oct 18, 2025
878d5f1
Fixed pylint issues
cognitivegears Oct 18, 2025
41a188b
Bump actions/upload-artifact from 4 to 5
dependabot[bot] Oct 27, 2025
3c6742b
Bump actions/download-artifact from 5 to 6
dependabot[bot] Oct 27, 2025
f6813b3
Landed MCP support for Depgate
cognitivegears Nov 5, 2025
beeb516
Merge pull request #10 from cognitivegears/dependabot/github_actions/…
cognitivegears Nov 5, 2025
81b8f14
Merge pull request #11 from cognitivegears/dependabot/github_actions/…
cognitivegears Nov 5, 2025
675e2f9
Code review changes
cognitivegears Nov 5, 2025
9881774
Additional code improvements
cognitivegears Nov 5, 2025
ec9e64c
Additional code cleanup
cognitivegears Nov 5, 2025
7700f1e
Merge pull request #12 from cognitivegears/feature/mcp
cognitivegears Nov 5, 2025
007b190
Added fix for hanging
cognitivegears Nov 6, 2025
a082ed9
Changes to make warnings more obvious
cognitivegears Nov 6, 2025
b0c1d46
Additional code review changes
cognitivegears Nov 6, 2025
88f1f8f
Code review security changes
cognitivegears Nov 6, 2025
f73cb2a
Code review changes
cognitivegears Nov 6, 2025
dd92f25
Merge pull request #13 from cognitivegears/bugfix/mcp_hanging
cognitivegears Nov 6, 2025
21ec675
Bug fix for warnings
cognitivegears Nov 6, 2025
822f110
Merge pull request #14 from cognitivegears/bugfix/version_warning
cognitivegears Nov 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Matched version checking fixed
  • Loading branch information
cognitivegears committed Sep 9, 2025
commit bf9f95d7c2e16d7c5c860fd974f854e8a9ae1737
212 changes: 206 additions & 6 deletions src/analysis/heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import math
from datetime import datetime, timezone
from constants import Constants, DefaultHeuristics
from common.logging_utils import is_debug_enabled, extra_context

STG = f"{Constants.ANALYSIS} "
# Repository signals scoring constants
Expand Down Expand Up @@ -97,6 +98,173 @@ def compute_repo_signals_score(mp):

# Clamp the final score
return max(REPO_SCORE_CLAMP_MIN, min(REPO_SCORE_CLAMP_MAX, score))

def _clamp01(value):
    """Clamp a numeric value into [0.0, 1.0].

    Args:
        value: Anything accepted by ``float()`` (number or numeric string).

    Returns:
        float: ``value`` limited to the closed interval [0.0, 1.0]. Input that
        cannot be converted to a float, and NaN, both yield 0.0.
    """
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return 0.0
    # NaN compares False against every bound, so without this guard it would
    # pass both range checks and be returned as-is, outside [0, 1].
    if math.isnan(number):
        return 0.0
    return min(1.0, max(0.0, number))

def _norm_base_score(base):
    """Bring a pre-existing base score into [0, 1].

    The base score is expected to already lie in 0..1; it is clamped anyway
    as a defensive measure.

    Args:
        base: The existing score, or None when the package has none.

    Returns:
        float or None: The clamped score, or None when ``base`` is None or
        cannot be converted to a float.
    """
    if base is None:
        return None
    try:
        numeric = float(base)
        clamped = _clamp01(numeric)
    except Exception:
        # Non-numeric input counts as a missing metric, not as a zero score.
        return None
    return clamped

def _norm_repo_stars(stars):
    """Map a repository star count onto [0, 1] using a base-10 log curve.

    The curve is ``log10(stars + 1) / 3`` capped at 1.0, so it saturates at
    roughly one thousand stars. Negative counts are treated as zero.

    Args:
        stars: Star count (number or numeric string), or None when unknown.

    Returns:
        float or None: Normalized value in [0, 1], or None when ``stars`` is
        None or cannot be interpreted as a number.
    """
    if stars is None:
        return None
    try:
        # Floor negative counts at zero before taking the logarithm.
        count = max(float(stars), 0.0)
        scaled = math.log10(count + 1.0) / 3.0
        floored = max(0.0, scaled)
        return min(1.0, floored)
    except Exception:
        return None

def _norm_repo_contributors(contrib):
    """Map a contributor count onto [0, 1], linearly, capped at 50.

    Negative counts are treated as zero; 50 or more contributors score 1.0.

    Args:
        contrib: Contributor count (number or numeric string), or None when
            unknown.

    Returns:
        float or None: Normalized value in [0, 1], or None when ``contrib`` is
        None or cannot be interpreted as a number.
    """
    if contrib is None:
        return None
    try:
        headcount = float(contrib)
    except Exception:
        return None
    if headcount < 0:
        headcount = 0.0
    fraction = headcount / 50.0
    return min(1.0, max(0.0, fraction))

def _parse_iso_to_days(iso_ts):
    """Return the whole number of days elapsed since a timestamp.

    Args:
        iso_ts: An ISO-8601 string (a trailing ``Z`` is accepted) or a
            ``datetime`` instance. Values without a UTC offset are
            interpreted as UTC.

    Returns:
        int or None: Non-negative number of whole days between the timestamp
        and the current UTC time. Timestamps in the future yield 0. None is
        returned for unsupported types and for strings that cannot be parsed.
    """
    if isinstance(iso_ts, datetime):
        moment = iso_ts
    elif isinstance(iso_ts, str):
        # Older datetime.fromisoformat() implementations reject the 'Z'
        # suffix, so strip it; the naive result is tagged as UTC below.
        text = iso_ts[:-1] if iso_ts.endswith('Z') else iso_ts
        try:
            moment = datetime.fromisoformat(text)
        except ValueError:
            return None
    else:
        return None

    if moment.tzinfo is None:
        moment = moment.replace(tzinfo=timezone.utc)

    try:
        elapsed = datetime.now(timezone.utc) - moment
    except (TypeError, OverflowError):
        # Best effort: an unusable tzinfo must not abort the analysis.
        return None

    # Clock skew or bad data can place the timestamp in the future; report
    # that as "today" instead of a negative age.
    return max(0, elapsed.days)

def _norm_repo_last_activity(iso_ts):
    """Score how recently a repository was active, using fixed tiers.

    Tiers (age in days -> score): up to 30 -> 1.0, up to 365 -> 0.6,
    up to 730 -> 0.3, anything older -> 0.0.

    Args:
        iso_ts: Timestamp of the last activity, as accepted by
            ``_parse_iso_to_days``. Falsy values mean "unknown".

    Returns:
        float or None: The tier score, or None when the timestamp is missing
        or its age cannot be determined.
    """
    if not iso_ts:
        return None
    age_days = _parse_iso_to_days(iso_ts)
    if age_days is None:
        return None
    # (upper bound in days, score) pairs, checked from freshest to stalest.
    for limit, tier_score in ((30, 1.0), (365, 0.6), (730, 0.3)):
        if age_days <= limit:
            return tier_score
    return 0.0

def _norm_bool(flag):
    """Convert a flag to 1.0 (truthy) or 0.0 (falsy); None stays None.

    Args:
        flag: Any value; None marks the metric as missing.

    Returns:
        float or None: 1.0 for truthy input, 0.0 for falsy input, None when
        ``flag`` is None.
    """
    return None if flag is None else float(bool(flag))

def _norm_version_match(vm):
    """Convert a version-match result to 1.0 (matched) or 0.0 (not matched).

    Args:
        vm: Mapping-like object exposing ``get``; its ``'matched'`` entry is
            read (treated as False when absent). None marks the metric as
            missing.

    Returns:
        float or None: 1.0 when ``'matched'`` is truthy, 0.0 otherwise; None
        when ``vm`` is None or does not behave like a mapping.
    """
    if vm is None:
        return None
    try:
        matched = vm.get('matched', False)
        return float(bool(matched))
    except Exception:
        # A malformed value is reported as missing rather than as a mismatch.
        return None

def compute_final_score(mp):
    """Combine the available heuristics into one normalized score in [0, 1].

    Each metric is read from ``mp`` and normalized to [0, 1], or to None when
    it is missing:
        - base_score (from ``mp.score``)
        - repo_version_match
        - repo_stars
        - repo_contributors
        - repo_last_activity (from ``mp.repo_last_activity_at``)
        - repo_present_in_registry

    Default weights (they sum to 1.0 when every metric is present; when some
    are missing, the remaining weights are rescaled to sum to 1.0):
        - base_score: 0.30
        - repo_version_match: 0.30
        - repo_stars: 0.15
        - repo_contributors: 0.10
        - repo_last_activity: 0.10
        - repo_present_in_registry: 0.05

    Args:
        mp: Package object; every attribute is read with ``getattr`` and a
            None default, so absent attributes count as missing metrics.

    Returns:
        tuple(final_score: float, breakdown: dict, weights_used: dict):
        ``breakdown`` maps each metric to its ``'raw'`` and ``'normalized'``
        values; ``weights_used`` holds the rescaled weights of the metrics
        that contributed. With no usable metric the result is
        ``(0.0, breakdown, {})``.
    """
    # Metric name -> attribute on the package object that backs it.
    sources = (
        ('base_score', 'score'),
        ('repo_version_match', 'repo_version_match'),
        ('repo_stars', 'repo_stars'),
        ('repo_contributors', 'repo_contributors'),
        ('repo_last_activity', 'repo_last_activity_at'),
        ('repo_present_in_registry', 'repo_present_in_registry'),
    )
    raw = {metric: getattr(mp, attr, None) for metric, attr in sources}

    # Metric name -> function turning the raw value into [0, 1] or None.
    normalizers = {
        'base_score': _norm_base_score,
        'repo_version_match': _norm_version_match,
        'repo_stars': _norm_repo_stars,
        'repo_contributors': _norm_repo_contributors,
        'repo_last_activity': _norm_repo_last_activity,
        'repo_present_in_registry': _norm_bool,
    }
    norm = {metric: normalize(raw[metric]) for metric, normalize in normalizers.items()}

    # A False "present in registry" flag with no normalized repository URL is
    # treated as missing, so base-only packages are not penalized by it.
    if norm['repo_present_in_registry'] == 0.0:
        if getattr(mp, 'repo_url_normalized', None) is None:
            norm['repo_present_in_registry'] = None

    weights = {
        'base_score': 0.30,
        'repo_version_match': 0.30,
        'repo_stars': 0.15,
        'repo_contributors': 0.10,
        'repo_last_activity': 0.10,
        'repo_present_in_registry': 0.05,
    }

    breakdown = {k: {'raw': raw[k], 'normalized': norm[k]} for k in norm}

    # Only metrics that produced a value take part; their weights are
    # rescaled so that they sum to 1.
    available = [k for k, v in norm.items() if v is not None]
    total_w = sum(weights[k] for k in available) if available else 0.0
    if total_w <= 0.0:
        return 0.0, breakdown, {}

    weights_used = {k: weights[k] / total_w for k in available}

    # Explicit left-to-right accumulation keeps the float rounding stable.
    final = 0.0
    for k in available:
        final += float(norm[k]) * weights_used[k]

    return _clamp01(final), breakdown, weights_used

def combobulate_min(pkgs):
"""Run to check the existence of the packages in the registry.

Expand All @@ -112,15 +280,47 @@ def combobulate_heur(pkgs):
Args:
pkgs (list): List of packages to check.
"""
logger = logging.getLogger(__name__)
for x in pkgs:
test_exists(x)
if x.exists is True:
# Add repository signals score to existing score
repo_score = compute_repo_signals_score(x)
if x.score is not None:
x.score += repo_score
else:
x.score = repo_score
# Compute final normalized score in [0,1] using available metrics
final_score, breakdown, weights_used = compute_final_score(x)
x.score = final_score
if is_debug_enabled(logger):
logger.debug(
"Heuristics score breakdown",
extra=extra_context(
event="analysis",
component="heuristics",
action="score_breakdown",
package_name=str(x),
final_score=final_score,
weights=weights_used,
breakdown=breakdown,
),
)
# Emit [ANALYSIS] lines for repository signals
try:
if getattr(x, "repo_stars", None) is not None:
logging.info("%s.... repository stars: %s.", STG, str(x.repo_stars))
if getattr(x, "repo_contributors", None) is not None:
logging.info("%s.... repository contributors: %s.", STG, str(x.repo_contributors))
if getattr(x, "repo_last_activity_at", None):
_days = _parse_iso_to_days(x.repo_last_activity_at)
if _days is not None:
logging.info("%s.... repository last activity %d days ago.", STG, int(_days))
if getattr(x, "repo_present_in_registry", None) is not None:
logging.info("%s.... repository present in registry: %s.", STG, str(x.repo_present_in_registry))
if getattr(x, "repo_version_match", None) is not None:
try:
_matched = bool(x.repo_version_match.get('matched', False))
logging.info("%s.... repository version match: %s.", STG, "yes" if _matched else "no")
except Exception:
logging.info("%s.... repository version match: unavailable.", STG)
except Exception:
# Do not break analysis on logging issues
pass
test_score(x)
test_timestamp(x)
test_version_count(x)
Expand Down
10 changes: 10 additions & 0 deletions src/depgate.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ def export_csv(instances, path):
"Risk: Min Versions",
"Risk: Too New",
"Risk: Any Risks",
"repo_stars",
"repo_contributors",
"repo_last_activity",
"repo_present_in_registry",
"repo_version_match",
]
rows = [headers]
for x in instances:
Expand Down Expand Up @@ -140,6 +145,11 @@ def export_json(instances, path):
"score": x.score,
"versionCount": x.version_count,
"createdTimestamp": x.timestamp,
"repo_stars": x.repo_stars,
"repo_contributors": x.repo_contributors,
"repo_last_activity": x.repo_last_activity_at,
"repo_present_in_registry": (None if (getattr(x, "repo_url_normalized", None) is None and x.repo_present_in_registry is False) else x.repo_present_in_registry),
"repo_version_match": x.repo_version_match,
"risk": {
"hasRisk": x.has_risk(),
"isMissing": x.risk_missing,
Expand Down
26 changes: 24 additions & 2 deletions src/metapackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ def listall(self):
Returns:
list: List of all the attributes of the class.
"""
def nv(v):
return "" if v is None else v

lister = []
lister.append(self._pkg_name)
lister.append(self._pkg_type)
Expand All @@ -79,6 +82,25 @@ def listall(self):
lister.append(self._risk_min_versions)
lister.append(self._risk_too_new)
lister.append(self.has_risk())

# New repo_* CSV columns (empty string for missing)
lister.append(nv(self._repo_stars))
lister.append(nv(self._repo_contributors))
lister.append(nv(self._repo_last_activity_at))
# CSV default handling: empty when not set; if explicitly False but no normalized repo URL,
# treat as missing for CSV (empty)
if (self._repo_present_in_registry is False) and (self._repo_url_normalized is None):
lister.append("")
else:
lister.append(nv(self._repo_present_in_registry))
if self._repo_version_match is None:
lister.append("")
else:
try:
lister.append(bool(self._repo_version_match.get('matched')))
except Exception: # defensive: malformed dict
lister.append("")

return lister

@staticmethod
Expand Down Expand Up @@ -405,7 +427,7 @@ def repo_present_in_registry(self):
"""Property for repository presence in registry.

Returns:
bool: True if repository URL is present in package registry
bool or None: True if repository URL is present in package registry; None if unknown
"""
return self._repo_present_in_registry

Expand All @@ -418,7 +440,7 @@ def repo_resolved(self):
"""Property for repository resolution status.

Returns:
bool: True if repository URL has been resolved and validated
bool or None: True if repository URL has been resolved and validated; None if unknown
"""
return self._repo_resolved

Expand Down
4 changes: 2 additions & 2 deletions src/registry/maven/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from typing import List

from constants import ExitCodes, Constants
from common.http_client import safe_get
import common.http_client as http_client
from common.logging_utils import extra_context, is_debug_enabled, Timer, safe_url, redact

from .enrich import _enrich_with_repo # Not used here but kept for parity if needed later
Expand Down Expand Up @@ -49,7 +49,7 @@ def recv_pkg_info(pkgs, url: str = Constants.REGISTRY_URL_MAVEN) -> None:
headers = {"Accept": "application/json", "Content-Type": "application/json"}
# Sleep to avoid rate limiting
time.sleep(0.1)
res = safe_get(url, context="maven", params=payload, headers=headers)
res = http_client.safe_get(url, context="maven", params=payload, headers=headers)
except SystemExit:
# safe_get calls sys.exit on errors, so we need to catch and re-raise as exception
logger.error(
Expand Down
20 changes: 16 additions & 4 deletions src/repository/provider_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,15 @@ def get_releases(self, owner: str, repo: str) -> List[Dict[str, str]]:
repo: Repository name

Returns:
List of release dictionaries
List of release dictionaries. Falls back to tags if releases are empty.
"""
return self.client.get_releases(owner, repo)
releases = self.client.get_releases(owner, repo)
if releases:
return releases

# Fallback: use tags when releases are unavailable to enable version matching
tags = self.client.get_tags(owner, repo)
return tags or []


class GitLabProviderAdapter(ProviderClient):
Expand Down Expand Up @@ -123,6 +129,12 @@ def get_releases(self, owner: str, repo: str) -> List[Dict[str, str]]:
repo: Project name

Returns:
List of release dictionaries
List of release dictionaries. Falls back to tags if releases are empty.
"""
return self.client.get_releases(owner, repo)
releases = self.client.get_releases(owner, repo)
if releases:
return releases

# Fallback: use tags when releases are unavailable to enable version matching
tags = self.client.get_tags(owner, repo)
return tags or []
Loading