-
-
Notifications
You must be signed in to change notification settings - Fork 630
feat(runfiles): support for --incompatible_compact_repo_mapping_manifest #3277
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
2759525
590637f
f471b7f
ab34ab7
7009cef
0ab9cf6
0497f17
268f00b
79b36c5
fe77b24
c4b89d6
ffda0af
0e14bca
ca06786
f0558a1
76e04d0
b18cd54
ba410b7
48ff63f
13c756c
8f9b0f7
7491694
f158d87
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,14 +15,132 @@ | |
"""Runfiles lookup library for Bazel-built Python binaries and tests. | ||
|
||
See @rules_python//python/runfiles/README.md for usage instructions. | ||
|
||
:::{versionadded} VERSION_NEXT_FEATURE | ||
Support for Bazel's `--incompatible_compact_repo_mapping_manifest` flag was added. | ||
This enables prefix-based repository mappings to reduce memory usage for large | ||
dependency graphs under bzlmod. | ||
::: | ||
""" | ||
import collections.abc | ||
import inspect | ||
import os | ||
import posixpath | ||
import sys | ||
from collections import defaultdict | ||
from typing import Dict, Optional, Tuple, Union | ||
|
||
|
||
class _RepositoryMapping: | ||
"""Repository mapping for resolving apparent repository names to canonical ones. | ||
|
||
Handles both exact mappings and prefix-based mappings introduced by the | ||
--incompatible_compact_repo_mapping_manifest flag. | ||
""" | ||
|
||
def __init__( | ||
self, | ||
exact_mappings: Dict[Tuple[str, str], str], | ||
prefixed_mappings: Dict[Tuple[str, str], str], | ||
) -> None: | ||
"""Initialize repository mapping with exact and prefixed mappings. | ||
|
||
Args: | ||
exact_mappings: Dict mapping (source_canonical, target_apparent) -> target_canonical | ||
prefixed_mappings: Dict mapping (source_prefix, target_apparent) -> target_canonical | ||
""" | ||
self._exact_mappings = exact_mappings | ||
self._prefixed_mappings = prefixed_mappings | ||
|
||
# Group prefixed mappings by target_apparent for faster lookups | ||
self._grouped_prefixed_mappings = defaultdict(list) | ||
for ( | ||
prefix_source, | ||
target_app, | ||
), target_canonical in self._prefixed_mappings.items(): | ||
self._grouped_prefixed_mappings[target_app].append( | ||
(prefix_source, target_canonical) | ||
) | ||
|
||
@staticmethod | ||
def create_from_file(repo_mapping_path: Optional[str]) -> "_RepositoryMapping": | ||
"""Create RepositoryMapping from a repository mapping manifest file. | ||
|
||
Args: | ||
repo_mapping_path: Path to the repository mapping file, or None if not available | ||
|
||
Returns: | ||
RepositoryMapping instance with parsed mappings | ||
""" | ||
# If the repository mapping file can't be found, that is not an error: We | ||
# might be running without Bzlmod enabled or there may not be any runfiles. | ||
# In this case, just apply empty repo mappings. | ||
if not repo_mapping_path: | ||
return _RepositoryMapping({}, {}) | ||
|
||
try: | ||
with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f: | ||
content = f.read() | ||
except FileNotFoundError: | ||
return _RepositoryMapping({}, {}) | ||
|
||
exact_mappings = {} | ||
prefixed_mappings = {} | ||
for line in content.splitlines(): | ||
source_canonical, target_apparent, target_canonical = line.split(",") | ||
if source_canonical.endswith("*"): | ||
# This is a prefixed mapping - remove the '*' for prefix matching | ||
prefix = source_canonical[:-1] | ||
prefixed_mappings[(prefix, target_apparent)] = target_canonical | ||
else: | ||
# This is an exact mapping | ||
exact_mappings[(source_canonical, target_apparent)] = target_canonical | ||
|
||
return _RepositoryMapping(exact_mappings, prefixed_mappings) | ||
|
||
def lookup(self, source_repo: Optional[str], target_apparent: str) -> Optional[str]: | ||
"""Look up repository mapping for the given source and target. | ||
|
||
This handles both exact mappings and prefix-based mappings introduced by the | ||
--incompatible_compact_repo_mapping_manifest flag. Exact mappings are tried | ||
first, followed by prefix-based mappings where order matters. | ||
|
||
Args: | ||
source_repo: Source canonical repository name | ||
target_apparent: Target apparent repository name | ||
|
||
Returns: | ||
target_canonical repository name, or None if no mapping exists | ||
""" | ||
if source_repo is None: | ||
return None | ||
|
||
key = (source_repo, target_apparent) | ||
|
||
# Try exact mapping first | ||
if key in self._exact_mappings: | ||
return self._exact_mappings[key] | ||
|
||
# Try prefixed mapping if no exact match found | ||
if target_apparent in self._grouped_prefixed_mappings: | ||
for prefix_source, target_canonical in self._grouped_prefixed_mappings[ | ||
target_apparent | ||
]: | ||
if source_repo.startswith(prefix_source): | ||
return target_canonical | ||
|
||
# No mapping found | ||
return None | ||
|
||
def is_empty(self) -> bool: | ||
"""Check if this repository mapping is empty (no exact or prefixed mappings). | ||
|
||
Returns: | ||
True if there are no mappings, False otherwise | ||
""" | ||
return len(self._exact_mappings) == 0 and len(self._prefixed_mappings) == 0 | ||
|
||
|
||
class _ManifestBased: | ||
"""`Runfiles` strategy that parses a runfiles-manifest to look up runfiles.""" | ||
|
||
|
@@ -130,7 +248,7 @@ class Runfiles: | |
def __init__(self, strategy: Union[_ManifestBased, _DirectoryBased]) -> None: | ||
self._strategy = strategy | ||
self._python_runfiles_root = _FindPythonRunfilesRoot() | ||
self._repo_mapping = _ParseRepoMapping( | ||
self._repo_mapping = _RepositoryMapping.create_from_file( | ||
strategy.RlocationChecked("_repo_mapping") | ||
) | ||
|
||
|
@@ -179,7 +297,7 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st | |
if os.path.isabs(path): | ||
return path | ||
|
||
if source_repo is None and self._repo_mapping: | ||
if source_repo is None and not self._repo_mapping.is_empty(): | ||
# Look up runfiles using the repository mapping of the caller of the | ||
# current method. If the repo mapping is empty, determining this | ||
# name is not necessary. | ||
|
@@ -188,7 +306,7 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st | |
# Split off the first path component, which contains the repository | ||
# name (apparent or canonical). | ||
target_repo, _, remainder = path.partition("/") | ||
if not remainder or (source_repo, target_repo) not in self._repo_mapping: | ||
if not remainder or self._repo_mapping.lookup(source_repo, target_repo) is None: | ||
# One of the following is the case: | ||
# - not using Bzlmod, so the repository mapping is empty and | ||
# apparent and canonical repository names are the same | ||
|
@@ -202,11 +320,17 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st | |
source_repo is not None | ||
), "BUG: if the `source_repo` is None, we should never go past the `if` statement above" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TODO: This assertion may still be important. I need to reassess above whether we need to match the previous conditional. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think an equivalent assertion is needed with the new logic, but I'm open to more thoughts on it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please keep the assert. I'm not entirely sure if we have test coverage of all the cases. Looking at the code, I don't see why the comment wouldn't still apply. source_repo can end up None here if the repo mapping is empty. (which, according to comments, occurs for workspace mode) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Restored. |
||
|
||
# target_repo is an apparent repository name. Look up the corresponding | ||
# canonical repository name with respect to the current repository, | ||
# identified by its canonical name. | ||
target_canonical = self._repo_mapping[(source_repo, target_repo)] | ||
return self._strategy.RlocationChecked(target_canonical + "/" + remainder) | ||
# Look up the target repository using the repository mapping | ||
if source_repo is not None: | ||
target_canonical = self._repo_mapping.lookup(source_repo, target_repo) | ||
if target_canonical is not None: | ||
return self._strategy.RlocationChecked( | ||
target_canonical + "/" + remainder | ||
) | ||
|
||
# No mapping found - assume target_repo is already canonical or | ||
# we're not using Bzlmod | ||
return self._strategy.RlocationChecked(path) | ||
|
||
def EnvVars(self) -> Dict[str, str]: | ||
"""Returns environment variables for subprocesses. | ||
|
@@ -359,30 +483,6 @@ def _FindPythonRunfilesRoot() -> str: | |
return root | ||
|
||
|
||
def _ParseRepoMapping(repo_mapping_path: Optional[str]) -> Dict[Tuple[str, str], str]: | ||
"""Parses the repository mapping manifest.""" | ||
# If the repository mapping file can't be found, that is not an error: We | ||
# might be running without Bzlmod enabled or there may not be any runfiles. | ||
# In this case, just apply an empty repo mapping. | ||
if not repo_mapping_path: | ||
return {} | ||
try: | ||
with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f: | ||
content = f.read() | ||
except FileNotFoundError: | ||
return {} | ||
|
||
repo_mapping = {} | ||
for line in content.split("\n"): | ||
if not line: | ||
# Empty line following the last line break | ||
break | ||
current_canonical, target_local, target_canonical = line.split(",") | ||
repo_mapping[(current_canonical, target_local)] = target_canonical | ||
|
||
return repo_mapping | ||
|
||
|
||
def CreateManifestBased(manifest_path: str) -> Runfiles: | ||
return Runfiles.CreateManifestBased(manifest_path) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The loop here doesn't account for empty or whitespace-only lines that
content.splitlines()
might produce. This could lead to aValueError
when trying to unpack the result ofline.split(',')
on the next line. It's safer to strip each line and skip it if it's empty.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It may be best to error out anyway if repo mappings diverged from expected format in the ways described here.