diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d06a68935..469e9d3612 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -85,6 +85,8 @@ END_UNRELEASED_TEMPLATE {#v0-0-0-added} ### Added +* (runfiles) The Python runfiles library now supports Bazel's + `--incompatible_compact_repo_mapping_manifest` flag. * (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the {obj}`main_module` attribute. * (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the diff --git a/python/runfiles/runfiles.py b/python/runfiles/runfiles.py index 3943be5646..847d9cbfd7 100644 --- a/python/runfiles/runfiles.py +++ b/python/runfiles/runfiles.py @@ -15,14 +15,132 @@ """Runfiles lookup library for Bazel-built Python binaries and tests. See @rules_python//python/runfiles/README.md for usage instructions. + +:::{versionadded} VERSION_NEXT_FEATURE +Support for Bazel's `--incompatible_compact_repo_mapping_manifest` flag was added. +This enables prefix-based repository mappings to reduce memory usage for large +dependency graphs under bzlmod. +::: """ +import collections.abc import inspect import os import posixpath import sys +from collections import defaultdict from typing import Dict, Optional, Tuple, Union +class _RepositoryMapping: + """Repository mapping for resolving apparent repository names to canonical ones. + + Handles both exact mappings and prefix-based mappings introduced by the + --incompatible_compact_repo_mapping_manifest flag. + """ + + def __init__( + self, + exact_mappings: Dict[Tuple[str, str], str], + prefixed_mappings: Dict[Tuple[str, str], str], + ) -> None: + """Initialize repository mapping with exact and prefixed mappings. + + Args: + exact_mappings: Dict mapping (source_canonical, target_apparent) -> target_canonical + prefixed_mappings: Dict mapping (source_prefix, target_apparent) -> target_canonical + """ + self._exact_mappings = exact_mappings + self._prefixed_mappings = prefixed_mappings + + # Group prefixed mappings by target_apparent for faster lookups + self._grouped_prefixed_mappings = defaultdict(list) + for ( + prefix_source, + target_app, + ), target_canonical in self._prefixed_mappings.items(): + self._grouped_prefixed_mappings[target_app].append( + (prefix_source, target_canonical) + ) + + @staticmethod + def create_from_file(repo_mapping_path: Optional[str]) -> "_RepositoryMapping": + """Create RepositoryMapping from a repository mapping manifest file. + + Args: + repo_mapping_path: Path to the repository mapping file, or None if not available + + Returns: + RepositoryMapping instance with parsed mappings + """ + # If the repository mapping file can't be found, that is not an error: We + # might be running without Bzlmod enabled or there may not be any runfiles. + # In this case, just apply empty repo mappings. + if not repo_mapping_path: + return _RepositoryMapping({}, {}) + + try: + with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f: + content = f.read() + except FileNotFoundError: + return _RepositoryMapping({}, {}) + + exact_mappings = {} + prefixed_mappings = {} + for line in content.splitlines(): + source_canonical, target_apparent, target_canonical = line.split(",") + if source_canonical.endswith("*"): + # This is a prefixed mapping - remove the '*' for prefix matching + prefix = source_canonical[:-1] + prefixed_mappings[(prefix, target_apparent)] = target_canonical + else: + # This is an exact mapping + exact_mappings[(source_canonical, target_apparent)] = target_canonical + + return _RepositoryMapping(exact_mappings, prefixed_mappings) + + def lookup(self, source_repo: Optional[str], target_apparent: str) -> Optional[str]: + """Look up repository mapping for the given source and target. + + This handles both exact mappings and prefix-based mappings introduced by the + --incompatible_compact_repo_mapping_manifest flag. Exact mappings are tried + first, followed by prefix-based mappings where order matters. + + Args: + source_repo: Source canonical repository name + target_apparent: Target apparent repository name + + Returns: + target_canonical repository name, or None if no mapping exists + """ + if source_repo is None: + return None + + key = (source_repo, target_apparent) + + # Try exact mapping first + if key in self._exact_mappings: + return self._exact_mappings[key] + + # Try prefixed mapping if no exact match found + if target_apparent in self._grouped_prefixed_mappings: + for prefix_source, target_canonical in self._grouped_prefixed_mappings[ + target_apparent + ]: + if source_repo.startswith(prefix_source): + return target_canonical + + # No mapping found + return None + + def is_empty(self) -> bool: + """Check if this repository mapping is empty (no exact or prefixed mappings). + + Returns: + True if there are no mappings, False otherwise + """ + return len(self._exact_mappings) == 0 and len(self._prefixed_mappings) == 0 + + class _ManifestBased: """`Runfiles` strategy that parses a runfiles-manifest to look up runfiles.""" @@ -130,7 +248,7 @@ class Runfiles: def __init__(self, strategy: Union[_ManifestBased, _DirectoryBased]) -> None: self._strategy = strategy self._python_runfiles_root = _FindPythonRunfilesRoot() - self._repo_mapping = _ParseRepoMapping( + self._repo_mapping = _RepositoryMapping.create_from_file( strategy.RlocationChecked("_repo_mapping") ) @@ -179,7 +297,7 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st if os.path.isabs(path): return path - if source_repo is None and self._repo_mapping: + if source_repo is None and not self._repo_mapping.is_empty(): # Look up runfiles using the repository mapping of the caller of the # current method. If the repo mapping is empty, determining this # name is not necessary. @@ -188,7 +306,7 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st # Split off the first path component, which contains the repository # name (apparent or canonical). target_repo, _, remainder = path.partition("/") - if not remainder or (source_repo, target_repo) not in self._repo_mapping: + if not remainder or self._repo_mapping.lookup(source_repo, target_repo) is None: # One of the following is the case: # - not using Bzlmod, so the repository mapping is empty and # apparent and canonical repository names are the same @@ -202,11 +320,17 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st source_repo is not None ), "BUG: if the `source_repo` is None, we should never go past the `if` statement above" - # target_repo is an apparent repository name. Look up the corresponding - # canonical repository name with respect to the current repository, - # identified by its canonical name. - target_canonical = self._repo_mapping[(source_repo, target_repo)] - return self._strategy.RlocationChecked(target_canonical + "/" + remainder) + # Look up the target repository using the repository mapping + if source_repo is not None: + target_canonical = self._repo_mapping.lookup(source_repo, target_repo) + if target_canonical is not None: + return self._strategy.RlocationChecked( + target_canonical + "/" + remainder + ) + + # No mapping found - assume target_repo is already canonical or + # we're not using Bzlmod + return self._strategy.RlocationChecked(path) def EnvVars(self) -> Dict[str, str]: """Returns environment variables for subprocesses. @@ -359,30 +483,6 @@ def _FindPythonRunfilesRoot() -> str: return root -def _ParseRepoMapping(repo_mapping_path: Optional[str]) -> Dict[Tuple[str, str], str]: - """Parses the repository mapping manifest.""" - # If the repository mapping file can't be found, that is not an error: We - # might be running without Bzlmod enabled or there may not be any runfiles. - # In this case, just apply an empty repo mapping. - if not repo_mapping_path: - return {} - try: - with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f: - content = f.read() - except FileNotFoundError: - return {} - - repo_mapping = {} - for line in content.split("\n"): - if not line: - # Empty line following the last line break - break - current_canonical, target_local, target_canonical = line.split(",") - repo_mapping[(current_canonical, target_local)] = target_canonical - - return repo_mapping - - def CreateManifestBased(manifest_path: str) -> Runfiles: return Runfiles.CreateManifestBased(manifest_path) diff --git a/tests/runfiles/runfiles_test.py b/tests/runfiles/runfiles_test.py index a3837ac842..b8a3d5f7b7 100644 --- a/tests/runfiles/runfiles_test.py +++ b/tests/runfiles/runfiles_test.py @@ -18,6 +18,7 @@ from typing import Any, List, Optional from python.runfiles import runfiles +from python.runfiles.runfiles import _RepositoryMapping class RunfilesTest(unittest.TestCase): @@ -525,6 +526,165 @@ def testDirectoryBasedRlocationWithRepoMappingFromOtherRepo(self) -> None: r.Rlocation("config.json", "protobuf~3.19.2"), dir + "/config.json" ) + def testDirectoryBasedRlocationWithCompactRepoMappingFromMain(self) -> None: + """Test repository mapping with prefix-based entries (compact format).""" + with _MockFile( + name="_repo_mapping", + contents=[ + # Exact mappings (no asterisk) + "_,config.json,config.json~1.2.3", + ",my_module,_main", + ",my_workspace,_main", + # Prefixed mappings (with asterisk) - these apply to any repo starting with the prefix + "deps+*,external_dep,external_dep~1.0.0", + "test_deps+*,test_lib,test_lib~2.1.0", + ], + ) as rm: + dir = os.path.dirname(rm.Path()) + r = runfiles.CreateDirectoryBased(dir) + + # Test exact mappings still work + self.assertEqual( + r.Rlocation("my_module/bar/runfile", ""), dir + "/_main/bar/runfile" + ) + self.assertEqual( + r.Rlocation("my_workspace/bar/runfile", ""), dir + "/_main/bar/runfile" + ) + + # Test prefixed mappings - should match any repo starting with "deps+" + self.assertEqual( + r.Rlocation("external_dep/foo/file", "deps+dep1"), + dir + "/external_dep~1.0.0/foo/file", + ) + self.assertEqual( + r.Rlocation("external_dep/bar/file", "deps+dep2"), + dir + "/external_dep~1.0.0/bar/file", + ) + self.assertEqual( + r.Rlocation("external_dep/nested/path/file", "deps+some_long_dep_name"), + dir + "/external_dep~1.0.0/nested/path/file", + ) + + # Test that prefixed mappings work for test_deps+ prefix too + self.assertEqual( + r.Rlocation("test_lib/test/file", "test_deps+junit"), + dir + "/test_lib~2.1.0/test/file", + ) + + # Test that non-matching prefixes don't match + self.assertEqual( + r.Rlocation("external_dep/foo/file", "other_prefix"), + dir + "/external_dep/foo/file", # No mapping applied, use as-is + ) + + def testDirectoryBasedRlocationWithCompactRepoMappingPrecedence(self) -> None: + """Test that exact mappings take precedence over prefixed mappings.""" + with _MockFile( + name="_repo_mapping", + contents=[ + # Exact mapping for a specific source repo + "deps+specific_repo,external_dep,external_dep~exact", + # Prefixed mapping for repos starting with "deps+" + "deps+*,external_dep,external_dep~prefix", + # Another prefixed mapping with different prefix + "other+*,external_dep,external_dep~other", + ], + ) as rm: + dir = os.path.dirname(rm.Path()) + r = runfiles.CreateDirectoryBased(dir) + + # Exact mapping should take precedence over prefix + self.assertEqual( + r.Rlocation("external_dep/foo/file", "deps+specific_repo"), + dir + "/external_dep~exact/foo/file", + ) + + # Other repos with deps+ prefix should use the prefixed mapping + self.assertEqual( + r.Rlocation("external_dep/foo/file", "deps+other_repo"), + dir + "/external_dep~prefix/foo/file", + ) + + # Different prefix should use its own mapping + self.assertEqual( + r.Rlocation("external_dep/foo/file", "other+some_repo"), + dir + "/external_dep~other/foo/file", + ) + + def testDirectoryBasedRlocationWithCompactRepoMappingOrderMatters(self) -> None: + """Test that order matters for prefixed mappings (first match wins).""" + with _MockFile( + name="_repo_mapping", + contents=[ + # More specific prefix comes first + "deps+specific+*,lib,lib~specific", + # More general prefix comes second + "deps+*,lib,lib~general", + ], + ) as rm: + dir = os.path.dirname(rm.Path()) + r = runfiles.CreateDirectoryBased(dir) + + # Should match the more specific prefix first + self.assertEqual( + r.Rlocation("lib/foo/file", "deps+specific+repo"), + dir + "/lib~specific/foo/file", + ) + + # Should match the general prefix for non-specific repos + self.assertEqual( + r.Rlocation("lib/foo/file", "deps+other_repo"), + dir + "/lib~general/foo/file", + ) + + def testRepositoryMappingLookup(self) -> None: + """Test _RepositoryMapping.lookup() method for both exact and prefix-based mappings.""" + exact_mappings = { + ("", "my_workspace"): "_main", + ("", "config_lib"): "config_lib~1.0.0", + ("deps+specific_repo", "external_dep"): "external_dep~exact", + } + prefixed_mappings = { + ("deps+", "external_dep"): "external_dep~prefix", + ("test_deps+", "test_lib"): "test_lib~2.1.0", + } + + repo_mapping = _RepositoryMapping(exact_mappings, prefixed_mappings) + + # Test exact lookups + self.assertEqual(repo_mapping.lookup("", "my_workspace"), "_main") + self.assertEqual(repo_mapping.lookup("", "config_lib"), "config_lib~1.0.0") + self.assertEqual( + repo_mapping.lookup("deps+specific_repo", "external_dep"), + "external_dep~exact", + ) + + # Test prefix-based lookups + self.assertEqual( + repo_mapping.lookup("deps+some_repo", "external_dep"), "external_dep~prefix" + ) + self.assertEqual( + repo_mapping.lookup("test_deps+another_repo", "test_lib"), "test_lib~2.1.0" + ) + + # Test that exact takes precedence over prefix + self.assertEqual( + repo_mapping.lookup("deps+specific_repo", "external_dep"), + "external_dep~exact", + ) + + # Test non-existent mapping + self.assertIsNone(repo_mapping.lookup("nonexistent", "repo")) + self.assertIsNone(repo_mapping.lookup("unknown+repo", "missing")) + + # Test empty mapping + empty_mapping = _RepositoryMapping({}, {}) + self.assertIsNone(empty_mapping.lookup("any", "repo")) + + # Test is_empty() method + self.assertFalse(repo_mapping.is_empty()) # Should have mappings + self.assertTrue(empty_mapping.is_empty()) # Should be empty + def testCurrentRepository(self) -> None: # Under bzlmod, the current repository name is the empty string instead # of the name in the workspace file.