Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ END_UNRELEASED_TEMPLATE

{#v0-0-0-added}
### Added
* (runfiles) The Python runfiles library now supports Bazel's
`--incompatible_compact_repo_mapping_manifest` flag.
* (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the
{obj}`main_module` attribute.
* (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the
Expand Down
164 changes: 132 additions & 32 deletions python/runfiles/runfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,132 @@
"""Runfiles lookup library for Bazel-built Python binaries and tests.

See @rules_python//python/runfiles/README.md for usage instructions.

:::{versionadded} VERSION_NEXT_FEATURE
Support for Bazel's `--incompatible_compact_repo_mapping_manifest` flag was added.
This enables prefix-based repository mappings to reduce memory usage for large
dependency graphs under bzlmod.
:::
"""
import collections.abc
import inspect
import os
import posixpath
import sys
from collections import defaultdict
from typing import Dict, Optional, Tuple, Union


class _RepositoryMapping:
"""Repository mapping for resolving apparent repository names to canonical ones.

Handles both exact mappings and prefix-based mappings introduced by the
--incompatible_compact_repo_mapping_manifest flag.
"""

def __init__(
self,
exact_mappings: Dict[Tuple[str, str], str],
prefixed_mappings: Dict[Tuple[str, str], str],
) -> None:
"""Initialize repository mapping with exact and prefixed mappings.

Args:
exact_mappings: Dict mapping (source_canonical, target_apparent) -> target_canonical
prefixed_mappings: Dict mapping (source_prefix, target_apparent) -> target_canonical
"""
self._exact_mappings = exact_mappings
self._prefixed_mappings = prefixed_mappings

# Group prefixed mappings by target_apparent for faster lookups
self._grouped_prefixed_mappings = defaultdict(list)
for (
prefix_source,
target_app,
), target_canonical in self._prefixed_mappings.items():
self._grouped_prefixed_mappings[target_app].append(
(prefix_source, target_canonical)
)

@staticmethod
def create_from_file(repo_mapping_path: Optional[str]) -> "_RepositoryMapping":
"""Create RepositoryMapping from a repository mapping manifest file.

Args:
repo_mapping_path: Path to the repository mapping file, or None if not available

Returns:
RepositoryMapping instance with parsed mappings
"""
# If the repository mapping file can't be found, that is not an error: We
# might be running without Bzlmod enabled or there may not be any runfiles.
# In this case, just apply empty repo mappings.
if not repo_mapping_path:
return _RepositoryMapping({}, {})

try:
with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f:
content = f.read()
except FileNotFoundError:
return _RepositoryMapping({}, {})

exact_mappings = {}
prefixed_mappings = {}
for line in content.splitlines():
source_canonical, target_apparent, target_canonical = line.split(",")
if source_canonical.endswith("*"):
# This is a prefixed mapping - remove the '*' for prefix matching
prefix = source_canonical[:-1]
prefixed_mappings[(prefix, target_apparent)] = target_canonical
else:
# This is an exact mapping
exact_mappings[(source_canonical, target_apparent)] = target_canonical

Comment on lines +89 to +98
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The loop here doesn't account for empty or whitespace-only lines that content.splitlines() might produce. This could lead to a ValueError when trying to unpack the result of line.split(',') on the next line. It's safer to strip each line and skip it if it's empty.

Suggested change
for line in content.splitlines():
source_canonical, target_apparent, target_canonical = line.split(",")
if source_canonical.endswith("*"):
# This is a prefixed mapping - remove the '*' for prefix matching
prefix = source_canonical[:-1]
prefixed_mappings[(prefix, target_apparent)] = target_canonical
else:
# This is an exact mapping
exact_mappings[(source_canonical, target_apparent)] = target_canonical
for line in content.splitlines():
line = line.strip()
if not line:
continue
source_canonical, target_apparent, target_canonical = line.split(",")
if source_canonical.endswith("*"):
# This is a prefixed mapping - remove the '*' for prefix matching
prefix = source_canonical[:-1]
prefixed_mappings[(prefix, target_apparent)] = target_canonical
else:
# This is an exact mapping
exact_mappings[(source_canonical, target_apparent)] = target_canonical

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may be best to error out anyway if repo mappings diverged from expected format in the ways described here.

return _RepositoryMapping(exact_mappings, prefixed_mappings)

def lookup(self, source_repo: Optional[str], target_apparent: str) -> Optional[str]:
"""Look up repository mapping for the given source and target.

This handles both exact mappings and prefix-based mappings introduced by the
--incompatible_compact_repo_mapping_manifest flag. Exact mappings are tried
first, followed by prefix-based mappings where order matters.

Args:
source_repo: Source canonical repository name
target_apparent: Target apparent repository name

Returns:
target_canonical repository name, or None if no mapping exists
"""
if source_repo is None:
return None

key = (source_repo, target_apparent)

# Try exact mapping first
if key in self._exact_mappings:
return self._exact_mappings[key]

# Try prefixed mapping if no exact match found
if target_apparent in self._grouped_prefixed_mappings:
for prefix_source, target_canonical in self._grouped_prefixed_mappings[
target_apparent
]:
if source_repo.startswith(prefix_source):
return target_canonical

# No mapping found
return None

def is_empty(self) -> bool:
"""Check if this repository mapping is empty (no exact or prefixed mappings).

Returns:
True if there are no mappings, False otherwise
"""
return len(self._exact_mappings) == 0 and len(self._prefixed_mappings) == 0


class _ManifestBased:
"""`Runfiles` strategy that parses a runfiles-manifest to look up runfiles."""

Expand Down Expand Up @@ -130,7 +248,7 @@ class Runfiles:
def __init__(self, strategy: Union[_ManifestBased, _DirectoryBased]) -> None:
self._strategy = strategy
self._python_runfiles_root = _FindPythonRunfilesRoot()
self._repo_mapping = _ParseRepoMapping(
self._repo_mapping = _RepositoryMapping.create_from_file(
strategy.RlocationChecked("_repo_mapping")
)

Expand Down Expand Up @@ -179,7 +297,7 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st
if os.path.isabs(path):
return path

if source_repo is None and self._repo_mapping:
if source_repo is None and not self._repo_mapping.is_empty():
# Look up runfiles using the repository mapping of the caller of the
# current method. If the repo mapping is empty, determining this
# name is not necessary.
Expand All @@ -188,7 +306,7 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st
# Split off the first path component, which contains the repository
# name (apparent or canonical).
target_repo, _, remainder = path.partition("/")
if not remainder or (source_repo, target_repo) not in self._repo_mapping:
if not remainder or self._repo_mapping.lookup(source_repo, target_repo) is None:
# One of the following is the case:
# - not using Bzlmod, so the repository mapping is empty and
# apparent and canonical repository names are the same
Expand All @@ -202,11 +320,17 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st
source_repo is not None
), "BUG: if the `source_repo` is None, we should never go past the `if` statement above"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: This assertion may still be important. I need to reassess above whether we need to match the previous conditional.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think an equivalent assertion is needed with the new logic, but I'm open to more thoughts on it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep the assert. I'm not entirely sure if we have test coverage of all the cases.

Looking at the code, I don't see why the comment wouldn't still apply. source_repo can end up None here if the repo mapping is empty. (which, according to comments, occurs for workspace mode)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Restored.


# target_repo is an apparent repository name. Look up the corresponding
# canonical repository name with respect to the current repository,
# identified by its canonical name.
target_canonical = self._repo_mapping[(source_repo, target_repo)]
return self._strategy.RlocationChecked(target_canonical + "/" + remainder)
# Look up the target repository using the repository mapping
if source_repo is not None:
target_canonical = self._repo_mapping.lookup(source_repo, target_repo)
if target_canonical is not None:
return self._strategy.RlocationChecked(
target_canonical + "/" + remainder
)

# No mapping found - assume target_repo is already canonical or
# we're not using Bzlmod
return self._strategy.RlocationChecked(path)

def EnvVars(self) -> Dict[str, str]:
"""Returns environment variables for subprocesses.
Expand Down Expand Up @@ -359,30 +483,6 @@ def _FindPythonRunfilesRoot() -> str:
return root


def _ParseRepoMapping(repo_mapping_path: Optional[str]) -> Dict[Tuple[str, str], str]:
"""Parses the repository mapping manifest."""
# If the repository mapping file can't be found, that is not an error: We
# might be running without Bzlmod enabled or there may not be any runfiles.
# In this case, just apply an empty repo mapping.
if not repo_mapping_path:
return {}
try:
with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f:
content = f.read()
except FileNotFoundError:
return {}

repo_mapping = {}
for line in content.split("\n"):
if not line:
# Empty line following the last line break
break
current_canonical, target_local, target_canonical = line.split(",")
repo_mapping[(current_canonical, target_local)] = target_canonical

return repo_mapping


def CreateManifestBased(manifest_path: str) -> Runfiles:
return Runfiles.CreateManifestBased(manifest_path)

Expand Down
160 changes: 160 additions & 0 deletions tests/runfiles/runfiles_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from typing import Any, List, Optional

from python.runfiles import runfiles
from python.runfiles.runfiles import _RepositoryMapping


class RunfilesTest(unittest.TestCase):
Expand Down Expand Up @@ -525,6 +526,165 @@ def testDirectoryBasedRlocationWithRepoMappingFromOtherRepo(self) -> None:
r.Rlocation("config.json", "protobuf~3.19.2"), dir + "/config.json"
)

def testDirectoryBasedRlocationWithCompactRepoMappingFromMain(self) -> None:
"""Test repository mapping with prefix-based entries (compact format)."""
with _MockFile(
name="_repo_mapping",
contents=[
# Exact mappings (no asterisk)
"_,config.json,config.json~1.2.3",
",my_module,_main",
",my_workspace,_main",
# Prefixed mappings (with asterisk) - these apply to any repo starting with the prefix
"deps+*,external_dep,external_dep~1.0.0",
"test_deps+*,test_lib,test_lib~2.1.0",
],
) as rm:
dir = os.path.dirname(rm.Path())
r = runfiles.CreateDirectoryBased(dir)

# Test exact mappings still work
self.assertEqual(
r.Rlocation("my_module/bar/runfile", ""), dir + "/_main/bar/runfile"
)
self.assertEqual(
r.Rlocation("my_workspace/bar/runfile", ""), dir + "/_main/bar/runfile"
)

# Test prefixed mappings - should match any repo starting with "deps+"
self.assertEqual(
r.Rlocation("external_dep/foo/file", "deps+dep1"),
dir + "/external_dep~1.0.0/foo/file",
)
self.assertEqual(
r.Rlocation("external_dep/bar/file", "deps+dep2"),
dir + "/external_dep~1.0.0/bar/file",
)
self.assertEqual(
r.Rlocation("external_dep/nested/path/file", "deps+some_long_dep_name"),
dir + "/external_dep~1.0.0/nested/path/file",
)

# Test that prefixed mappings work for test_deps+ prefix too
self.assertEqual(
r.Rlocation("test_lib/test/file", "test_deps+junit"),
dir + "/test_lib~2.1.0/test/file",
)

# Test that non-matching prefixes don't match
self.assertEqual(
r.Rlocation("external_dep/foo/file", "other_prefix"),
dir + "/external_dep/foo/file", # No mapping applied, use as-is
)

def testDirectoryBasedRlocationWithCompactRepoMappingPrecedence(self) -> None:
"""Test that exact mappings take precedence over prefixed mappings."""
with _MockFile(
name="_repo_mapping",
contents=[
# Exact mapping for a specific source repo
"deps+specific_repo,external_dep,external_dep~exact",
# Prefixed mapping for repos starting with "deps+"
"deps+*,external_dep,external_dep~prefix",
# Another prefixed mapping with different prefix
"other+*,external_dep,external_dep~other",
],
) as rm:
dir = os.path.dirname(rm.Path())
r = runfiles.CreateDirectoryBased(dir)

# Exact mapping should take precedence over prefix
self.assertEqual(
r.Rlocation("external_dep/foo/file", "deps+specific_repo"),
dir + "/external_dep~exact/foo/file",
)

# Other repos with deps+ prefix should use the prefixed mapping
self.assertEqual(
r.Rlocation("external_dep/foo/file", "deps+other_repo"),
dir + "/external_dep~prefix/foo/file",
)

# Different prefix should use its own mapping
self.assertEqual(
r.Rlocation("external_dep/foo/file", "other+some_repo"),
dir + "/external_dep~other/foo/file",
)

def testDirectoryBasedRlocationWithCompactRepoMappingOrderMatters(self) -> None:
"""Test that order matters for prefixed mappings (first match wins)."""
with _MockFile(
name="_repo_mapping",
contents=[
# More specific prefix comes first
"deps+specific+*,lib,lib~specific",
# More general prefix comes second
"deps+*,lib,lib~general",
],
) as rm:
dir = os.path.dirname(rm.Path())
r = runfiles.CreateDirectoryBased(dir)

# Should match the more specific prefix first
self.assertEqual(
r.Rlocation("lib/foo/file", "deps+specific+repo"),
dir + "/lib~specific/foo/file",
)

# Should match the general prefix for non-specific repos
self.assertEqual(
r.Rlocation("lib/foo/file", "deps+other_repo"),
dir + "/lib~general/foo/file",
)

def testRepositoryMappingLookup(self) -> None:
"""Test _RepositoryMapping.lookup() method for both exact and prefix-based mappings."""
exact_mappings = {
("", "my_workspace"): "_main",
("", "config_lib"): "config_lib~1.0.0",
("deps+specific_repo", "external_dep"): "external_dep~exact",
}
prefixed_mappings = {
("deps+", "external_dep"): "external_dep~prefix",
("test_deps+", "test_lib"): "test_lib~2.1.0",
}

repo_mapping = _RepositoryMapping(exact_mappings, prefixed_mappings)

# Test exact lookups
self.assertEqual(repo_mapping.lookup("", "my_workspace"), "_main")
self.assertEqual(repo_mapping.lookup("", "config_lib"), "config_lib~1.0.0")
self.assertEqual(
repo_mapping.lookup("deps+specific_repo", "external_dep"),
"external_dep~exact",
)

# Test prefix-based lookups
self.assertEqual(
repo_mapping.lookup("deps+some_repo", "external_dep"), "external_dep~prefix"
)
self.assertEqual(
repo_mapping.lookup("test_deps+another_repo", "test_lib"), "test_lib~2.1.0"
)

# Test that exact takes precedence over prefix
self.assertEqual(
repo_mapping.lookup("deps+specific_repo", "external_dep"),
"external_dep~exact",
)

# Test non-existent mapping
self.assertIsNone(repo_mapping.lookup("nonexistent", "repo"))
self.assertIsNone(repo_mapping.lookup("unknown+repo", "missing"))

# Test empty mapping
empty_mapping = _RepositoryMapping({}, {})
self.assertIsNone(empty_mapping.lookup("any", "repo"))

# Test is_empty() method
self.assertFalse(repo_mapping.is_empty()) # Should have mappings
self.assertTrue(empty_mapping.is_empty()) # Should be empty

def testCurrentRepository(self) -> None:
# Under bzlmod, the current repository name is the empty string instead
# of the name in the workspace file.
Expand Down