diff --git a/news/7296.bugfix b/news/7296.bugfix new file mode 100644 index 00000000000..5d617bf75db --- /dev/null +++ b/news/7296.bugfix @@ -0,0 +1,5 @@ +Make sure ``pip wheel`` never outputs pure python wheels with a +python implementation tag. Better fix/workaround for +`#3025 `_ by +using a per-implementation wheel cache instead of caching pure python +wheels with an implementation tag in their name. diff --git a/news/7296.removal b/news/7296.removal new file mode 100644 index 00000000000..ef0e5f7495f --- /dev/null +++ b/news/7296.removal @@ -0,0 +1,3 @@ +The pip>=20 wheel cache is not retro-compatible with previous versions. Until +pip 21.0, pip will continue to take advantage of existing legacy cache +entries. diff --git a/src/pip/_internal/cache.py b/src/pip/_internal/cache.py index 7a431f9a2ed..8c2041527c1 100644 --- a/src/pip/_internal/cache.py +++ b/src/pip/_internal/cache.py @@ -4,8 +4,8 @@ # The following comment should be removed at some point in the future. # mypy: strict-optional=False -import errno import hashlib +import json import logging import os @@ -14,19 +14,27 @@ from pip._internal.exceptions import InvalidWheelFilename from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel +from pip._internal.pep425tags import interpreter_name, interpreter_version from pip._internal.utils.compat import expanduser from pip._internal.utils.temp_dir import TempDirectory from pip._internal.utils.typing import MYPY_CHECK_RUNNING from pip._internal.utils.urls import path_to_url if MYPY_CHECK_RUNNING: - from typing import Optional, Set, List, Any + from typing import Optional, Set, List, Any, Dict from pip._internal.models.format_control import FormatControl from pip._internal.pep425tags import Pep425Tag logger = logging.getLogger(__name__) +def _hash_dict(d): + # type: (Dict[str, str]) -> str + """Return a stable sha224 of a dictionary.""" + s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True) + return hashlib.sha224(s.encode("ascii")).hexdigest() + + class Cache(object): """An abstract class - provides cache directories for data from links @@ -48,9 +56,11 @@ def __init__(self, cache_dir, format_control, allowed_formats): _valid_formats = {"source", "binary"} assert self.allowed_formats.union(_valid_formats) == _valid_formats - def _get_cache_path_parts(self, link): + def _get_cache_path_parts_legacy(self, link): # type: (Link) -> List[str] """Get parts of part that must be os.path.joined with cache_dir + + Legacy cache key (pip < 20) for compatibility with older caches. """ # We want to generate an url to use as our cache key, we don't want to @@ -59,10 +69,6 @@ def _get_cache_path_parts(self, link): key_parts = [link.url_without_fragment] if link.hash_name is not None and link.hash is not None: key_parts.append("=".join([link.hash_name, link.hash])) - if link.subdirectory_fragment: - key_parts.append( - "=".join(["subdirectory", link.subdirectory_fragment]) - ) key_url = "#".join(key_parts) # Encode our key url with sha224, we'll use this because it has similar @@ -78,6 +84,41 @@ def _get_cache_path_parts(self, link): return parts + def _get_cache_path_parts(self, link): + # type: (Link) -> List[str] + """Get parts of part that must be os.path.joined with cache_dir + """ + + # We want to generate an url to use as our cache key, we don't want to + # just re-use the URL because it might have other items in the fragment + # and we don't care about those. + key_parts = {"url": link.url_without_fragment} + if link.hash_name is not None and link.hash is not None: + key_parts[link.hash_name] = link.hash + if link.subdirectory_fragment: + key_parts["subdirectory"] = link.subdirectory_fragment + + # Include interpreter name, major and minor version in cache key + # to cope with ill-behaved sdists that build a different wheel + # depending on the python version their setup.py is being run on, + # and don't encode the difference in compatibility tags. + # https://github.com/pypa/pip/issues/7296 + key_parts["interpreter_name"] = interpreter_name() + key_parts["interpreter_version"] = interpreter_version() + + # Encode our key url with sha224, we'll use this because it has similar + # security properties to sha256, but with a shorter total output (and + # thus less secure). However the differences don't make a lot of + # difference for our use case here. + hashed = _hash_dict(key_parts) + + # We want to nest the directories some to prevent having a ton of top + # level directories where we might run out of sub directories on some + # FS. + parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]] + + return parts + def _get_candidates(self, link, canonical_package_name): # type: (Link, Optional[str]) -> List[Any] can_not_cache = ( @@ -94,13 +135,19 @@ def _get_candidates(self, link, canonical_package_name): if not self.allowed_formats.intersection(formats): return [] - root = self.get_path_for_link(link) - try: - return os.listdir(root) - except OSError as err: - if err.errno in {errno.ENOENT, errno.ENOTDIR}: - return [] - raise + candidates = [] + path = self.get_path_for_link(link) + if os.path.isdir(path): + candidates.extend(os.listdir(path)) + # TODO remove legacy path lookup in pip>=21 + legacy_path = self.get_path_for_link_legacy(link) + if os.path.isdir(legacy_path): + candidates.extend(os.listdir(legacy_path)) + return candidates + + def get_path_for_link_legacy(self, link): + # type: (Link) -> str + raise NotImplementedError() def get_path_for_link(self, link): # type: (Link) -> str @@ -142,6 +189,11 @@ def __init__(self, cache_dir, format_control): cache_dir, format_control, {"binary"} ) + def get_path_for_link_legacy(self, link): + # type: (Link) -> str + parts = self._get_cache_path_parts_legacy(link) + return os.path.join(self.cache_dir, "wheels", *parts) + def get_path_for_link(self, link): # type: (Link) -> str """Return a directory to store cached wheels for link @@ -234,6 +286,10 @@ def __init__(self, cache_dir, format_control): self._wheel_cache = SimpleWheelCache(cache_dir, format_control) self._ephem_cache = EphemWheelCache(format_control) + def get_path_for_link_legacy(self, link): + # type: (Link) -> str + return self._wheel_cache.get_path_for_link_legacy(link) + def get_path_for_link(self, link): # type: (Link) -> str return self._wheel_cache.get_path_for_link(link) diff --git a/src/pip/_internal/pep425tags.py b/src/pip/_internal/pep425tags.py index 44b2d7ee0f8..3a291ebaeb1 100644 --- a/src/pip/_internal/pep425tags.py +++ b/src/pip/_internal/pep425tags.py @@ -54,6 +54,9 @@ def get_abbr_impl(): return pyimpl +interpreter_name = get_abbr_impl + + def version_info_to_nodot(version_info): # type: (Tuple[int, ...]) -> str # Only use up to the first two numbers. @@ -69,6 +72,9 @@ def get_impl_ver(): return impl_ver +interpreter_version = get_impl_ver + + def get_impl_version_info(): # type: () -> Tuple[int, ...] """Return sys.version_info-like tuple for use in decrementing the minor @@ -83,14 +89,6 @@ def get_impl_version_info(): return sys.version_info[0], sys.version_info[1] -def get_impl_tag(): - # type: () -> str - """ - Returns the Tag for this specific implementation. - """ - return "{}{}".format(get_abbr_impl(), get_impl_ver()) - - def get_flag(var, fallback, expected=True, warn=True): # type: (str, Callable[..., bool], Union[bool, int], bool) -> bool """Use a fallback method for determining SOABI flags if the needed config @@ -459,6 +457,3 @@ def get_supported( supported.append(('py%s' % (version,), 'none', 'any')) return supported - - -implementation_tag = get_impl_tag() diff --git a/src/pip/_internal/utils/setuptools_build.py b/src/pip/_internal/utils/setuptools_build.py index 497b0eb4939..4147a650dca 100644 --- a/src/pip/_internal/utils/setuptools_build.py +++ b/src/pip/_internal/utils/setuptools_build.py @@ -52,7 +52,6 @@ def make_setuptools_bdist_wheel_args( global_options, # type: Sequence[str] build_options, # type: Sequence[str] destination_dir, # type: str - python_tag, # type: Optional[str] ): # type: (...) -> List[str] # NOTE: Eventually, we'd want to also -S to the flags here, when we're @@ -66,8 +65,6 @@ def make_setuptools_bdist_wheel_args( ) args += ["bdist_wheel", "-d", destination_dir] args += build_options - if python_tag is not None: - args += ["--python-tag", python_tag] return args diff --git a/src/pip/_internal/wheel_builder.py b/src/pip/_internal/wheel_builder.py index 994b2f1a3dc..e67b18781dd 100644 --- a/src/pip/_internal/wheel_builder.py +++ b/src/pip/_internal/wheel_builder.py @@ -9,7 +9,6 @@ import re import shutil -from pip._internal import pep425tags from pip._internal.models.link import Link from pip._internal.utils.logging import indent_log from pip._internal.utils.marker_files import has_delete_marker_file @@ -47,14 +46,6 @@ logger = logging.getLogger(__name__) -def replace_python_tag(wheelname, new_tag): - # type: (str, str) -> str - """Replace the Python tag in a wheel file name with a new value.""" - parts = wheelname.split('-') - parts[-3] = new_tag - return '-'.join(parts) - - def _contains_egg_info( s, _egg_info_re=re.compile(r'([a-z0-9_.]+)-([a-z0-9_.!+-]+)', re.I)): # type: (str, Pattern[str]) -> bool @@ -197,7 +188,6 @@ def _build_wheel_legacy( global_options, # type: List[str] build_options, # type: List[str] tempd, # type: str - python_tag=None, # type: Optional[str] ): # type: (...) -> Optional[str] """Build one unpacked package using the "legacy" build process. @@ -209,7 +199,6 @@ def _build_wheel_legacy( global_options=global_options, build_options=build_options, destination_dir=tempd, - python_tag=python_tag, ) spin_message = 'Building wheel for %s (setup.py)' % (name,) @@ -277,7 +266,6 @@ def _build_one( self, req, # type: InstallRequirement output_dir, # type: str - python_tag=None, # type: Optional[str] ): # type: (...) -> Optional[str] """Build one wheel. @@ -286,21 +274,17 @@ def _build_one( """ # Install build deps into temporary directory (PEP 518) with req.build_env: - return self._build_one_inside_env(req, output_dir, - python_tag=python_tag) + return self._build_one_inside_env(req, output_dir) def _build_one_inside_env( self, req, # type: InstallRequirement output_dir, # type: str - python_tag=None, # type: Optional[str] ): # type: (...) -> Optional[str] with TempDirectory(kind="wheel") as temp_dir: if req.use_pep517: - wheel_path = self._build_one_pep517( - req, temp_dir.path, python_tag=python_tag - ) + wheel_path = self._build_one_pep517(req, temp_dir.path) else: wheel_path = _build_wheel_legacy( name=req.name, @@ -309,7 +293,6 @@ def _build_one_inside_env( global_options=self.global_options, build_options=self.build_options, tempd=temp_dir.path, - python_tag=python_tag, ) if wheel_path is not None: @@ -334,7 +317,6 @@ def _build_one_pep517( self, req, # type: InstallRequirement tempd, # type: str - python_tag=None, # type: Optional[str] ): # type: (...) -> Optional[str] """Build one InstallRequirement using the PEP 517 build process. @@ -359,17 +341,6 @@ def _build_one_pep517( tempd, metadata_directory=req.metadata_directory, ) - if python_tag: - # General PEP 517 backends don't necessarily support - # a "--python-tag" option, so we rename the wheel - # file directly. - new_name = replace_python_tag(wheel_name, python_tag) - os.rename( - os.path.join(tempd, wheel_name), - os.path.join(tempd, new_name) - ) - # Reassign to simplify the return at the end of function - wheel_name = new_name except Exception: logger.error('Failed building wheel for %s', req.name) return None @@ -447,10 +418,6 @@ def build( ', '.join([req.name for (req, _) in buildset]), ) - python_tag = None - if should_unpack: - python_tag = pep425tags.implementation_tag - with indent_log(): build_success, build_failure = [], [] for req, output_dir in buildset: @@ -464,10 +431,7 @@ def build( build_failure.append(req) continue - wheel_file = self._build_one( - req, output_dir, - python_tag=python_tag, - ) + wheel_file = self._build_one(req, output_dir) if wheel_file: if should_unpack: # XXX: This is mildly duplicative with prepare_files, diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index ffc3736ba06..6b62d863545 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -12,7 +12,6 @@ from pip._vendor.six import PY2 from pip import __version__ as pip_current_version -from pip._internal import pep425tags from pip._internal.cli.status_codes import ERROR, SUCCESS from pip._internal.models.index import PyPI, TestPyPI from pip._internal.utils.misc import rmtree @@ -1297,9 +1296,9 @@ def test_install_builds_wheels(script, data, with_wheel): assert "Running setup.py install for requir" not in str(res), str(res) # wheelbroken has to run install assert "Running setup.py install for wheelb" in str(res), str(res) - # We want to make sure we used the correct implementation tag + # We want to make sure pure python wheels do not have an implementation tag assert wheels == [ - "Upper-2.0-{}-none-any.whl".format(pep425tags.implementation_tag), + "Upper-2.0-py{}-none-any.whl".format(sys.version_info[0]), ] diff --git a/tests/unit/test_cache.py b/tests/unit/test_cache.py index 79e4f624d19..8926f3af84e 100644 --- a/tests/unit/test_cache.py +++ b/tests/unit/test_cache.py @@ -1,6 +1,6 @@ import os -from pip._internal.cache import WheelCache +from pip._internal.cache import WheelCache, _hash_dict from pip._internal.models.format_control import FormatControl from pip._internal.models.link import Link from pip._internal.utils.compat import expanduser @@ -42,3 +42,32 @@ def test_wheel_name_filter(tmpdir): assert wc.get(link, "package", [("py3", "none", "any")]) is not link # package2 does not match wheel name assert wc.get(link, "package2", [("py3", "none", "any")]) is link + + +def test_cache_hash(): + h = _hash_dict({"url": "https://g.c/o/r"}) + assert h == "72aa79d3315c181d2cc23239d7109a782de663b6f89982624d8c1e86" + h = _hash_dict({"url": "https://g.c/o/r", "subdirectory": "sd"}) + assert h == "8b13391b6791bf7f3edeabb41ea4698d21bcbdbba7f9c7dc9339750d" + h = _hash_dict({"subdirectory": u"/\xe9e"}) + assert h == "f83b32dfa27a426dec08c21bf006065dd003d0aac78e7fc493d9014d" + + +def test_get_path_for_link_legacy(tmpdir): + """ + Test that an existing cache entry that was created with the legacy hashing + mechanism is used. + """ + wc = WheelCache(tmpdir, FormatControl()) + link = Link("https://g.c/o/r") + path = wc.get_path_for_link(link) + legacy_path = wc.get_path_for_link_legacy(link) + assert path != legacy_path + ensure_dir(path) + with open(os.path.join(path, "test-pyz-none-any.whl"), "w"): + pass + ensure_dir(legacy_path) + with open(os.path.join(legacy_path, "test-pyx-none-any.whl"), "w"): + pass + expected_candidates = {"test-pyx-none-any.whl", "test-pyz-none-any.whl"} + assert set(wc._get_candidates(link, "test")) == expected_candidates diff --git a/tests/unit/test_wheel_builder.py b/tests/unit/test_wheel_builder.py index 8db535dadc2..3e0fc0490ef 100644 --- a/tests/unit/test_wheel_builder.py +++ b/tests/unit/test_wheel_builder.py @@ -182,22 +182,6 @@ def test_format_command_result__empty_output(caplog, log_level): ] -def test_python_tag(): - wheelnames = [ - 'simplewheel-1.0-py2.py3-none-any.whl', - 'simplewheel-1.0-py27-none-any.whl', - 'simplewheel-2.0-1-py2.py3-none-any.whl', - ] - newnames = [ - 'simplewheel-1.0-py37-none-any.whl', - 'simplewheel-1.0-py37-none-any.whl', - 'simplewheel-2.0-1-py37-none-any.whl', - ] - for name, expected in zip(wheelnames, newnames): - result = wheel_builder.replace_python_tag(name, 'py37') - assert result == expected - - class TestWheelBuilder(object): def test_skip_building_wheels(self, caplog):