Skip to content

Commit fd1d83b

Browse files
committed
exclude vendored spdx data from sdist/whl. build/bring our own
Per feedback, integrate a variant of #799 that builds a minimal JSON dataset to feed the vendored license-expression library: 32K for src/packaging/_spdx.json vs. 848K for src/packaging/_vendor/license_expression/data/scancode-licensedb-index.json.
1 parent e3dec5c commit fd1d83b

File tree

7 files changed

+76
-3
lines changed

7 files changed

+76
-3
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ Source = "https://github.com/pypa/packaging"
3737

3838
[tool.flit.sdist]
3939
include = ["LICENSE*", "tests/", "docs/", "CHANGELOG.rst"]
40-
exclude = ["docs/_build", "tests/manylinux/build-hello-world.sh", "tests/musllinux/build.sh", "tests/hello-world.c", "tests/__pycache__", "build/__pycache__"]
40+
exclude = ["docs/_build", "tests/manylinux/build-hello-world.sh", "tests/musllinux/build.sh", "tests/hello-world.c", "tests/__pycache__", "build/__pycache__", "src/packaging/_vendor/license_expression/data/*"]
4141

4242
[tool.pytest.ini_options]
4343
addopts = [

src/packaging/_spdx.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

src/packaging/metadata.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import email.message
77
import email.parser
88
import email.policy
9+
import importlib.resources
910
import typing
1011
from typing import (
1112
Any,
@@ -647,7 +648,8 @@ def _process_requires_dist(
647648
return reqs
648649

649650
def _process_license_expression(self, value: str) -> str:
650-
licensing = get_spdx_licensing()
651+
with importlib.resources.path("packaging", "_spdx.json") as spdx_path:
652+
licensing = get_spdx_licensing(license_index_location=spdx_path)
651653
try:
652654
return str(licensing.parse(value, validate=True))
653655
except LicenseExpressionError as exc:

tasks/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import invoke
66

7-
from . import check
7+
from . import check, licenses
88

99
ns = invoke.Collection(check)
10+
ns.add_collection(licenses)

tasks/licenses.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import json
2+
import time
3+
4+
import httpx
5+
import invoke
6+
7+
from .paths import SPDX_LICENSES
8+
9+
LATEST_API = "https://api.github.com/repos/spdx/license-list-data/releases/latest"
10+
LICENSES_URL = (
11+
"https://raw.githubusercontent.com/spdx/license-list-data/v{}/json/licenses.json"
12+
)
13+
EXCEPTIONS_URL = (
14+
"https://raw.githubusercontent.com/spdx/license-list-data/v{}/json/exceptions.json"
15+
)
16+
17+
18+
def download_data(url, attempts=600, delay=1):
    """Fetch ``url`` and return its body parsed as JSON, retrying on failure.

    Args:
        url: Address requested with ``httpx.get``.
        attempts: Maximum number of requests made before giving up.
        delay: Seconds to sleep after each failed request.

    Returns:
        The JSON document decoded from the UTF-8 response body.

    Raises:
        ConnectionError: If every attempt failed. The last ``httpx`` error
            is attached as the exception's cause.
    """
    last_error = None
    for _ in range(attempts):
        try:
            response = httpx.get(url)
            response.raise_for_status()
        except httpx.HTTPError as exc:
            # Only transport failures and error status codes are retried;
            # anything else (e.g. a programming error) propagates at once
            # instead of being silently retried for minutes.
            last_error = exc
            time.sleep(delay)
        else:
            return json.loads(response.content.decode("utf-8"))

    message = "Download failed"
    raise ConnectionError(message) from last_error
31+
32+
33+
@invoke.task
def update(ctx):
    """Regenerate the minimal SPDX dataset from the latest upstream release.

    Looks up the latest ``spdx/license-list-data`` release, downloads its
    license and exception lists, reduces every entry to the keys written
    below (``spdx_license_key`` plus optional ``is_exception`` and
    ``is_deprecated``) and writes the combined list as JSON to
    ``SPDX_LICENSES``.

    Args:
        ctx: The invoke context passed to every task (unused here).
    """
    print("Updating SPDX licenses...")

    tag_name = download_data(LATEST_API)["tag_name"]
    # The URL templates add the "v" prefix back, so strip it only when it is
    # really there; blindly dropping the first character would corrupt a tag
    # published without the prefix.
    latest_version = tag_name[1:] if tag_name.startswith("v") else tag_name
    print(f"Latest version: {latest_version}")

    license_payload = download_data(LICENSES_URL.format(latest_version))["licenses"]
    print(f"Licenses: {len(license_payload)}")

    exception_payload = download_data(EXCEPTIONS_URL.format(latest_version))[
        "exceptions"
    ]
    print(f"Exceptions: {len(exception_payload)}")

    licenses = []
    for license_data in license_payload:
        entry = {"spdx_license_key": license_data["licenseId"]}
        # The flag is only stored when set, which keeps the file small.
        if license_data["isDeprecatedLicenseId"]:
            entry["is_deprecated"] = license_data["isDeprecatedLicenseId"]
        licenses.append(entry)

    for exception_data in exception_payload:
        entry = {
            "spdx_license_key": exception_data["licenseExceptionId"],
            "is_exception": True,
        }
        if exception_data["isDeprecatedLicenseId"]:
            entry["is_deprecated"] = exception_data["isDeprecatedLicenseId"]
        licenses.append(entry)

    with open(SPDX_LICENSES, "w", encoding="utf-8") as f:
        json.dump(licenses, f)

tasks/paths.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
PROJECT = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
88

99
CACHE = os.path.join(PROJECT, ".cache")
10+
SPDX_LICENSES = os.path.join(PROJECT, "src", "packaging", "_spdx.json")

tasks/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
# The requirements required to invoke the tasks
22
invoke
33
progress
4+
httpx

0 commit comments

Comments
 (0)