Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/zarr/core/metadata/v3.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import warnings
from typing import TYPE_CHECKING, cast, overload

if TYPE_CHECKING:
Expand Down Expand Up @@ -70,6 +71,27 @@ def parse_dimension_names(data: None | Iterable[str | None]) -> tuple[str | None
raise TypeError(msg)


def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:
    """
    Parse the ``storage_transformers`` field of array metadata.

    Storage transformers are not supported by zarr-python at this time; a
    non-empty value is retained in the metadata but triggers a ``UserWarning``.

    Parameters
    ----------
    data : object
        ``None`` or an iterable of dicts describing storage transformers.

    Returns
    -------
    tuple[dict[str, JSON], ...]
        The storage transformers as a tuple; empty if ``data`` is ``None``
        or an empty iterable.

    Raises
    ------
    TypeError
        If ``data`` is neither ``None`` nor an iterable.
    """
    if data is None:
        return ()
    if not isinstance(data, Iterable):
        raise TypeError(
            f"Invalid storage_transformers. Expected an iterable of dicts. Got {type(data)} instead."
        )
    # Materialize exactly once. The previous implementation called
    # tuple(data) just to take its length and then returned `data` itself,
    # which (a) exhausted one-shot iterators before returning them and
    # (b) violated the declared tuple return type for list/generator input.
    data_tuple = cast("tuple[dict[str, JSON], ...]", tuple(data))
    if len(data_tuple) >= 1:
        msg = (
            "Got a non-null storage_transformers keyword argument. "
            "Storage transformers are not supported by zarr-python at this time. "
            "The storage transformer(s) will be retained in array metadata but will not "
            "influence storage routines"
        )
        warnings.warn(msg, UserWarning, stacklevel=1)
    return data_tuple


@dataclass(frozen=True, kw_only=True)
class ArrayV3Metadata(ArrayMetadata):
shape: ChunkCoords
Expand All @@ -82,6 +104,7 @@ class ArrayV3Metadata(ArrayMetadata):
dimension_names: tuple[str, ...] | None = None
zarr_format: Literal[3] = field(default=3, init=False)
node_type: Literal["array"] = field(default="array", init=False)
storage_transformers: tuple[dict[str, JSON], ...]

def __init__(
self,
Expand All @@ -94,6 +117,7 @@ def __init__(
codecs: Iterable[Codec | dict[str, JSON]],
attributes: None | dict[str, JSON],
dimension_names: None | Iterable[str],
storage_transformers: None | Iterable[dict[str, JSON]] = None,
) -> None:
"""
Because the class is a frozen dataclass, we set attributes using object.__setattr__
Expand All @@ -106,6 +130,7 @@ def __init__(
fill_value_parsed = parse_fill_value(fill_value, dtype=data_type_parsed)
attributes_parsed = parse_attributes(attributes)
codecs_parsed_partial = parse_codecs(codecs)
storage_transformers_parsed = parse_storage_transformers(storage_transformers)

array_spec = ArraySpec(
shape=shape_parsed,
Expand All @@ -124,6 +149,7 @@ def __init__(
object.__setattr__(self, "dimension_names", dimension_names_parsed)
object.__setattr__(self, "fill_value", fill_value_parsed)
object.__setattr__(self, "attributes", attributes_parsed)
object.__setattr__(self, "storage_transformers", storage_transformers_parsed)

self._validate_metadata()

Expand Down
32 changes: 32 additions & 0 deletions tests/v3/test_metadata/test_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from typing import Any

from zarr.abc.codec import Codec
from zarr.core.common import JSON


import numpy as np
Expand Down Expand Up @@ -172,6 +173,7 @@ def test_parse_fill_value_invalid_type_sequence(fill_value: Any, dtype_str: str)
@pytest.mark.parametrize("chunk_key_encoding", ["v2", "default"])
@pytest.mark.parametrize("dimension_separator", [".", "/", None])
@pytest.mark.parametrize("dimension_names", ["nones", "strings", "missing"])
@pytest.mark.parametrize("storage_transformers", [None, ()])
def test_metadata_to_dict(
chunk_grid: str,
codecs: list[Codec],
Expand All @@ -180,6 +182,7 @@ def test_metadata_to_dict(
dimension_separator: Literal[".", "/"] | None,
dimension_names: Literal["nones", "strings", "missing"],
attributes: None | dict[str, Any],
storage_transformers: None | tuple[dict[str, JSON]],
) -> None:
shape = (1, 2, 3)
data_type = "uint8"
Expand Down Expand Up @@ -210,6 +213,7 @@ def test_metadata_to_dict(
"chunk_key_encoding": cke,
"codecs": tuple(c.to_dict() for c in codecs),
"fill_value": fill_value,
"storage_transformers": storage_transformers,
}

if attributes is not None:
Expand All @@ -220,9 +224,16 @@ def test_metadata_to_dict(
metadata = ArrayV3Metadata.from_dict(metadata_dict)
observed = metadata.to_dict()
expected = metadata_dict.copy()

# if unset or None or (), storage_transformers gets normalized to ()
assert observed["storage_transformers"] == ()
observed.pop("storage_transformers")
expected.pop("storage_transformers")

if attributes is None:
assert observed["attributes"] == {}
observed.pop("attributes")

if dimension_separator is None:
if chunk_key_encoding == "default":
expected_cke_dict = DefaultChunkKeyEncoding(separator="/").to_dict()
Expand Down Expand Up @@ -253,3 +264,24 @@ async def test_datetime_metadata(fill_value: int, precision: str) -> None:

result = json.loads(d["zarr.json"].to_bytes())
assert result["fill_value"] == fill_value


def test_storage_transformers() -> None:
    """
    Test that providing a non-empty storage_transformers value emits a
    ``UserWarning`` and that the transformers round-trip through the
    metadata unchanged.
    """
    # NOTE: trailing comma is required — ({...}) is just a parenthesized
    # dict, not a one-element tuple of dicts.
    transformers = ({"test": "should_warn"},)
    metadata_dict = {
        "zarr_format": 3,
        "node_type": "array",
        "shape": (10,),
        "chunk_grid": {"name": "regular", "configuration": {"chunk_shape": (1,)}},
        "data_type": "uint8",
        "chunk_key_encoding": {"name": "v2", "configuration": {"separator": "/"}},
        "codecs": (BytesCodec(),),
        "fill_value": 0,
        "storage_transformers": transformers,
    }
    # Pin the warning category so an unrelated warning cannot satisfy the check.
    with pytest.warns(UserWarning):
        meta = ArrayV3Metadata.from_dict(metadata_dict)

    assert meta.to_dict()["storage_transformers"] == transformers