Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
63ff6c9
fix .dtype property
TomNicholas Jul 15, 2025
e765e13
use zarr data types in create_array_v3_metadata
TomNicholas Jul 15, 2025
c60795a
change expected repr
TomNicholas Jul 15, 2025
b2d9549
fix test for checking dtypes are the same
TomNicholas Jul 15, 2025
a05d9e8
fix icechunk tests
TomNicholas Jul 15, 2025
f172930
fix kerchunk tests
TomNicholas Jul 15, 2025
02dfa56
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 15, 2025
7dfeabe
Merge branch 'develop' into zarr-data-types-refactor-compat2
TomNicholas Jul 15, 2025
b42022f
fix combine test
TomNicholas Jul 15, 2025
7bb6e6f
fix conversion of v3 to v2 metadata
TomNicholas Jul 16, 2025
744fd49
write function to normalize kerchunk references into true json
TomNicholas Jul 16, 2025
35f94b5
use the new function in our tests
TomNicholas Jul 16, 2025
9663d05
remove outdated imports
TomNicholas Jul 16, 2025
e1e54b3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 16, 2025
915e4c3
Merge branch 'develop' into kerchunk_to_true_json
TomNicholas Jul 16, 2025
ae3f871
Merge branch 'kerchunk_to_true_json' into zarr-data-types-refactor-co…
TomNicholas Jul 16, 2025
8548086
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 16, 2025
01baf83
missed a coersion
TomNicholas Jul 16, 2025
ad0be5c
Merge branch 'kerchunk_to_true_json' of https://github.com/TomNichola…
TomNicholas Jul 16, 2025
de9c8fd
Merge branch 'kerchunk_to_true_json' into zarr-data-types-refactor-co…
TomNicholas Jul 16, 2025
35cfddf
fix dmrpp test
TomNicholas Jul 16, 2025
813ee16
Merge branch 'zarr-data-types-refactor-compat2' of https://github.com…
TomNicholas Jul 16, 2025
f0739c1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 16, 2025
77493a6
Merge branch 'develop' into zarr-data-types-refactor-compat2
TomNicholas Jul 16, 2025
31941fb
require latest zarr
TomNicholas Jul 16, 2025
64a513b
Merge branch 'zarr-data-types-refactor-compat2' of https://github.com…
TomNicholas Jul 16, 2025
a17884d
update minimum version in pixi
TomNicholas Jul 16, 2025
93f7758
fix array return type and add test
TomNicholas Jul 17, 2025
407f208
change expected group repr dtype str
TomNicholas Jul 17, 2025
09d801d
fix metadata comparison by just using updated version of ArrayV3Metad…
TomNicholas Jul 17, 2025
9117116
don't need to add .nbytes to ManifestArray
TomNicholas Jul 17, 2025
3ebb731
remove rogue print statements
TomNicholas Jul 17, 2025
bf104cb
try fixing zarr data type error in icechunk writer in CI
TomNicholas Jul 17, 2025
b1961b6
remove xfail now that datetime dtypes are supported in zarr
TomNicholas Jul 17, 2025
b0bddf2
add xfail for one test case
TomNicholas Jul 17, 2025
fc1bd34
add note about now supporting big-endian
TomNicholas Jul 17, 2025
925ffec
remove unnecessary conversion of dtype
TomNicholas Jul 17, 2025
0d7ad60
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,11 +358,12 @@ def manifest_array(array_v3_metadata):
def _manifest_array(
shape: tuple = (5, 2),
chunks: tuple = (5, 2),
data_type: np.dtype = np.dtype("int32"),
codecs: list[dict] | None = [ARRAYBYTES_CODEC, ZLIB_CODEC],
dimension_names: Iterable[str] | None = None,
):
metadata = array_v3_metadata(
shape=shape, chunks=chunks, codecs=codecs, dimension_names=dimension_names
shape=shape, chunks=chunks, data_type=data_type, codecs=codecs, dimension_names=dimension_names
)
entries = _generate_chunk_entries(shape, chunks, _entry_from_chunk_key)
chunkmanifest = ChunkManifest(entries=entries)
Expand Down
8 changes: 5 additions & 3 deletions virtualizarr/manifests/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,13 @@ def chunks(self) -> tuple[int, ...]:
Individual chunk size by number of elements.
"""
return self._metadata.chunks

@property
def dtype(self) -> np.dtype:
dtype_str = self.metadata.data_type
return dtype_str.to_numpy()
"""The native dtype of the data (typically a numpy dtype)"""
zdtype = self.metadata.data_type
dtype = zdtype.to_native_dtype()
return dtype.str

@property
def shape(self) -> tuple[int, ...]:
Expand Down
13 changes: 9 additions & 4 deletions virtualizarr/manifests/array_api.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Any, Callable, cast
from typing import TYPE_CHECKING, Any, Callable, cast, Union

import numpy as np

Expand Down Expand Up @@ -34,12 +34,17 @@ def decorator(func):


@implements(np.result_type)
def result_type(*arrays_and_dtypes) -> np.dtype:
def result_type(*arrays_and_dtypes: Union["ManifestArray", np.dtype]) -> np.dtype:
"""Called by xarray to ensure all arguments to concat have the same dtype."""
first_dtype, *other_dtypes = (np.dtype(obj) for obj in arrays_and_dtypes)
from virtualizarr.manifests.array import ManifestArray

dtypes = (obj.dtype if isinstance(obj, ManifestArray) else np.dtype(obj) for obj in arrays_and_dtypes)
first_dtype, *other_dtypes = dtypes
unique_dtypes = set(dtypes)
for other_dtype in other_dtypes:
if other_dtype != first_dtype:
raise ValueError("dtypes not all consistent")
raise ValueError(f"Cannot combine arrays with inconsistent dtypes, but got {len(unique_dtypes)} distinct dtypes: {unique_dtypes}")

return first_dtype


Expand Down
6 changes: 4 additions & 2 deletions virtualizarr/manifests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
parse_dimension_names,
parse_shapelike,
)
from zarr.dtype import parse_data_type

from virtualizarr.codecs import convert_to_codec_pipeline, get_codecs

Expand Down Expand Up @@ -75,15 +76,16 @@ def create_v3_array_metadata(
ArrayV3Metadata
A configured ArrayV3Metadata instance with standard defaults
"""
zdtype = parse_data_type(data_type, zarr_format=3)
return ArrayV3Metadata(
shape=shape,
data_type=data_type.name if hasattr(data_type, "name") else data_type,
data_type=zdtype,
chunk_grid={
"name": "regular",
"configuration": {"chunk_shape": chunk_shape},
},
chunk_key_encoding=chunk_key_encoding,
fill_value=fill_value,
fill_value=zdtype.default_scalar() if fill_value is None else fill_value,
codecs=convert_to_codec_pipeline(
codecs=codecs or [],
dtype=data_type,
Expand Down
19 changes: 18 additions & 1 deletion virtualizarr/tests/test_manifests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from virtualizarr.manifests import ChunkManifest, ManifestArray


class TestManifestArray:
class TestInit:
def test_manifest_array(self, array_v3_metadata):
chunks_dict = {
"0.0.0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100},
Expand Down Expand Up @@ -50,6 +50,23 @@ def test_manifest_array_dict_v3_metadata(self, array_v3_metadata):
assert marr.ndim == 3


class TestResultType:
def test_idempotent(self, manifest_array):
marr1 = manifest_array(shape=(), chunks=(), data_type=np.dtype("int32"))
marr2 = manifest_array(shape=(), chunks=(), data_type=np.dtype("int32"))

assert np.result_type(marr1) == marr1.dtype
assert np.result_type(marr1, marr1.dtype) == marr1.dtype
assert np.result_type(marr1, marr2) == marr1.dtype

def test_raises(self, manifest_array):
marr1 = manifest_array(shape=(), chunks=(), data_type=np.dtype("int32"))
marr2 = manifest_array(shape=(), chunks=(), data_type=np.dtype("int64"))

with pytest.raises(ValueError, match="inconsistent"):
np.result_type(marr1, marr2)


class TestEquals:
def test_equals(self, array_v3_metadata):
chunks_dict = {
Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_manifests/test_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def test_manifest_repr(self, manifest_array):
expected_repr = textwrap.dedent(
"""
ManifestGroup(
arrays={'foo': ManifestArray<shape=(5, 2), dtype=int32, chunks=(5, 2)>},
arrays={'foo': ManifestArray<shape=(5, 2), dtype=<i4, chunks=(5, 2)>},
groups={},
metadata=GroupMetadata(attributes={}, zarr_format=3, consolidated_metadata=None, node_type='group'),
)
Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def convert_v3_to_v2_metadata(

v2_metadata = ArrayV2Metadata(
shape=v3_metadata.shape,
dtype=v3_metadata.data_type.to_numpy(),
dtype=v3_metadata.data_type,
chunks=v3_metadata.chunks,
fill_value=fill_value or v3_metadata.fill_value,
compressor=compressor_config,
Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/writers/icechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ def write_virtual_variable_to_icechunk(
name=name,
shape=metadata.shape,
chunks=metadata.chunks,
dtype=metadata.data_type.to_numpy(),
dtype=metadata.data_type,
filters=filters,
compressors=compressors,
dimension_names=var.dims,
Expand Down
Loading