From 63ff6c924337b3e91ee89d3c504e4ac4e0256093 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 15 Jul 2025 13:29:46 +0100 Subject: [PATCH 01/30] fix .dtype property --- virtualizarr/manifests/array.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/virtualizarr/manifests/array.py b/virtualizarr/manifests/array.py index 642a4d62..ce5d6bb1 100644 --- a/virtualizarr/manifests/array.py +++ b/virtualizarr/manifests/array.py @@ -84,11 +84,13 @@ def chunks(self) -> tuple[int, ...]: Individual chunk size by number of elements. """ return self._metadata.chunks - + @property def dtype(self) -> np.dtype: - dtype_str = self.metadata.data_type - return dtype_str.to_numpy() + """The native dtype of the data (typically a numpy dtype)""" + zdtype = self.metadata.data_type + dtype = zdtype.to_native_dtype() + return dtype.str @property def shape(self) -> tuple[int, ...]: From e765e1314d2194ece7db0b0e7976de4ad5894791 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 15 Jul 2025 15:35:09 +0100 Subject: [PATCH 02/30] use zarr data types in create_array_v3_metadata --- virtualizarr/manifests/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/virtualizarr/manifests/utils.py b/virtualizarr/manifests/utils.py index f1deb129..1252e2bb 100644 --- a/virtualizarr/manifests/utils.py +++ b/virtualizarr/manifests/utils.py @@ -8,6 +8,7 @@ parse_dimension_names, parse_shapelike, ) +from zarr.dtype import parse_data_type from virtualizarr.codecs import convert_to_codec_pipeline, get_codecs @@ -75,15 +76,16 @@ def create_v3_array_metadata( ArrayV3Metadata A configured ArrayV3Metadata instance with standard defaults """ + zdtype = parse_data_type(data_type, zarr_format=3) return ArrayV3Metadata( shape=shape, - data_type=data_type.name if hasattr(data_type, "name") else data_type, + data_type=zdtype, chunk_grid={ "name": "regular", "configuration": {"chunk_shape": chunk_shape}, }, chunk_key_encoding=chunk_key_encoding, - fill_value=fill_value, + fill_value=zdtype.default_scalar() if fill_value is None else fill_value, codecs=convert_to_codec_pipeline( codecs=codecs or [], dtype=data_type, From c60795abcb9d4686bd2c3996bdb4191885535dc4 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 15 Jul 2025 18:24:02 +0100 Subject: [PATCH 03/30] change expected repr --- virtualizarr/tests/test_manifests/test_group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/tests/test_manifests/test_group.py b/virtualizarr/tests/test_manifests/test_group.py index 751596f9..ac5a5781 100644 --- a/virtualizarr/tests/test_manifests/test_group.py +++ b/virtualizarr/tests/test_manifests/test_group.py @@ -27,7 +27,7 @@ def test_manifest_repr(self, manifest_array): expected_repr = textwrap.dedent( """ ManifestGroup( - arrays={'foo': ManifestArray}, + arrays={'foo': ManifestArray}, groups={}, metadata=GroupMetadata(attributes={}, zarr_format=3, consolidated_metadata=None, node_type='group'), ) From b2d95496f0df5189430939189fb239504a787198 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 15 Jul 2025 19:37:33 +0100 Subject: [PATCH 04/30] fix test for checking dtypes are the same --- conftest.py | 3 ++- virtualizarr/manifests/array_api.py | 13 +++++++++---- .../tests/test_manifests/test_array.py | 19 ++++++++++++++++++- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/conftest.py b/conftest.py index 3749a7c5..a87e3471 100644 --- a/conftest.py +++ b/conftest.py @@ -358,11 +358,12 @@ def manifest_array(array_v3_metadata): def _manifest_array( shape: tuple = (5, 2), chunks: tuple = (5, 2), + data_type: np.dtype = np.dtype("int32"), codecs: list[dict] | None = [ARRAYBYTES_CODEC, ZLIB_CODEC], dimension_names: Iterable[str] | None = None, ): metadata = array_v3_metadata( - shape=shape, chunks=chunks, codecs=codecs, dimension_names=dimension_names + shape=shape, chunks=chunks, data_type=data_type, codecs=codecs, dimension_names=dimension_names ) entries = _generate_chunk_entries(shape, chunks, _entry_from_chunk_key) chunkmanifest = ChunkManifest(entries=entries) diff --git a/virtualizarr/manifests/array_api.py b/virtualizarr/manifests/array_api.py index 2ad6fe78..241074d8 100644 --- a/virtualizarr/manifests/array_api.py +++ b/virtualizarr/manifests/array_api.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import TYPE_CHECKING, Any, Callable, cast, Union import numpy as np @@ -34,12 +34,17 @@ def decorator(func): @implements(np.result_type) -def result_type(*arrays_and_dtypes) -> np.dtype: +def result_type(*arrays_and_dtypes: Union["ManifestArray", np.dtype]) -> np.dtype: """Called by xarray to ensure all arguments to concat have the same dtype.""" - first_dtype, *other_dtypes = (np.dtype(obj) for obj in arrays_and_dtypes) + from virtualizarr.manifests.array import ManifestArray + + dtypes = (obj.dtype if isinstance(obj, ManifestArray) else np.dtype(obj) for obj in arrays_and_dtypes) + first_dtype, *other_dtypes = dtypes + unique_dtypes = set(dtypes) for other_dtype in other_dtypes: if other_dtype != first_dtype: - raise ValueError("dtypes not all consistent") + raise ValueError(f"Cannot combine arrays with inconsistent dtypes, but got {len(unique_dtypes)} distinct dtypes: {unique_dtypes}") + return first_dtype diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py index e6e7cbbd..1cd2f391 100644 --- a/virtualizarr/tests/test_manifests/test_array.py +++ b/virtualizarr/tests/test_manifests/test_array.py @@ -9,7 +9,7 @@ from virtualizarr.manifests import ChunkManifest, ManifestArray -class TestManifestArray: +class TestInit: def test_manifest_array(self, array_v3_metadata): chunks_dict = { "0.0.0": {"path": "s3://bucket/foo.nc", "offset": 100, "length": 100}, @@ -50,6 +50,23 @@ def test_manifest_array_dict_v3_metadata(self, array_v3_metadata): assert marr.ndim == 3 +class TestResultType: + def test_idempotent(self, manifest_array): + marr1 = manifest_array(shape=(), chunks=(), data_type=np.dtype("int32")) + marr2 = manifest_array(shape=(), chunks=(), data_type=np.dtype("int32")) + + assert np.result_type(marr1) == marr1.dtype + assert np.result_type(marr1, marr1.dtype) == marr1.dtype + assert np.result_type(marr1, marr2) == marr1.dtype + + def test_raises(self, manifest_array): + marr1 = manifest_array(shape=(), chunks=(), data_type=np.dtype("int32")) + marr2 = manifest_array(shape=(), chunks=(), data_type=np.dtype("int64")) + + with pytest.raises(ValueError, match="inconsistent"): + np.result_type(marr1, marr2) + + class TestEquals: def test_equals(self, array_v3_metadata): chunks_dict = { From a05d9e86491e9d5e83f93aa85c27e5c45401cfab Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 15 Jul 2025 20:01:11 +0100 Subject: [PATCH 05/30] fix icechunk tests --- virtualizarr/writers/icechunk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/writers/icechunk.py b/virtualizarr/writers/icechunk.py index 85f85162..fae8fee6 100644 --- a/virtualizarr/writers/icechunk.py +++ b/virtualizarr/writers/icechunk.py @@ -344,7 +344,7 @@ def write_virtual_variable_to_icechunk( name=name, shape=metadata.shape, chunks=metadata.chunks, - dtype=metadata.data_type.to_numpy(), + dtype=metadata.data_type, filters=filters, compressors=compressors, dimension_names=var.dims, From f172930a3af9b396a2349dbab6adf1743f521f67 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 15 Jul 2025 20:05:35 +0100 Subject: [PATCH 06/30] fix kerchunk tests --- virtualizarr/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/utils.py b/virtualizarr/utils.py index d1a174ca..24b5750e 100644 --- a/virtualizarr/utils.py +++ b/virtualizarr/utils.py @@ -150,7 +150,7 @@ def convert_v3_to_v2_metadata( v2_metadata = ArrayV2Metadata( shape=v3_metadata.shape, - dtype=v3_metadata.data_type.to_numpy(), + dtype=v3_metadata.data_type, chunks=v3_metadata.chunks, fill_value=fill_value or v3_metadata.fill_value, compressor=compressor_config, From 02dfa56103b2380e37fca73574a7ca0ccd0d7c39 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 15 Jul 2025 19:10:58 +0000 Subject: [PATCH 07/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- conftest.py | 6 +++++- virtualizarr/manifests/array.py | 2 +- virtualizarr/manifests/array_api.py | 11 ++++++++--- virtualizarr/tests/test_manifests/test_array.py | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/conftest.py b/conftest.py index a87e3471..5a83f641 100644 --- a/conftest.py +++ b/conftest.py @@ -363,7 +363,11 @@ def _manifest_array( dimension_names: Iterable[str] | None = None, ): metadata = array_v3_metadata( - shape=shape, chunks=chunks, data_type=data_type, codecs=codecs, dimension_names=dimension_names + shape=shape, + chunks=chunks, + data_type=data_type, + codecs=codecs, + dimension_names=dimension_names, ) entries = _generate_chunk_entries(shape, chunks, _entry_from_chunk_key) chunkmanifest = ChunkManifest(entries=entries) diff --git a/virtualizarr/manifests/array.py b/virtualizarr/manifests/array.py index ce5d6bb1..32c1f1d9 100644 --- a/virtualizarr/manifests/array.py +++ b/virtualizarr/manifests/array.py @@ -84,7 +84,7 @@ def chunks(self) -> tuple[int, ...]: Individual chunk size by number of elements. """ return self._metadata.chunks - + @property def dtype(self) -> np.dtype: """The native dtype of the data (typically a numpy dtype)""" diff --git a/virtualizarr/manifests/array_api.py b/virtualizarr/manifests/array_api.py index 241074d8..1651676b 100644 --- a/virtualizarr/manifests/array_api.py +++ b/virtualizarr/manifests/array_api.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any, Callable, cast, Union +from typing import TYPE_CHECKING, Any, Callable, Union, cast import numpy as np @@ -38,12 +38,17 @@ def result_type(*arrays_and_dtypes: Union["ManifestArray", np.dtype]) -> np.dtyp """Called by xarray to ensure all arguments to concat have the same dtype.""" from virtualizarr.manifests.array import ManifestArray - dtypes = (obj.dtype if isinstance(obj, ManifestArray) else np.dtype(obj) for obj in arrays_and_dtypes) + dtypes = ( + obj.dtype if isinstance(obj, ManifestArray) else np.dtype(obj) + for obj in arrays_and_dtypes + ) first_dtype, *other_dtypes = dtypes unique_dtypes = set(dtypes) for other_dtype in other_dtypes: if other_dtype != first_dtype: - raise ValueError(f"Cannot combine arrays with inconsistent dtypes, but got {len(unique_dtypes)} distinct dtypes: {unique_dtypes}") + raise ValueError( + f"Cannot combine arrays with inconsistent dtypes, but got {len(unique_dtypes)} distinct dtypes: {unique_dtypes}" + ) return first_dtype diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py index 1cd2f391..9f6ac5b1 100644 --- a/virtualizarr/tests/test_manifests/test_array.py +++ b/virtualizarr/tests/test_manifests/test_array.py @@ -58,7 +58,7 @@ def test_idempotent(self, manifest_array): assert np.result_type(marr1) == marr1.dtype assert np.result_type(marr1, marr1.dtype) == marr1.dtype assert np.result_type(marr1, marr2) == marr1.dtype - + def test_raises(self, manifest_array): marr1 = manifest_array(shape=(), chunks=(), data_type=np.dtype("int32")) marr2 = manifest_array(shape=(), chunks=(), data_type=np.dtype("int64")) From b42022f45ffcc53a7007cc160afb749adc172d4f Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 15 Jul 2025 21:41:19 +0100 Subject: [PATCH 08/30] fix combine test --- virtualizarr/tests/test_manifests/test_array.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py index 9f6ac5b1..9179e283 100644 --- a/virtualizarr/tests/test_manifests/test_array.py +++ b/virtualizarr/tests/test_manifests/test_array.py @@ -394,9 +394,7 @@ def test_refuse_combine(array_v3_metadata): with pytest.raises(NotImplementedError, match="different codecs"): func([marr1, marr2], axis=0) - metadata_copy = metadata_common.to_dict().copy() - metadata_copy["data_type"] = np.dtype("int64") - metadata_wrong_dtype = ArrayV3Metadata.from_dict(metadata_copy) + metadata_wrong_dtype = array_v3_metadata(shape=shape, chunks=chunks, data_type=np.dtype("int64")) marr2 = ManifestArray(metadata=metadata_wrong_dtype, chunkmanifest=chunkmanifest2) for func in [np.concatenate, np.stack]: with pytest.raises(ValueError, match="inconsistent dtypes"): From 7bb6e6f6d6260b0882706b467d96dfa2140b132b Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 16 Jul 2025 11:46:56 +0100 Subject: [PATCH 09/30] fix conversion of v3 to v2 metadata --- virtualizarr/tests/test_writers/test_kerchunk.py | 2 +- virtualizarr/utils.py | 6 +++++- virtualizarr/writers/kerchunk.py | 7 ++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/virtualizarr/tests/test_writers/test_kerchunk.py b/virtualizarr/tests/test_writers/test_kerchunk.py index 1bbeed1e..6396c289 100644 --- a/virtualizarr/tests/test_writers/test_kerchunk.py +++ b/virtualizarr/tests/test_writers/test_kerchunk.py @@ -167,7 +167,7 @@ def testconvert_v3_to_v2_metadata(array_v3_metadata): assert isinstance(v2_metadata, ArrayV2Metadata) assert v2_metadata.shape == shape - assert v2_metadata.dtype == np.dtype("int32") + assert v2_metadata.dtype.to_native_dtype() == np.dtype("int32") assert v2_metadata.chunks == chunks assert v2_metadata.fill_value == 0 compressor_config = v2_metadata.compressor.get_config() diff --git a/virtualizarr/utils.py b/virtualizarr/utils.py index 24b5750e..66cc7293 100644 --- a/virtualizarr/utils.py +++ b/virtualizarr/utils.py @@ -8,6 +8,7 @@ from zarr.abc.codec import ArrayArrayCodec, BytesBytesCodec from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata +from zarr.dtype import parse_data_type from virtualizarr.codecs import extract_codecs, get_codec_config @@ -148,9 +149,12 @@ def convert_v3_to_v2_metadata( # Handle filter configurations filter_configs = [get_codec_config(filter_) for filter_ in array_filters] + native_dtype = v3_metadata.data_type.to_native_dtype() + v2_compatible_data_type = parse_data_type(native_dtype, zarr_format=2) + v2_metadata = ArrayV2Metadata( shape=v3_metadata.shape, - dtype=v3_metadata.data_type, + dtype=v2_compatible_data_type, chunks=v3_metadata.chunks, fill_value=fill_value or v3_metadata.fill_value, compressor=compressor_config, diff --git a/virtualizarr/writers/kerchunk.py b/virtualizarr/writers/kerchunk.py index 139a8619..cf49cbc4 100644 --- a/virtualizarr/writers/kerchunk.py +++ b/virtualizarr/writers/kerchunk.py @@ -11,6 +11,7 @@ from xarray.conventions import encode_dataset_coordinates from zarr.core.common import JSON from zarr.core.metadata.v2 import ArrayV2Metadata +from zarr.dtype import parse_data_type from virtualizarr.manifests import ManifestArray from virtualizarr.manifests.manifest import join @@ -55,6 +56,9 @@ def default(self, obj): def to_kerchunk_json(v2_metadata: ArrayV2Metadata) -> str: """Convert V2 metadata to kerchunk JSON format.""" + print(v2_metadata) + print(type(v2_metadata.dtype)) + zarray_dict: dict[str, JSON] = v2_metadata.to_dict() if v2_metadata.filters: zarray_dict["filters"] = [ @@ -161,12 +165,13 @@ def variable_to_kerchunk_arr_refs(var: Variable, var_name: str) -> KerchunkArrRe array_v2_metadata = ArrayV2Metadata( chunks=np_arr.shape, shape=np_arr.shape, - dtype=np_arr.dtype, + dtype=parse_data_type(np_arr.dtype, zarr_format=2), # needed unless zarr-python fixes https://github.com/zarr-developers/zarr-python/issues/3253 order="C", fill_value=None, ) zattrs = {**var.attrs} + print(array_v2_metadata) zarray_dict = to_kerchunk_json(array_v2_metadata) arr_refs[".zarray"] = zarray_dict From 744fd491fb5bf6bd063f5821dc846800306a6d34 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 16 Jul 2025 15:48:52 +0100 Subject: [PATCH 10/30] write function to normalize kerchunk references into true json --- virtualizarr/utils.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/virtualizarr/utils.py b/virtualizarr/utils.py index d1a174ca..f5205c63 100644 --- a/virtualizarr/utils.py +++ b/virtualizarr/utils.py @@ -3,13 +3,21 @@ import importlib import io import os -from typing import TYPE_CHECKING, Any, Iterable, Optional, Union +import json +from typing import TYPE_CHECKING, Any, Iterable, Optional, Union, Mapping, Sequence from urllib.parse import urlparse +import copy from zarr.abc.codec import ArrayArrayCodec, BytesBytesCodec from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata from virtualizarr.codecs import extract_codecs, get_codec_config +from virtualizarr.types.general import JSON +from virtualizarr.types.kerchunk import KerchunkStoreRefs, KerchunkArrRefs + +# taken from zarr.core.common +JSON = str | int | float | Mapping[str, "JSON"] | Sequence["JSON"] | None + if TYPE_CHECKING: import fsspec.core @@ -160,3 +168,20 @@ def convert_v3_to_v2_metadata( dimension_separator=".", # Assuming '.' as default dimension separator ) return v2_metadata + + +def kerchunk_refs_as_json(refs: KerchunkStoreRefs) -> JSON: + """ + Normalizes all Kerchunk references into true JSON all the way down. + + See https://github.com/zarr-developers/VirtualiZarr/issues/679 for context as to why this is needed. + """ + normalized_result: JSON = copy.deepcopy(refs) + + for k, v in refs["refs"].items(): + # check for strings because the value could be for a chunk, in which case it is already a list like ["/test.nc", 6144, 48] + # this is a rather fragile way to discover if we're looking at a chunk key or not, but it should work... + if isinstance(v, str): + normalized_result["refs"][k] = json.loads(v) + + return normalized_result From 35f94b5e049d2a0057e312bd39037e556a9cb4cb Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 16 Jul 2025 15:49:13 +0100 Subject: [PATCH 11/30] use the new function in our tests --- .../tests/test_writers/test_kerchunk.py | 46 +++++++++++++++++-- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/virtualizarr/tests/test_writers/test_kerchunk.py b/virtualizarr/tests/test_writers/test_kerchunk.py index 1bbeed1e..732a0524 100644 --- a/virtualizarr/tests/test_writers/test_kerchunk.py +++ b/virtualizarr/tests/test_writers/test_kerchunk.py @@ -8,7 +8,45 @@ from conftest import ARRAYBYTES_CODEC from virtualizarr.manifests import ChunkManifest, ManifestArray from virtualizarr.tests import requires_fastparquet, requires_kerchunk -from virtualizarr.utils import convert_v3_to_v2_metadata +from virtualizarr.utils import convert_v3_to_v2_metadata, kerchunk_refs_as_json, JSON + + +def test_deserialize_to_json(): + refs = { + "version": 1, + "refs": { + ".zgroup": '{"zarr_format":2}', + ".zattrs": "{}", + "a/.zarray": '{"shape":[2,3],"chunks":[2,3],"fill_value":0,"order":"C","filters":null,"dimension_separator":".","compressor":null,"attributes":{},"zarr_format":2,"dtype":" Date: Wed, 16 Jul 2025 15:52:25 +0100 Subject: [PATCH 12/30] remove outdated imports --- virtualizarr/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/virtualizarr/utils.py b/virtualizarr/utils.py index f5205c63..e37d7aed 100644 --- a/virtualizarr/utils.py +++ b/virtualizarr/utils.py @@ -12,8 +12,7 @@ from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata from virtualizarr.codecs import extract_codecs, get_codec_config -from virtualizarr.types.general import JSON -from virtualizarr.types.kerchunk import KerchunkStoreRefs, KerchunkArrRefs +from virtualizarr.types.kerchunk import KerchunkStoreRefs # taken from zarr.core.common JSON = str | int | float | Mapping[str, "JSON"] | Sequence["JSON"] | None From e1e54b3fb3c6bcb1b12c375581b5e0f7b9925313 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 16 Jul 2025 14:53:06 +0000 Subject: [PATCH 13/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- virtualizarr/tests/test_writers/test_kerchunk.py | 16 ++++++++++------ virtualizarr/utils.py | 10 +++++----- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/virtualizarr/tests/test_writers/test_kerchunk.py b/virtualizarr/tests/test_writers/test_kerchunk.py index 732a0524..7b0930fd 100644 --- a/virtualizarr/tests/test_writers/test_kerchunk.py +++ b/virtualizarr/tests/test_writers/test_kerchunk.py @@ -8,7 +8,7 @@ from conftest import ARRAYBYTES_CODEC from virtualizarr.manifests import ChunkManifest, ManifestArray from virtualizarr.tests import requires_fastparquet, requires_kerchunk -from virtualizarr.utils import convert_v3_to_v2_metadata, kerchunk_refs_as_json, JSON +from virtualizarr.utils import JSON, convert_v3_to_v2_metadata, kerchunk_refs_as_json def test_deserialize_to_json(): @@ -28,10 +28,10 @@ def test_deserialize_to_json(): ".zgroup": {"zarr_format": 2}, ".zattrs": {}, "a/.zarray": { - "shape": [2,3], - "chunks": [2,3], + "shape": [2, 3], + "chunks": [2, 3], "fill_value": 0, - "order": "C" , + "order": "C", "filters": None, "dimension_separator": ".", "compressor": None, @@ -106,7 +106,9 @@ def test_accessor_to_kerchunk_dict_empty(self, array_v3_metadata): } result_ds_refs = ds.vz.to_kerchunk(format="dict") - assert kerchunk_refs_as_json(result_ds_refs) == kerchunk_refs_as_json(expected_ds_refs) + assert kerchunk_refs_as_json(result_ds_refs) == kerchunk_refs_as_json( + expected_ds_refs + ) def test_accessor_to_kerchunk_json(self, tmp_path, array_v3_metadata): import ujson @@ -143,7 +145,9 @@ def test_accessor_to_kerchunk_json(self, tmp_path, array_v3_metadata): "a/0.0": ["/test.nc", 6144, 48], }, } - assert kerchunk_refs_as_json(loaded_refs) == kerchunk_refs_as_json(expected_ds_refs) + assert kerchunk_refs_as_json(loaded_refs) == kerchunk_refs_as_json( + expected_ds_refs + ) @requires_fastparquet def test_accessor_to_kerchunk_parquet(self, tmp_path, array_v3_metadata): diff --git a/virtualizarr/utils.py b/virtualizarr/utils.py index e37d7aed..c8de739a 100644 --- a/virtualizarr/utils.py +++ b/virtualizarr/utils.py @@ -1,12 +1,12 @@ from __future__ import annotations +import copy import importlib import io -import os import json -from typing import TYPE_CHECKING, Any, Iterable, Optional, Union, Mapping, Sequence +import os +from typing import TYPE_CHECKING, Any, Iterable, Mapping, Optional, Sequence, Union from urllib.parse import urlparse -import copy from zarr.abc.codec import ArrayArrayCodec, BytesBytesCodec from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata @@ -172,7 +172,7 @@ def convert_v3_to_v2_metadata( def kerchunk_refs_as_json(refs: KerchunkStoreRefs) -> JSON: """ Normalizes all Kerchunk references into true JSON all the way down. - + See https://github.com/zarr-developers/VirtualiZarr/issues/679 for context as to why this is needed. """ normalized_result: JSON = copy.deepcopy(refs) @@ -182,5 +182,5 @@ def kerchunk_refs_as_json(refs: KerchunkStoreRefs) -> JSON: # this is a rather fragile way to discover if we're looking at a chunk key or not, but it should work... if isinstance(v, str): normalized_result["refs"][k] = json.loads(v) - + return normalized_result From 854808690f4c19e93fc174afea6a7587e6dee828 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 16 Jul 2025 15:06:41 +0000 Subject: [PATCH 14/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- virtualizarr/tests/test_manifests/test_array.py | 4 +++- virtualizarr/writers/kerchunk.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py index 9179e283..6b09cb0a 100644 --- a/virtualizarr/tests/test_manifests/test_array.py +++ b/virtualizarr/tests/test_manifests/test_array.py @@ -394,7 +394,9 @@ def test_refuse_combine(array_v3_metadata): with pytest.raises(NotImplementedError, match="different codecs"): func([marr1, marr2], axis=0) - metadata_wrong_dtype = array_v3_metadata(shape=shape, chunks=chunks, data_type=np.dtype("int64")) + metadata_wrong_dtype = array_v3_metadata( + shape=shape, chunks=chunks, data_type=np.dtype("int64") + ) marr2 = ManifestArray(metadata=metadata_wrong_dtype, chunkmanifest=chunkmanifest2) for func in [np.concatenate, np.stack]: with pytest.raises(ValueError, match="inconsistent dtypes"): diff --git a/virtualizarr/writers/kerchunk.py b/virtualizarr/writers/kerchunk.py index cf49cbc4..292b43bd 100644 --- a/virtualizarr/writers/kerchunk.py +++ b/virtualizarr/writers/kerchunk.py @@ -165,7 +165,9 @@ def variable_to_kerchunk_arr_refs(var: Variable, var_name: str) -> KerchunkArrRe array_v2_metadata = ArrayV2Metadata( chunks=np_arr.shape, shape=np_arr.shape, - dtype=parse_data_type(np_arr.dtype, zarr_format=2), # needed unless zarr-python fixes https://github.com/zarr-developers/zarr-python/issues/3253 + dtype=parse_data_type( + np_arr.dtype, zarr_format=2 + ), # needed unless zarr-python fixes https://github.com/zarr-developers/zarr-python/issues/3253 order="C", fill_value=None, ) From 01baf83b2171194147ab238a7c6f2ad20cf624a2 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 16 Jul 2025 16:11:19 +0100 Subject: [PATCH 15/30] missed a coersion --- virtualizarr/tests/test_writers/test_kerchunk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/tests/test_writers/test_kerchunk.py b/virtualizarr/tests/test_writers/test_kerchunk.py index 7b0930fd..29970ece 100644 --- a/virtualizarr/tests/test_writers/test_kerchunk.py +++ b/virtualizarr/tests/test_writers/test_kerchunk.py @@ -79,7 +79,7 @@ def test_accessor_to_kerchunk_dict(self, array_v3_metadata): } result_ds_refs = ds.vz.to_kerchunk(format="dict") - assert result_ds_refs == kerchunk_refs_as_json(expected_ds_refs) + assert kerchunk_refs_as_json(result_ds_refs) == kerchunk_refs_as_json(expected_ds_refs) def test_accessor_to_kerchunk_dict_empty(self, array_v3_metadata): manifest = ChunkManifest(entries={}, shape=(1, 1)) From 35cfddf3ff35ef15595c7024f24594c4c6784598 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 16 Jul 2025 16:16:25 +0100 Subject: [PATCH 16/30] fix dmrpp test --- virtualizarr/tests/test_parsers/test_dmrpp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/tests/test_parsers/test_dmrpp.py b/virtualizarr/tests/test_parsers/test_dmrpp.py index 4945696c..c1e245bf 100644 --- a/virtualizarr/tests/test_parsers/test_dmrpp.py +++ b/virtualizarr/tests/test_parsers/test_dmrpp.py @@ -354,7 +354,7 @@ def test_parse_variable(tmp_path): basic_dmrpp = dmrparser(DMRPP_XML_STRINGS["basic"], tmp_path=tmp_path) var = basic_dmrpp._parse_variable(basic_dmrpp.find_node_fqn("/data")) - assert var.metadata.dtype == "float32" + assert var.metadata.dtype.to_native_dtype() == "float32" assert var.metadata.dimension_names == ("x", "y") assert var.shape == (720, 1440) assert var.chunks == (360, 720) From f0739c108a904527dcd8bb2f0d840380fb09433b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 16 Jul 2025 15:16:41 +0000 Subject: [PATCH 17/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- virtualizarr/tests/test_writers/test_kerchunk.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/virtualizarr/tests/test_writers/test_kerchunk.py b/virtualizarr/tests/test_writers/test_kerchunk.py index 456fc3ee..24985930 100644 --- a/virtualizarr/tests/test_writers/test_kerchunk.py +++ b/virtualizarr/tests/test_writers/test_kerchunk.py @@ -79,7 +79,9 @@ def test_accessor_to_kerchunk_dict(self, array_v3_metadata): } result_ds_refs = ds.vz.to_kerchunk(format="dict") - assert kerchunk_refs_as_json(result_ds_refs) == kerchunk_refs_as_json(expected_ds_refs) + assert kerchunk_refs_as_json(result_ds_refs) == kerchunk_refs_as_json( + expected_ds_refs + ) def test_accessor_to_kerchunk_dict_empty(self, array_v3_metadata): manifest = ChunkManifest(entries={}, shape=(1, 1)) From 31941fbff191e83576fc97f112d065943692315b Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 16 Jul 2025 18:31:31 +0100 Subject: [PATCH 18/30] require latest zarr --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4e413100..7fd16fd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "numcodecs>=0.15.1", "ujson", "packaging", - "zarr>=3.0.8", + "zarr>=3.1.0", "obstore>=0.5.1", ] From a17884d2245c322107f3a70d043e7e9b69b4f794 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 16 Jul 2025 18:39:26 +0100 Subject: [PATCH 19/30] update minimum version in pixi --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7fd16fd7..5bf43250 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,7 +169,7 @@ rust = "*" xarray = "==2025.3.0" numpy = "==2.0.0" numcodecs = "==0.15.1" -zarr = "==3.0.8" +zarr = "==3.1.0" obstore = "==0.5.1" # Define commands to run within the test environments From 93f775872821555152c624a1c35b7326a66a8f83 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 17 Jul 2025 11:13:43 +0100 Subject: [PATCH 20/30] fix array return type and add test --- virtualizarr/manifests/array.py | 23 ++++++++++++++++--- .../tests/test_manifests/test_array.py | 17 ++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/virtualizarr/manifests/array.py b/virtualizarr/manifests/array.py index 32c1f1d9..5a44dc0f 100644 --- a/virtualizarr/manifests/array.py +++ b/virtualizarr/manifests/array.py @@ -90,7 +90,7 @@ def dtype(self) -> np.dtype: """The native dtype of the data (typically a numpy dtype)""" zdtype = self.metadata.data_type dtype = zdtype.to_native_dtype() - return dtype.str + return dtype @property def shape(self) -> tuple[int, ...]: @@ -110,12 +110,29 @@ def size(self) -> int: def __repr__(self) -> str: return f"ManifestArray" + @property + def nbytes(self) -> int: + """ + The total number of bytes that are stored in the chunks of this array. + + Notes + ----- + This value is calculated by multiplying the number of elements in the array and the size + of each element, the latter of which is determined by the dtype of the array. + For this reason, ``nbytes`` will likely be inaccurate for arrays with variable-length + dtypes. It is not possible to determine the size of an array with variable-length elements + from the shape and dtype alone. + """ + return self.size * self.dtype.itemsize + @property def nbytes_virtual(self) -> int: """ - Size required to hold these references in memory in bytes. + The total number of bytes required to hold these virtual references in memory in bytes. - Note this is not the size of the referenced array if it were actually loaded into memory, + Notes + ----- + This is not the size of the referenced array if it were actually loaded into memory (use `.nbytes`), this is only the size of the pointers to the chunk locations. If you were to load the data into memory it would be ~1e6x larger for 1MB chunks. """ diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py index 6b09cb0a..1a3b6d35 100644 --- a/virtualizarr/tests/test_manifests/test_array.py +++ b/virtualizarr/tests/test_manifests/test_array.py @@ -445,6 +445,23 @@ def test_indexing_scalar_with_ellipsis(self, manifest_array): assert marr[...] == marr +def test_nbytes(array_v3_metadata): + chunks = (5, 10) + shape = (5, 20) + metadata = array_v3_metadata( + shape=shape, + chunks=chunks, + dimension_names=["x", "y"], + ) + chunks_dict = { + "0.0": {"path": "/foo.nc", "offset": 100, "length": 100}, + "0.1": {"path": "/foo.nc", "offset": 200, "length": 100}, + } + manifest = ChunkManifest(entries=chunks_dict) + marr = ManifestArray(metadata=metadata, chunkmanifest=manifest) + assert marr.nbytes == 400 + + def test_to_xarray(array_v3_metadata): chunks = (5, 10) shape = (5, 20) From 407f208b1b7a3f0617c0c4d0bb22198227097521 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 17 Jul 2025 11:16:52 +0100 Subject: [PATCH 21/30] change expected group repr dtype str --- virtualizarr/tests/test_manifests/test_group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/tests/test_manifests/test_group.py b/virtualizarr/tests/test_manifests/test_group.py index ac5a5781..751596f9 100644 --- a/virtualizarr/tests/test_manifests/test_group.py +++ b/virtualizarr/tests/test_manifests/test_group.py @@ -27,7 +27,7 @@ def test_manifest_repr(self, manifest_array): expected_repr = textwrap.dedent( """ ManifestGroup( - arrays={'foo': ManifestArray}, + arrays={'foo': ManifestArray}, groups={}, metadata=GroupMetadata(attributes={}, zarr_format=3, consolidated_metadata=None, node_type='group'), ) From 09d801d02314dd4b994d156fd2c1f8d094653f25 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 17 Jul 2025 11:23:22 +0100 Subject: [PATCH 22/30] fix metadata comparison by just using updated version of ArrayV3Metadata.to_dict() --- virtualizarr/manifests/array.py | 2 +- virtualizarr/manifests/utils.py | 18 ------------------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/virtualizarr/manifests/array.py b/virtualizarr/manifests/array.py index 5a44dc0f..7e51abf1 100644 --- a/virtualizarr/manifests/array.py +++ b/virtualizarr/manifests/array.py @@ -189,7 +189,7 @@ def __eq__( # type: ignore[override] if self.shape != other.shape: raise NotImplementedError("Unsure how to handle broadcasting like this") - if not utils.metadata_identical(self.metadata, other.metadata): + if not self.metadata.to_dict() == other.metadata.to_dict(): return np.full(shape=self.shape, fill_value=False, dtype=np.dtype(bool)) else: if self.manifest == other.manifest: diff --git a/virtualizarr/manifests/utils.py b/virtualizarr/manifests/utils.py index 1252e2bb..222ac7a1 100644 --- a/virtualizarr/manifests/utils.py +++ b/virtualizarr/manifests/utils.py @@ -157,24 +157,6 @@ def check_same_shapes(shapes: list[tuple[int, ...]]) -> None: ) -# TODO remove this once https://github.com/zarr-developers/zarr-python/issues/2929 is solved upstream -def metadata_identical(metadata1: ArrayV3Metadata, metadata2: ArrayV3Metadata) -> bool: - """Checks the metadata of two zarr arrays are identical, including special treatment for NaN fill_values.""" - metadata_dict1 = metadata1.to_dict() - metadata_dict2 = metadata2.to_dict() - - # fill_value is a special case because numpy NaNs cannot be compared using __eq__, see https://stackoverflow.com/a/10059796 - fill_value1 = metadata_dict1.pop("fill_value") - fill_value2 = metadata_dict2.pop("fill_value") - if np.isnan(fill_value1) and np.isnan(fill_value2): # type: ignore[arg-type] - fill_values_equal = fill_value1.dtype == fill_value2.dtype # type: ignore[union-attr] - else: - fill_values_equal = fill_value1 == fill_value2 - - # everything else in ArrayV3Metadata is a string, Enum, or Dataclass - return fill_values_equal and metadata_dict1 == metadata_dict2 - - def _remove_element_at_position(t: tuple[int, ...], pos: int) -> tuple[int, ...]: new_l = list(t) new_l.pop(pos) From 91171160acabd50b4df8ad451c8abf9ed79fea84 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 17 Jul 2025 11:29:10 +0100 Subject: [PATCH 23/30] don't need to add .nbytes to ManifestArray --- virtualizarr/manifests/array.py | 15 --------------- virtualizarr/tests/test_manifests/test_array.py | 17 ----------------- 2 files changed, 32 deletions(-) diff --git a/virtualizarr/manifests/array.py b/virtualizarr/manifests/array.py index 7e51abf1..f6d93819 100644 --- a/virtualizarr/manifests/array.py +++ b/virtualizarr/manifests/array.py @@ -110,21 +110,6 @@ def size(self) -> int: def __repr__(self) -> str: return f"ManifestArray" - @property - def nbytes(self) -> int: - """ - The total number of bytes that are stored in the chunks of this array. - - Notes - ----- - This value is calculated by multiplying the number of elements in the array and the size - of each element, the latter of which is determined by the dtype of the array. - For this reason, ``nbytes`` will likely be inaccurate for arrays with variable-length - dtypes. It is not possible to determine the size of an array with variable-length elements - from the shape and dtype alone. - """ - return self.size * self.dtype.itemsize - @property def nbytes_virtual(self) -> int: """ diff --git a/virtualizarr/tests/test_manifests/test_array.py b/virtualizarr/tests/test_manifests/test_array.py index 1a3b6d35..6b09cb0a 100644 --- a/virtualizarr/tests/test_manifests/test_array.py +++ b/virtualizarr/tests/test_manifests/test_array.py @@ -445,23 +445,6 @@ def test_indexing_scalar_with_ellipsis(self, manifest_array): assert marr[...] == marr -def test_nbytes(array_v3_metadata): - chunks = (5, 10) - shape = (5, 20) - metadata = array_v3_metadata( - shape=shape, - chunks=chunks, - dimension_names=["x", "y"], - ) - chunks_dict = { - "0.0": {"path": "/foo.nc", "offset": 100, "length": 100}, - "0.1": {"path": "/foo.nc", "offset": 200, "length": 100}, - } - manifest = ChunkManifest(entries=chunks_dict) - marr = ManifestArray(metadata=metadata, chunkmanifest=manifest) - assert marr.nbytes == 400 - - def test_to_xarray(array_v3_metadata): chunks = (5, 10) shape = (5, 20) From 3ebb731bbd46305be9b89df540c5dc5e2f028524 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 17 Jul 2025 11:33:54 +0100 Subject: [PATCH 24/30] remove rogue print statements --- virtualizarr/writers/kerchunk.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/virtualizarr/writers/kerchunk.py b/virtualizarr/writers/kerchunk.py index 292b43bd..0431f004 100644 --- a/virtualizarr/writers/kerchunk.py +++ b/virtualizarr/writers/kerchunk.py @@ -56,9 +56,6 @@ def default(self, obj): def to_kerchunk_json(v2_metadata: ArrayV2Metadata) -> str: """Convert V2 metadata to kerchunk JSON format.""" - print(v2_metadata) - print(type(v2_metadata.dtype)) - zarray_dict: dict[str, JSON] = v2_metadata.to_dict() if v2_metadata.filters: zarray_dict["filters"] = [ @@ -173,7 +170,6 @@ def variable_to_kerchunk_arr_refs(var: Variable, var_name: str) -> KerchunkArrRe ) zattrs = {**var.attrs} - print(array_v2_metadata) zarray_dict = to_kerchunk_json(array_v2_metadata) arr_refs[".zarray"] = zarray_dict From bf104cb12df22768f89842eb4e70332ef7a93a6f Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 17 Jul 2025 11:52:02 +0100 Subject: [PATCH 25/30] try fixing zarr data type error in icechunk writer in CI --- virtualizarr/writers/icechunk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virtualizarr/writers/icechunk.py b/virtualizarr/writers/icechunk.py index fae8fee6..a1136ce6 100644 --- a/virtualizarr/writers/icechunk.py +++ b/virtualizarr/writers/icechunk.py @@ -344,7 +344,7 @@ def write_virtual_variable_to_icechunk( name=name, shape=metadata.shape, chunks=metadata.chunks, - dtype=metadata.data_type, + dtype=metadata.data_type.to_native_dtype(), filters=filters, compressors=compressors, dimension_names=var.dims, From b1961b633c0c6844a5ff19e1d71c0d42f683079b Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 17 Jul 2025 12:01:25 +0100 Subject: [PATCH 26/30] remove xfail now that datetime dtypes are supported in zarr --- virtualizarr/tests/test_integration.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/virtualizarr/tests/test_integration.py b/virtualizarr/tests/test_integration.py index 14b74d79..3405f90f 100644 --- a/virtualizarr/tests/test_integration.py +++ b/virtualizarr/tests/test_integration.py @@ -343,9 +343,6 @@ def test_non_dimension_coordinates( for coord in ds.coords: assert ds.coords[coord].attrs == roundtrip.coords[coord].attrs - @pytest.mark.xfail( - reason="Datetime and timedelta data types not yet supported by zarr-python 3.0" # https://github.com/zarr-developers/zarr-python/issues/2616 - ) def test_datetime64_dtype_fill_value( self, tmpdir, roundtrip_func, array_v3_metadata ): From b0bddf2b9e46f553fbdf953497086e37080842d9 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 17 Jul 2025 12:12:07 +0100 Subject: [PATCH 27/30] add xfail for one test case --- virtualizarr/tests/test_integration.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/virtualizarr/tests/test_integration.py b/virtualizarr/tests/test_integration.py index 3405f90f..d7db3536 100644 --- a/virtualizarr/tests/test_integration.py +++ b/virtualizarr/tests/test_integration.py @@ -346,6 +346,9 @@ def test_non_dimension_coordinates( def test_datetime64_dtype_fill_value( self, tmpdir, roundtrip_func, array_v3_metadata ): + if roundtrip_func == roundtrip_as_in_memory_icechunk: + pytest.xfail(reason="xarray can't decode the ns datetime fill_value") + chunks_dict = { "0.0.0": {"path": "/foo.nc", "offset": 100, "length": 100}, } From fc1bd3410151d5dc68b52cef0de33eff2d9821ac Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 17 Jul 2025 12:15:26 +0100 Subject: [PATCH 28/30] add note about now supporting big-endian --- docs/releases.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/releases.md b/docs/releases.md index e38b4a12..42544559 100644 --- a/docs/releases.md +++ b/docs/releases.md @@ -23,6 +23,7 @@ [Chuck Daniels](https://github.com/chuckwondo). - Now throws a warning if you attempt to write an entirely non-virtual dataset to a virtual references format ([#657](https://github.com/zarr-developers/VirtualiZarr/pull/657)). By [Tom Nicholas](https://github.com/TomNicholas). +- Support big-endian data via zarr-python 3.0.9 and zarr v3's new data types system ([#618](https://github.com/zarr-developers/VirtualiZarr/issues/618), [#677](https://github.com/zarr-developers/VirtualiZarr/issues/677)) By [Max Jones](https://github.com/maxrjones) and [Tom Nicholas](https://github.com/TomNicholas). ### Breaking changes From 925ffecb3a80a9ddb6418e56b1661bdbf4778149 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 17 Jul 2025 13:53:33 +0100 Subject: [PATCH 29/30] remove unnecessary conversion of dtype --- virtualizarr/utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/virtualizarr/utils.py b/virtualizarr/utils.py index 03f087dd..93390dfb 100644 --- a/virtualizarr/utils.py +++ b/virtualizarr/utils.py @@ -156,12 +156,9 @@ def convert_v3_to_v2_metadata( # Handle filter configurations filter_configs = [get_codec_config(filter_) for filter_ in array_filters] - native_dtype = v3_metadata.data_type.to_native_dtype() - v2_compatible_data_type = parse_data_type(native_dtype, zarr_format=2) - v2_metadata = ArrayV2Metadata( shape=v3_metadata.shape, - dtype=v2_compatible_data_type, + dtype=v3_metadata.data_type, chunks=v3_metadata.chunks, fill_value=fill_value or v3_metadata.fill_value, compressor=compressor_config, From 0d7ad60a861071e74a403fed25812418491e282e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 17 Jul 2025 12:54:01 +0000 Subject: [PATCH 30/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- virtualizarr/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/virtualizarr/utils.py b/virtualizarr/utils.py index 93390dfb..f0c94e0e 100644 --- a/virtualizarr/utils.py +++ b/virtualizarr/utils.py @@ -10,7 +10,6 @@ from zarr.abc.codec import ArrayArrayCodec, BytesBytesCodec from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata -from zarr.dtype import parse_data_type from virtualizarr.codecs import extract_codecs, get_codec_config from virtualizarr.types.kerchunk import KerchunkStoreRefs