diff --git a/docs/guide/consolidated_metadata.rst b/docs/guide/consolidated_metadata.rst index 480a855831..5010d32481 100644 --- a/docs/guide/consolidated_metadata.rst +++ b/docs/guide/consolidated_metadata.rst @@ -12,8 +12,8 @@ Usage If consolidated metadata is present in a Zarr Group's metadata then it is used by default. The initial read to open the group will need to communicate with -the store (reading from a file for a :class:`zarr.store.LocalStore`, making a -network request for a :class:`zarr.store.RemoteStore`). After that, any subsequent +the store (reading from a file for a :class:`zarr.storage.LocalStore`, making a +network request for a :class:`zarr.storage.RemoteStore`). After that, any subsequent metadata reads get child Group or Array nodes will *not* require reads from the store. In Python, the consolidated metadata is available on the ``.consolidated_metadata`` @@ -22,7 +22,7 @@ attribute of the ``GroupMetadata`` object. .. code-block:: python >>> import zarr - >>> store = zarr.store.MemoryStore({}, mode="w") + >>> store = zarr.storage.MemoryStore({}, mode="w") >>> group = zarr.open_group(store=store) >>> group.create_array(shape=(1,), name="a") >>> group.create_array(shape=(2, 2), name="b") diff --git a/docs/guide/index.rst b/docs/guide/index.rst index 106c35ce8d..f841dbb85d 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -4,4 +4,5 @@ Guide .. toctree:: :maxdepth: 1 + storage consolidated_metadata diff --git a/docs/guide/storage.rst b/docs/guide/storage.rst new file mode 100644 index 0000000000..dfda553c43 --- /dev/null +++ b/docs/guide/storage.rst @@ -0,0 +1,101 @@ +Storage +======= + +Zarr-Python supports multiple storage backends, including: local file systems, +Zip files, remote stores via ``fsspec`` (S3, HTTP, etc.), and in-memory stores. In +Zarr-Python 3, stores must implement the abstract store API from +:class:`zarr.abc.store.Store`. + +.. 
note:: + Unlike Zarr-Python 2 where the store interface was built around a generic ``MutableMapping`` + API, Zarr-Python 3 utilizes a custom store API that utilizes Python's AsyncIO library. + +Implicit Store Creation +----------------------- + +In most cases, it is not required to create a ``Store`` object explicitly. Passing a string +to Zarr's top level API will result in the store being created automatically. + +.. code-block:: python + + >>> import zarr + >>> zarr.open("data/foo/bar", mode="r") # implicitly creates a LocalStore + + >>> zarr.open("s3://foo/bar", mode="r") # implicitly creates a RemoteStore + + >>> data = {} + >>> zarr.open(data, mode="w") # implicitly creates a MemoryStore + + +Explicit Store Creation +----------------------- + +In some cases, it may be helpful to create a store instance directly. Zarr-Python offers four +built-in stores: :class:`zarr.storage.LocalStore`, :class:`zarr.storage.RemoteStore`, +:class:`zarr.storage.ZipStore`, and :class:`zarr.storage.MemoryStore`. + +Local Store +~~~~~~~~~~~ + +The :class:`zarr.storage.LocalStore` stores data in a nested set of directories on a local +filesystem. + +.. code-block:: python + + >>> import zarr + >>> store = zarr.storage.LocalStore("data/foo/bar", mode="r") + >>> zarr.open(store=store) + + +Zip Store +~~~~~~~~~ + +The :class:`zarr.storage.ZipStore` stores the contents of a Zarr hierarchy in a single +Zip file. The `Zip Store Specification`_ is currently in draft form. + +.. code-block:: python + + >>> import zarr + >>> store = zarr.storage.ZipStore("data.zip", mode="w") + >>> zarr.open(store=store, shape=(2,)) + <Array zip://data.zip shape=(2,) dtype=float64> + +Remote Store +~~~~~~~~~~~~ + +The :class:`zarr.storage.RemoteStore` stores the contents of a Zarr hierarchy on a remote +storage system such as cloud object storage (e.g. AWS S3 or Google Cloud Storage). The +:class:`zarr.storage.RemoteStore` is backed by `Fsspec`_. + +.. code-block:: python + + >>> import zarr + >>> store = zarr.storage.RemoteStore("gs://foo/bar", mode="r") + >>> zarr.open(store=store) + <Array <RemoteStore(GCSFileSystem, foo/bar)> shape=(10, 20) dtype=float32> + +Memory Store +~~~~~~~~~~~~ + +The :class:`zarr.storage.MemoryStore` is an in-memory store that allows for serialization of +Zarr data (metadata and chunks) to a dictionary. + +.. 
code-block:: python + + >>> import zarr + >>> data = {} + >>> store = zarr.storage.MemoryStore(data, mode="w") + >>> zarr.open(store=store, shape=(2, )) + + +Developing custom stores +------------------------ + +Zarr-Python's :class:`zarr.abc.store.Store` API is meant to be extended. The Store Abstract Base +Class includes all of the methods needed to be a fully operational store in Zarr-Python. +Zarr also provides a test harness for custom stores: :class:`zarr.testing.store.StoreTests`. + +.. _Zip Store Specification: https://github.com/zarr-developers/zarr-specs/pull/311 +.. _Fsspec: https://filesystem-spec.readthedocs.io diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 7771a10464..261e56dd01 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -20,6 +20,8 @@ class AccessMode(NamedTuple): + """Access mode flags.""" + str: AccessModeLiteral readonly: bool overwrite: bool @@ -28,6 +30,24 @@ class AccessMode(NamedTuple): @classmethod def from_literal(cls, mode: AccessModeLiteral) -> Self: + """ + Create an AccessMode instance from a literal. + + Parameters + ---------- + mode : AccessModeLiteral + One of 'r', 'r+', 'w', 'w-', 'a'. + + Returns + ------- + AccessMode + The created instance. + + Raises + ------ + ValueError + If mode is not one of 'r', 'r+', 'w', 'w-', 'a'. + """ if mode in ("r", "r+", "a", "w", "w-"): return cls( str=mode, @@ -40,6 +60,10 @@ def from_literal(cls, mode: AccessModeLiteral) -> Self: class Store(ABC): + """ + Abstract base class for Zarr stores. + """ + _mode: AccessMode _is_open: bool @@ -49,6 +73,21 @@ def __init__(self, *args: Any, mode: AccessModeLiteral = "r", **kwargs: Any) -> @classmethod async def open(cls, *args: Any, **kwargs: Any) -> Self: + """ + Create and open the store. + + Parameters + ---------- + *args : Any + Positional arguments to pass to the store constructor. + **kwargs : Any + Keyword arguments to pass to the store constructor. 
+ + Returns + ------- + Store + The opened store instance. + """ store = cls(*args, **kwargs) await store._open() return store @@ -67,6 +106,20 @@ def __exit__( self.close() async def _open(self) -> None: + """ + Open the store. + + Raises + ------ + ValueError + If the store is already open. + FileExistsError + If ``mode='w-'`` and the store already exists. + + Notes + ----- + * When ``mode='w'`` and the store already exists, it will be cleared. + """ if self._is_open: raise ValueError("store is already open") if self.mode.str == "w": @@ -76,14 +129,30 @@ async def _open(self) -> None: self._is_open = True async def _ensure_open(self) -> None: + """Open the store if it is not already open.""" if not self._is_open: await self._open() @abstractmethod - async def empty(self) -> bool: ... + async def empty(self) -> bool: + """ + Check if the store is empty. + + Returns + ------- + bool + True if the store is empty, False otherwise. + """ + ... @abstractmethod - async def clear(self) -> None: ... + async def clear(self) -> None: + """ + Clear the store. + + Remove all keys and values from the store. + """ + ... @abstractmethod def with_mode(self, mode: AccessModeLiteral) -> Self: @@ -116,6 +185,7 @@ def mode(self) -> AccessMode: return self._mode def _check_writable(self) -> None: + """Raise an exception if the store is not writable.""" if self.mode.readonly: raise ValueError("store mode does not support writing") @@ -199,7 +269,7 @@ async def set_if_not_exists(self, key: str, value: Buffer) -> None: Store a key to ``value`` if the key is not already present. Parameters - ----------- + ---------- key : str value : Buffer """ @@ -339,6 +409,17 @@ async def set_if_not_exists(self, default: Buffer) -> None: ... async def set_or_delete(byte_setter: ByteSetter, value: Buffer | None) -> None: + """Set or delete a value in a byte setter + + Parameters + ---------- + byte_setter : ByteSetter + value : Buffer | None + + Notes + ----- + If value is None, the key will be deleted. 
+ """ if value is None: await byte_setter.delete() else: diff --git a/src/zarr/storage/__init__.py b/src/zarr/storage/__init__.py index 47f70bcc9e..6703aa2723 100644 --- a/src/zarr/storage/__init__.py +++ b/src/zarr/storage/__init__.py @@ -1,11 +1,13 @@ from zarr.storage.common import StoreLike, StorePath, make_store_path from zarr.storage.local import LocalStore +from zarr.storage.logging import LoggingStore from zarr.storage.memory import MemoryStore from zarr.storage.remote import RemoteStore from zarr.storage.zip import ZipStore __all__ = [ "LocalStore", + "LoggingStore", "MemoryStore", "RemoteStore", "StoreLike", diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index 977fe4ba2b..101e8f38af 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -27,6 +27,17 @@ def _dereference_path(root: str, path: str) -> str: class StorePath: + """ + Path-like interface for a Store. + + Parameters + ---------- + store : Store + The store to use. + path : str + The path within the store. + """ + store: Store path: str @@ -39,25 +50,80 @@ async def get( prototype: BufferPrototype | None = None, byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: + """ + Read bytes from the store. + + Parameters + ---------- + prototype : BufferPrototype, optional + The buffer prototype to use when reading the bytes. + byte_range : ByteRangeRequest, optional + The range of bytes to read. + + Returns + ------- + buffer : Buffer or None + The read bytes, or None if the key does not exist. + """ if prototype is None: prototype = default_buffer_prototype() return await self.store.get(self.path, prototype=prototype, byte_range=byte_range) async def set(self, value: Buffer, byte_range: ByteRangeRequest | None = None) -> None: + """ + Write bytes to the store. + + Parameters + ---------- + value : Buffer + The buffer to write. + byte_range : ByteRangeRequest, optional + The range of bytes to write. If None, the entire buffer is written. 
+ + Raises + ------ + NotImplementedError + If `byte_range` is not None, because Store.set does not support partial writes yet. + """ if byte_range is not None: raise NotImplementedError("Store.set does not have partial writes yet") await self.store.set(self.path, value) async def delete(self) -> None: + """ + Delete the key from the store. + + Raises + ------ + NotImplementedError + If the store does not support deletion. + """ await self.store.delete(self.path) async def set_if_not_exists(self, default: Buffer) -> None: + """ + Store a key to ``value`` if the key is not already present. + + Parameters + ---------- + default : Buffer + The buffer to store if the key is not already present. + """ await self.store.set_if_not_exists(self.path, default) async def exists(self) -> bool: + """ + Check if the key exists in the store. + + Returns + ------- + bool + True if the key exists in the store, False otherwise. + """ return await self.store.exists(self.path) def __truediv__(self, other: str) -> StorePath: + """combine this store path with another path""" return self.__class__(self.store, _dereference_path(self.path, other)) def __str__(self) -> str: @@ -67,6 +133,19 @@ def __repr__(self) -> str: return f"StorePath({self.store.__class__.__name__}, {str(self)!r})" def __eq__(self, other: object) -> bool: + """ + Check if two StorePath objects are equal. + + Returns + ------- + bool + True if the two objects are equal, False otherwise. + + Notes + ----- + Two StorePath objects are considered equal if their stores are equal + and their paths are equal. + """ try: return self.store == other.store and self.path == other.path # type: ignore[attr-defined, no-any-return] except Exception: @@ -83,6 +162,50 @@ async def make_store_path( mode: AccessModeLiteral | None = None, storage_options: dict[str, Any] | None = None, ) -> StorePath: + """ + Convert a `StoreLike` object into a StorePath object. + + This function takes a `StoreLike` object and returns a `StorePath` object. 
The + `StoreLike` object can be a `Store`, `StorePath`, `Path`, `str`, or `dict[str, Buffer]`. + If the `StoreLike` object is a Store or `StorePath`, it is converted to a + `StorePath` object. If the `StoreLike` object is a Path or str, it is converted + to a LocalStore object and then to a `StorePath` object. If the `StoreLike` + object is a dict[str, Buffer], it is converted to a `MemoryStore` object and + then to a `StorePath` object. + + If the `StoreLike` object is None, a `MemoryStore` object is created and + converted to a `StorePath` object. + + If the `StoreLike` object is a str and starts with a protocol, it is + converted to a RemoteStore object and then to a `StorePath` object. + + If the `StoreLike` object is a dict[str, Buffer] and the mode is not None, + the `MemoryStore` object is created with the given mode. + + If the `StoreLike` object is a str and starts with a protocol, the + RemoteStore object is created with the given mode and storage options. + + Parameters + ---------- + store_like : StoreLike | None + The object to convert to a `StorePath` object. + mode : AccessModeLiteral | None, optional + The mode to use when creating the `StorePath` object. If None, the + default mode is 'r'. + storage_options : dict[str, Any] | None, optional + The storage options to use when creating the `RemoteStore` object. If + None, the default storage options are used. + + Returns + ------- + StorePath + The converted StorePath object. + + Raises + ------ + TypeError + If the StoreLike object is not one of the supported types. 
+ """ from zarr.storage.remote import RemoteStore # circular import used_storage_options = False diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index da37cbfd5f..b80b04e1d0 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -21,19 +21,6 @@ def _get( path: Path, prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None ) -> Buffer: - """ - Fetch a contiguous region of bytes from a file. - - Parameters - ---------- - path: Path - The file to read bytes from. - byte_range: tuple[int, int | None] | None = None - The range of bytes to read. If `byte_range` is `None`, then the entire file will be read. - If `byte_range` is a tuple, the first value specifies the index of the first byte to read, - and the second value specifies the total number of bytes to read. If the total value is - `None`, then the entire file after the first byte will be read. - """ if byte_range is not None: if byte_range[0] is None: start = 0 @@ -80,6 +67,25 @@ def _put( class LocalStore(Store): + """ + Local file system store. + + Parameters + ---------- + root : str or Path + Directory to use as root of store. + mode : str + Mode in which to open the store. Either 'r', 'r+', 'a', 'w', 'w-'. 
+ + Attributes + ---------- + supports_writes + supports_deletes + supports_partial_writes + supports_listing + root + """ + supports_writes: bool = True supports_deletes: bool = True supports_partial_writes: bool = True @@ -100,11 +106,13 @@ async def _open(self) -> None: return await super()._open() async def clear(self) -> None: + # docstring inherited self._check_writable() shutil.rmtree(self.root) self.root.mkdir() async def empty(self) -> bool: + # docstring inherited try: with os.scandir(self.root) as it: for entry in it: @@ -117,6 +125,7 @@ async def empty(self) -> bool: return True def with_mode(self, mode: AccessModeLiteral) -> Self: + # docstring inherited return type(self)(root=self.root, mode=mode) def __str__(self) -> str: @@ -134,6 +143,7 @@ async def get( prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None = None, ) -> Buffer | None: + # docstring inherited if not self._is_open: await self._open() assert isinstance(key, str) @@ -149,17 +159,7 @@ async def get_partial_values( prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: - """ - Read byte ranges from multiple keys. - - Parameters - ---------- - key_ranges: List[Tuple[str, Tuple[int, int]]] - A list of (key, (start, length)) tuples. The first element of the tuple is the name of - the key in storage to fetch bytes from. The second element the tuple defines the byte - range to retrieve. These values are arguments to `get`, as this method wraps - concurrent invocation of `get`. 
- """ + # docstring inherited args = [] for key, byte_range in key_ranges: assert isinstance(key, str) @@ -168,9 +168,11 @@ async def get_partial_values( return await concurrent_map(args, asyncio.to_thread, limit=None) # TODO: fix limit async def set(self, key: str, value: Buffer) -> None: + # docstring inherited return await self._set(key, value) async def set_if_not_exists(self, key: str, value: Buffer) -> None: + # docstring inherited try: return await self._set(key, value, exclusive=True) except FileExistsError: @@ -189,6 +191,7 @@ async def _set(self, key: str, value: Buffer, exclusive: bool = False) -> None: async def set_partial_values( self, key_start_values: Iterable[tuple[str, int, bytes | bytearray | memoryview]] ) -> None: + # docstring inherited self._check_writable() args = [] for key, start, value in key_start_values: @@ -198,6 +201,7 @@ async def set_partial_values( await concurrent_map(args, asyncio.to_thread, limit=None) # TODO: fix limit async def delete(self, key: str) -> None: + # docstring inherited self._check_writable() path = self.root / key if path.is_dir(): # TODO: support deleting directories? shutil.rmtree? @@ -206,53 +210,26 @@ async def delete(self, key: str) -> None: await asyncio.to_thread(path.unlink, True) # Q: we may want to raise if path is missing async def exists(self, key: str) -> bool: + # docstring inherited path = self.root / key return await asyncio.to_thread(path.is_file) async def list(self) -> AsyncGenerator[str, None]: - """Retrieve all keys in the store. - - Returns - ------- - AsyncGenerator[str, None] - """ + # docstring inherited to_strip = str(self.root) + "/" for p in list(self.root.rglob("*")): if p.is_file(): yield str(p).replace(to_strip, "") async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: - """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned with the - common leading prefix removed. 
- - Parameters - ---------- - prefix : str - - Returns - ------- - AsyncGenerator[str, None] - """ + # docstring inherited to_strip = os.path.join(str(self.root / prefix)) for p in (self.root / prefix).rglob("*"): if p.is_file(): yield str(p.relative_to(to_strip)) async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: - """ - Retrieve all keys and prefixes with a given prefix and which do not contain the character - “/” after the given prefix. - - Parameters - ---------- - prefix : str - - Returns - ------- - AsyncGenerator[str, None] - """ - + # docstring inherited base = self.root / prefix to_strip = str(base) + "/" diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index 52c7c7b84d..2e0436b048 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -18,6 +18,24 @@ class LoggingStore(Store): + """ + Store wrapper that logs all calls to the wrapped store. + + Parameters + ---------- + store: Store + Store to wrap + log_level: str + Log level + log_handler: logging.Handler + Log handler + + Attributes + ---------- + counter: dict + Counter of number of times each method has been called + """ + _store: Store counter: defaultdict[str, int] @@ -58,6 +76,11 @@ def _default_handler(self) -> logging.Handler: @contextmanager def log(self) -> Generator[None, None, None]: + """Context manager to log method calls + + Each call to the wrapped store is logged to the configured logger and added to + the counter dict. 
+ """ method = inspect.stack()[2].function op = f"{type(self._store).__name__}.{method}" self.logger.info(f"Calling {op}") @@ -108,10 +131,12 @@ async def _ensure_open(self) -> None: return await self._store._ensure_open() async def empty(self) -> bool: + # docstring inherited with self.log(): return await self._store.empty() async def clear(self) -> None: + # docstring inherited with self.log(): return await self._store.clear() @@ -131,6 +156,7 @@ async def get( prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None = None, ) -> Buffer | None: + # docstring inherited with self.log(): return await self._store.get(key=key, prototype=prototype, byte_range=byte_range) @@ -139,47 +165,57 @@ async def get_partial_values( prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: + # docstring inherited with self.log(): return await self._store.get_partial_values(prototype=prototype, key_ranges=key_ranges) async def exists(self, key: str) -> bool: + # docstring inherited with self.log(): return await self._store.exists(key) async def set(self, key: str, value: Buffer) -> None: + # docstring inherited with self.log(): return await self._store.set(key=key, value=value) async def set_if_not_exists(self, key: str, value: Buffer) -> None: + # docstring inherited with self.log(): return await self._store.set_if_not_exists(key=key, value=value) async def delete(self, key: str) -> None: + # docstring inherited with self.log(): return await self._store.delete(key=key) async def set_partial_values( self, key_start_values: Iterable[tuple[str, int, bytes | bytearray | memoryview]] ) -> None: + # docstring inherited with self.log(): return await self._store.set_partial_values(key_start_values=key_start_values) async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited with self.log(): async for key in self._store.list(): yield key async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: 
+ # docstring inherited with self.log(): async for key in self._store.list_prefix(prefix=prefix): yield key async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited with self.log(): async for key in self._store.list_dir(prefix=prefix): yield key def with_mode(self, mode: AccessModeLiteral) -> Self: + # docstring inherited with self.log(): return type(self)( self._store.with_mode(mode), diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index 24ea7e0040..673c2a75d5 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -14,9 +14,25 @@ from zarr.core.common import AccessModeLiteral -# TODO: this store could easily be extended to wrap any MutableMapping store from v2 -# When that is done, the `MemoryStore` will just be a store that wraps a dict. class MemoryStore(Store): + """ + In-memory store for testing purposes. + + Parameters + ---------- + store_dict : dict + Initial data + mode : str + Access mode + + Attributes + ---------- + supports_writes + supports_deletes + supports_partial_writes + supports_listing + """ + supports_writes: bool = True supports_deletes: bool = True supports_partial_writes: bool = True @@ -36,12 +52,15 @@ def __init__( self._store_dict = store_dict async def empty(self) -> bool: + # docstring inherited return not self._store_dict async def clear(self) -> None: + # docstring inherited self._store_dict.clear() def with_mode(self, mode: AccessModeLiteral) -> Self: + # docstring inherited return type(self)(store_dict=self._store_dict, mode=mode) def __str__(self) -> str: @@ -63,6 +82,7 @@ async def get( prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None = None, ) -> Buffer | None: + # docstring inherited if not self._is_open: await self._open() assert isinstance(key, str) @@ -78,6 +98,8 @@ async def get_partial_values( prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: + # docstring 
inherited + # All the key-ranges arguments goes with the same prototype async def _get(key: str, byte_range: ByteRangeRequest) -> Buffer | None: return await self.get(key, prototype=prototype, byte_range=byte_range) @@ -85,9 +107,11 @@ async def _get(key: str, byte_range: ByteRangeRequest) -> Buffer | None: return await concurrent_map(key_ranges, _get, limit=None) async def exists(self, key: str) -> bool: + # docstring inherited return key in self._store_dict async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: + # docstring inherited self._check_writable() await self._ensure_open() assert isinstance(key, str) @@ -102,42 +126,36 @@ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None self._store_dict[key] = value async def set_if_not_exists(self, key: str, value: Buffer) -> None: + # docstring inherited self._check_writable() await self._ensure_open() self._store_dict.setdefault(key, value) async def delete(self, key: str) -> None: + # docstring inherited self._check_writable() try: del self._store_dict[key] except KeyError: - pass # Q(JH): why not raise? + pass async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: + # docstring inherited raise NotImplementedError async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited for key in self._store_dict: yield key async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited for key in self._store_dict: if key.startswith(prefix): yield key.removeprefix(prefix) async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: - """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned with the - common leading prefix removed. 
- - Parameters - ---------- - prefix : str - - Returns - ------- - AsyncGenerator[str, None] - """ + # docstring inherited if prefix.endswith("/"): prefix = prefix[:-1] @@ -212,6 +230,7 @@ def from_dict(cls, store_dict: MutableMapping[str, Buffer]) -> Self: return cls(gpu_store_dict) async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: + # docstring inherited self._check_writable() assert isinstance(key, str) if not isinstance(value, Buffer): diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 1adc7ee419..9782a98772 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -25,6 +25,30 @@ class RemoteStore(Store): + """ + A remote Store based on FSSpec + + Parameters + ---------- + fs : AsyncFileSystem + The Async FSSpec filesystem to use with this store. + mode : AccessModeLiteral + The access mode to use. + path : str + The root path of the store. + allowed_exceptions : tuple[type[Exception], ...] + When fetching data, these cases will be deemed to correspond to missing keys. + + Attributes + ---------- + fs + allowed_exceptions + supports_writes + supports_deletes + supports_partial_writes + supports_listing + """ + # based on FSSpec supports_writes: bool = True supports_deletes: bool = True @@ -41,17 +65,6 @@ def __init__( path: str = "/", allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, ) -> None: - """ - Parameters - ---------- - url: root of the datastore. In fsspec notation, this is usually like "protocol://path/to". - Can also be a upath.UPath instance/ - allowed_exceptions: when fetching data, these cases will be deemed to correspond to missing - keys, rather than some other IO failure - storage_options: passed on to fsspec to make the filesystem instance. If url is a UPath, - this must not be used. 
- - """ super().__init__(mode=mode) self.fs = fs self.path = path @@ -67,6 +80,23 @@ def from_upath( mode: AccessModeLiteral = "r", allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, ) -> RemoteStore: + """ + Create a RemoteStore from an upath object. + + Parameters + ---------- + upath : UPath + The upath to the root of the store. + mode : str, optional + The mode of the store. Defaults to "r". + allowed_exceptions : tuple, optional + The exceptions that are allowed to be raised when accessing the + store. Defaults to ALLOWED_EXCEPTIONS. + + Returns + ------- + RemoteStore + """ return cls( fs=upath.fs, path=upath.path.rstrip("/"), @@ -82,10 +112,30 @@ def from_url( mode: AccessModeLiteral = "r", allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, ) -> RemoteStore: + """ + Create a RemoteStore from a URL. + + Parameters + ---------- + url : str + The URL to the root of the store. + storage_options : dict, optional + The options to pass to fsspec when creating the filesystem. + mode : str, optional + The mode of the store. Defaults to "r". + allowed_exceptions : tuple, optional + The exceptions that are allowed to be raised when accessing the + store. Defaults to ALLOWED_EXCEPTIONS. 
+ + Returns + ------- + RemoteStore + """ fs, path = fsspec.url_to_fs(url, **storage_options) return cls(fs=fs, path=path, mode=mode, allowed_exceptions=allowed_exceptions) async def clear(self) -> None: + # docstring inherited try: for subpath in await self.fs._find(self.path, withdirs=True): if subpath != self.path: @@ -94,11 +144,14 @@ async def clear(self) -> None: pass async def empty(self) -> bool: + # docstring inherited + # TODO: it would be nice if we didn't have to list all keys here # it should be possible to stop after the first key is discovered return not await self.fs._ls(self.path) def with_mode(self, mode: AccessModeLiteral) -> Self: + # docstring inherited return type(self)( fs=self.fs, mode=mode, @@ -123,6 +176,7 @@ async def get( prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: + # docstring inherited if not self._is_open: await self._open() path = _dereference_path(self.path, key) @@ -161,6 +215,7 @@ async def set( value: Buffer, byte_range: tuple[int, int] | None = None, ) -> None: + # docstring inherited if not self._is_open: await self._open() self._check_writable() @@ -171,6 +226,7 @@ async def set( await self.fs._pipe_file(path, value.to_bytes()) async def delete(self, key: str) -> None: + # docstring inherited self._check_writable() path = _dereference_path(self.path, key) try: @@ -181,6 +237,7 @@ async def delete(self, key: str) -> None: pass async def exists(self, key: str) -> bool: + # docstring inherited path = _dereference_path(self.path, key) exists: bool = await self.fs._exists(path) return exists @@ -190,6 +247,7 @@ async def get_partial_values( prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: + # docstring inherited if key_ranges: paths, starts, stops = zip( *( @@ -217,14 +275,17 @@ async def get_partial_values( async def set_partial_values( self, key_start_values: Iterable[tuple[str, int, BytesLike]] ) -> None: + # docstring 
inherited raise NotImplementedError async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited allfiles = await self.fs._find(self.path, detail=False, withdirs=False) for onefile in (a.replace(self.path + "/", "") for a in allfiles): yield onefile async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited prefix = f"{self.path}/{prefix.rstrip('/')}" try: allfiles = await self.fs._ls(prefix, detail=False) @@ -234,19 +295,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: yield onefile.removeprefix(self.path).removeprefix("/") async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: - """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned with the - common leading prefix removed. - - Parameters - ---------- - prefix : str - - Returns - ------- - AsyncGenerator[str, None] - """ - + # docstring inherited find_str = f"{self.path}/{prefix}" for onefile in await self.fs._find(find_str, detail=False, maxdepth=None, withdirs=False): yield onefile.removeprefix(find_str) diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index 49e5903a9f..c9cb579586 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -35,6 +35,17 @@ class ZipStore(Store): extensions when the zipfile is larger than 2 GiB. If False will raise an exception when the ZIP file would require ZIP64 extensions. 
+ + Attributes + ---------- + allowed_exceptions + supports_writes + supports_deletes + supports_partial_writes + supports_listing + path + compression + allowZip64 """ supports_writes: bool = True @@ -95,11 +106,13 @@ def __setstate__(self, state: Any) -> None: self._sync_open() def close(self) -> None: + # docstring inherited super().close() with self._lock: self._zf.close() async def clear(self) -> None: + # docstring inherited with self._lock: self._check_writable() self._zf.close() @@ -109,10 +122,12 @@ async def clear(self) -> None: ) async def empty(self) -> bool: + # docstring inherited with self._lock: return not self._zf.namelist() def with_mode(self, mode: ZipStoreAccessModeLiteral) -> Self: # type: ignore[override] + # docstring inherited raise NotImplementedError("ZipStore cannot be reopened with a new mode.") def __str__(self) -> str: @@ -130,6 +145,7 @@ def _get( prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: + # docstring inherited try: with self._zf.open(key) as f: # will raise KeyError if byte_range is None: @@ -153,6 +169,7 @@ async def get( prototype: BufferPrototype, byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: + # docstring inherited assert isinstance(key, str) with self._lock: @@ -163,6 +180,7 @@ async def get_partial_values( prototype: BufferPrototype, key_ranges: Iterable[tuple[str, ByteRangeRequest]], ) -> list[Buffer | None]: + # docstring inherited out = [] with self._lock: for key, byte_range in key_ranges: @@ -181,6 +199,7 @@ def _set(self, key: str, value: Buffer) -> None: self._zf.writestr(keyinfo, value.to_bytes()) async def set(self, key: str, value: Buffer) -> None: + # docstring inherited self._check_writable() assert isinstance(key, str) if not isinstance(value, Buffer): @@ -199,9 +218,11 @@ async def set_if_not_exists(self, key: str, value: Buffer) -> None: self._set(key, value) async def delete(self, key: str) -> None: + # docstring inherited raise 
NotImplementedError async def exists(self, key: str) -> bool: + # docstring inherited with self._lock: try: self._zf.getinfo(key) @@ -211,28 +232,19 @@ async def exists(self, key: str) -> bool: return True async def list(self) -> AsyncGenerator[str, None]: + # docstring inherited with self._lock: for key in self._zf.namelist(): yield key async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: - """ - Retrieve all keys in the store that begin with a given prefix. Keys are returned with the - common leading prefix removed. - - Parameters - ---------- - prefix : str - - Returns - ------- - AsyncGenerator[str, None] - """ + # docstring inherited async for key in self.list(): if key.startswith(prefix): yield key.removeprefix(prefix) async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: + # docstring inherited if prefix.endswith("/"): prefix = prefix[:-1]