-
Notifications
You must be signed in to change notification settings. Fork 411
Write Deletion Vectors #2822
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Write Deletion Vectors #2822
Changes from 1 commit
9a4b91d
4db1734
71dd925
3efd28e
228263c
36bb37f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,8 @@ | |
| import io | ||
| import math | ||
| import zlib | ||
| from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional | ||
| from collections.abc import Iterable | ||
| from typing import TYPE_CHECKING, Literal | ||
|
|
||
| from pydantic import Field | ||
| from pyroaring import BitMap, FrozenBitMap | ||
|
|
@@ -65,9 +66,9 @@ def _deserialize_bitmap(pl: bytes) -> list[BitMap]: | |
| return bitmaps | ||
|
|
||
|
|
||
| def _serialize_bitmaps(bitmaps: Dict[int, BitMap]) -> bytes: | ||
| def _serialize_bitmaps(bitmaps: dict[int, BitMap]) -> bytes: | ||
| """ | ||
| Serializes a dictionary of bitmaps into a byte array. | ||
| Serialize a dictionary of bitmaps into a byte array. | ||
|
|
||
| The format is: | ||
| - 8 bytes: number of bitmaps (little-endian) | ||
|
|
@@ -149,8 +150,8 @@ def to_vector(self) -> dict[str, "pa.ChunkedArray"]: | |
|
|
||
|
|
||
| class PuffinWriter: | ||
| _blobs: List[PuffinBlobMetadata] | ||
| _blob_payloads: List[bytes] | ||
| _blobs: list[PuffinBlobMetadata] | ||
| _blob_payloads: list[bytes] | ||
|
|
||
| def __init__(self) -> None: | ||
| self._blobs = [] | ||
|
|
@@ -162,7 +163,7 @@ def add( | |
| referenced_data_file: str, | ||
| ) -> None: | ||
| # 1. Create bitmaps from positions | ||
| bitmaps: Dict[int, BitMap] = {} | ||
| bitmaps: dict[int, BitMap] = {} | ||
| cardinality = 0 | ||
| for pos in positions: | ||
| cardinality += 1 | ||
|
||
|
|
@@ -219,7 +220,7 @@ def finish(self) -> bytes: | |
| for blob_payload in self._blob_payloads: | ||
| payload_buffer.write(blob_payload) | ||
|
|
||
| updated_blobs_metadata: List[PuffinBlobMetadata] = [] | ||
| updated_blobs_metadata: list[PuffinBlobMetadata] = [] | ||
| current_offset = 4 # Start after file magic (4 bytes) | ||
| for i, blob_payload in enumerate(self._blob_payloads): | ||
| original_metadata_dict = self._blobs[i].model_dump(by_alias=True, exclude_none=True) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.