Skip to content
Prev Previous commit
Next Next commit
feat: use tell() instead of __len__
  • Loading branch information
felixscherz committed Aug 5, 2024
commit 70c37efde1dddcbfd85698f5f14b05bac8a9d1ab
4 changes: 2 additions & 2 deletions pyiceberg/avro/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def write_block(self, objects: List[D]) -> None:
self.encoder.write(self.sync_bytes)

def __len__(self) -> int:
"""Returns the total number number of bytes written."""
"""Return the total number of bytes written."""
if self.closed:
return len(self.output_file)
return len(self.output_stream)
return self.output_stream.tell()
4 changes: 2 additions & 2 deletions pyiceberg/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ def __exit__(
"""Perform cleanup when exiting the scope of a 'with' statement."""

@abstractmethod
def __len__(self) -> int:
"""Returns the total number number of bytes written to the stream."""
def tell(self) -> int:
"""Return the total number of bytes written to the stream."""


class InputFile(ABC):
Expand Down
5 changes: 2 additions & 3 deletions pyiceberg/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,7 @@ def existing(self, entry: ManifestEntry) -> ManifestWriter:


def __len__(self) -> int:
"""Returns the total number number of bytes written."""
"""Return the total number of bytes written."""
return len(self._writer)


Expand Down Expand Up @@ -938,8 +938,7 @@ def _should_roll_to_new_file(self) -> bool:
if not self._current_writer:
return False
return (
self._current_file_rows >= self._target_number_of_rows
or len(self._current_writer) >= self._target_file_size_in_bytes
self._current_file_rows >= self._target_number_of_rows or len(self._current_writer) >= self._target_file_size_in_bytes
)

def _close_current_writer(self):
Expand Down
7 changes: 0 additions & 7 deletions tests/utils/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,13 +533,6 @@ def test_rolling_manifest_writer(
spec_id=demo_manifest_file.partition_spec_id,
)

# The tests use `PyArrowFileIO`, where `OutputStream` is implemented as `pyarrow.lib.BufferedOutputStream`.
# This patch only demonstrates that the tests would pass if `pyarrow.lib.BufferedOutputStream` implemented
# the new `OutputStream` protocol, which includes a `__len__` method.
from pyiceberg.avro.file import AvroOutputFile
AvroOutputFile.__len__ = lambda self: self.output_stream.tell()


with TemporaryDirectory() as tmpdir:

def supplier() -> Generator[ManifestWriter, None, None]:
Expand Down