diff --git a/pyiceberg/manifest.py b/pyiceberg/manifest.py
index bf5749ce9b..4fd82fec1a 100644
--- a/pyiceberg/manifest.py
+++ b/pyiceberg/manifest.py
@@ -685,6 +685,13 @@ def __exit__(
         traceback: Optional[TracebackType],
     ) -> None:
         """Close the writer."""
         self.closed = True
         self._writer.__exit__(exc_type, exc_value, traceback)
+
+        # Guard against empty manifest files. The check runs only after the
+        # underlying writer's __exit__ has been called, so raising here does not
+        # skip that cleanup, and it is bypassed while another exception is
+        # already propagating so that the original error is not masked.
+        if exc_type is None and (self._added_files + self._existing_files + self._deleted_files) == 0:
+            raise ValueError("An empty manifest file has been written")
 
@@ -757,6 +764,8 @@ def add_entry(self, entry: ManifestEntry) -> ManifestWriter:
         elif entry.status == ManifestEntryStatus.DELETED:
             self._deleted_files += 1
             self._deleted_rows += entry.data_file.record_count
+        else:
+            raise ValueError(f"Unknown entry: {entry.status}")
 
         self._partitions.append(entry.data_file.partition)
 
diff --git a/tests/utils/test_manifest.py b/tests/utils/test_manifest.py
index a812b384fc..ecb99e281e 100644
--- a/tests/utils/test_manifest.py
+++ b/tests/utils/test_manifest.py
@@ -35,7 +35,7 @@
     write_manifest,
     write_manifest_list,
 )
-from pyiceberg.partitioning import PartitionField, PartitionSpec
+from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionField, PartitionSpec
 from pyiceberg.schema import Schema
 from pyiceberg.table.snapshots import Operation, Snapshot, Summary
 from pyiceberg.transforms import IdentityTransform
@@ -306,6 +306,23 @@ def test_read_manifest_v2(generated_manifest_file_file_v2: str) -> None:
         assert entry.status == ManifestEntryStatus.ADDED
 
 
+def test_write_empty_manifest() -> None:
+    io = load_file_io()
+    test_schema = Schema(NestedField(1, "foo", IntegerType(), False))
+    with TemporaryDirectory() as tmpdir:
+        tmp_avro_file = tmpdir + "/test_write_manifest.avro"
+
+        with pytest.raises(ValueError, match="An empty manifest file has been written"):
+            with write_manifest(
+                format_version=1,
+                spec=UNPARTITIONED_PARTITION_SPEC,
+                schema=test_schema,
+                output_file=io.new_output(tmp_avro_file),
+                snapshot_id=8744736658442914487,
+            ) as _:
+                pass
+
+
 @pytest.mark.parametrize("format_version", [1, 2])
 def test_write_manifest(
     generated_manifest_file_file_v1: str, generated_manifest_file_file_v2: str, format_version: TableVersion