Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Rename data_sequence_number to sequence_number in ManifestEntry
  • Loading branch information
soumya-ghosh committed Jul 10, 2024
commit aa5f82c38d48d1c79ae0d17fe90a68a971e88ece
18 changes: 9 additions & 9 deletions pyiceberg/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ def __eq__(self, other: Any) -> bool:
2: Schema(
NestedField(0, "status", IntegerType(), required=True),
NestedField(1, "snapshot_id", LongType(), required=False),
NestedField(3, "data_sequence_number", LongType(), required=False),
NestedField(3, "sequence_number", LongType(), required=False),
NestedField(4, "file_sequence_number", LongType(), required=False),
NestedField(2, "data_file", DATA_FILE_TYPE[2], required=True),
),
Expand All @@ -394,10 +394,10 @@ def manifest_entry_schema_with_data_file(format_version: TableVersion, data_file


class ManifestEntry(Record):
__slots__ = ("status", "snapshot_id", "data_sequence_number", "file_sequence_number", "data_file")
__slots__ = ("status", "snapshot_id", "sequence_number", "file_sequence_number", "data_file")
status: ManifestEntryStatus
snapshot_id: Optional[int]
data_sequence_number: Optional[int]
sequence_number: Optional[int]
file_sequence_number: Optional[int]
data_file: DataFile

Expand Down Expand Up @@ -667,8 +667,8 @@ def _inherit_from_manifest(entry: ManifestEntry, manifest: ManifestFile) -> Mani

# in v1 tables, the data sequence number is not persisted and can be safely defaulted to 0
# in v2 tables, the data sequence number should be inherited iff the entry status is ADDED
if entry.data_sequence_number is None and (manifest.sequence_number == 0 or entry.status == ManifestEntryStatus.ADDED):
entry.data_sequence_number = manifest.sequence_number
if entry.sequence_number is None and (manifest.sequence_number == 0 or entry.status == ManifestEntryStatus.ADDED):
entry.sequence_number = manifest.sequence_number

# in v1 tables, the file sequence number is not persisted and can be safely defaulted to 0
# in v2 tables, the file sequence number should be inherited iff the entry status is ADDED
Expand Down Expand Up @@ -812,10 +812,10 @@ def add_entry(self, entry: ManifestEntry) -> ManifestWriter:

if (
(entry.status == ManifestEntryStatus.ADDED or entry.status == ManifestEntryStatus.EXISTING)
and entry.data_sequence_number is not None
and (self._min_data_sequence_number is None or entry.data_sequence_number < self._min_data_sequence_number)
and entry.sequence_number is not None
and (self._min_data_sequence_number is None or entry.sequence_number < self._min_data_sequence_number)
):
self._min_data_sequence_number = entry.data_sequence_number
self._min_data_sequence_number = entry.sequence_number

self._writer.write_block([self.prepare_entry(entry)])
return self
Expand Down Expand Up @@ -885,7 +885,7 @@ def _meta(self) -> Dict[str, str]:
}

def prepare_entry(self, entry: ManifestEntry) -> ManifestEntry:
if entry.data_sequence_number is None:
if entry.sequence_number is None:
if entry.snapshot_id is not None and entry.snapshot_id != self._snapshot_id:
raise ValueError(f"Found unassigned sequence number for an entry from snapshot: {entry.snapshot_id}")
if entry.status != ManifestEntryStatus.ADDED:
Expand Down
8 changes: 4 additions & 4 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1997,7 +1997,7 @@ def plan_files(self) -> Iterable[FileScanTask]:
min_data_sequence_number = _min_data_file_sequence_number(manifests)

data_entries: List[ManifestEntry] = []
positional_delete_entries = SortedList(key=lambda entry: entry.data_sequence_number or INITIAL_SEQUENCE_NUMBER)
positional_delete_entries = SortedList(key=lambda entry: entry.sequence_number or INITIAL_SEQUENCE_NUMBER)

executor = ExecutorFactory.get_or_create()
for manifest_entry in chain(
Expand Down Expand Up @@ -3150,7 +3150,7 @@ def _write_added_manifest() -> List[ManifestFile]:
ManifestEntry(
status=ManifestEntryStatus.ADDED,
snapshot_id=self._snapshot_id,
data_sequence_number=None,
sequence_number=None,
file_sequence_number=None,
data_file=data_file,
)
Expand Down Expand Up @@ -3568,7 +3568,7 @@ def _get_entries(manifest: ManifestFile) -> List[ManifestEntry]:
ManifestEntry(
status=ManifestEntryStatus.DELETED,
snapshot_id=entry.snapshot_id,
data_sequence_number=entry.data_sequence_number,
sequence_number=entry.sequence_number,
file_sequence_number=entry.file_sequence_number,
data_file=entry.data_file,
)
Expand Down Expand Up @@ -4016,7 +4016,7 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
entries.append({
"status": entry.status.value,
"snapshot_id": entry.snapshot_id,
"sequence_number": entry.data_sequence_number,
"sequence_number": entry.sequence_number,
"file_sequence_number": entry.file_sequence_number,
"data_file": {
"content": entry.data_file.content,
Expand Down
10 changes: 5 additions & 5 deletions tests/avro/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def test_write_manifest_entry_with_iceberg_read_with_fastavro_v1() -> None:
entry = ManifestEntry(
status=ManifestEntryStatus.ADDED,
snapshot_id=8638475580105682862,
data_sequence_number=0,
sequence_number=0,
file_sequence_number=0,
data_file=data_file,
)
Expand Down Expand Up @@ -173,7 +173,7 @@ def test_write_manifest_entry_with_iceberg_read_with_fastavro_v1() -> None:
v2_entry = todict(entry)

# These are not written in V1
del v2_entry["data_sequence_number"]
del v2_entry["sequence_number"]
del v2_entry["file_sequence_number"]
del v2_entry["data_file"]["content"]
del v2_entry["data_file"]["equality_ids"]
Expand Down Expand Up @@ -206,7 +206,7 @@ def test_write_manifest_entry_with_iceberg_read_with_fastavro_v2() -> None:
entry = ManifestEntry(
status=ManifestEntryStatus.ADDED,
snapshot_id=8638475580105682862,
data_sequence_number=0,
sequence_number=0,
file_sequence_number=0,
data_file=data_file,
)
Expand Down Expand Up @@ -263,7 +263,7 @@ def test_write_manifest_entry_with_fastavro_read_with_iceberg(format_version: in
entry = ManifestEntry(
status=ManifestEntryStatus.ADDED,
snapshot_id=8638475580105682862,
data_sequence_number=0,
sequence_number=0,
file_sequence_number=0,
data_file=data_file,
)
Expand Down Expand Up @@ -305,7 +305,7 @@ def test_write_manifest_entry_with_fastavro_read_with_iceberg(format_version: in
status=ManifestEntryStatus.ADDED,
snapshot_id=8638475580105682862,
# Not part of v1
data_sequence_number=None,
sequence_number=None,
file_sequence_number=None,
data_file=v1_datafile,
)
Expand Down
10 changes: 5 additions & 5 deletions tests/utils/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_read_manifest_entry(generated_manifest_entry_file: str) -> None:

assert manifest_entry.status == ManifestEntryStatus.ADDED
assert manifest_entry.snapshot_id == 8744736658442914487
assert manifest_entry.data_sequence_number == 0
assert manifest_entry.sequence_number == 0
assert isinstance(manifest_entry.data_file, DataFile)

data_file = manifest_entry.data_file
Expand Down Expand Up @@ -250,7 +250,7 @@ def test_read_manifest_v1(generated_manifest_file_file_v1: str) -> None:

entry = entries[0]

assert entry.data_sequence_number == 0
assert entry.sequence_number == 0
assert entry.file_sequence_number == 0
assert entry.snapshot_id == 8744736658442914487
assert entry.status == ManifestEntryStatus.ADDED
Expand Down Expand Up @@ -300,7 +300,7 @@ def test_read_manifest_v2(generated_manifest_file_file_v2: str) -> None:

entry = entries[0]

assert entry.data_sequence_number == 3
assert entry.sequence_number == 3
assert entry.file_sequence_number == 3
assert entry.snapshot_id == 8744736658442914487
assert entry.status == ManifestEntryStatus.ADDED
Expand Down Expand Up @@ -379,7 +379,7 @@ def test_write_manifest(

assert manifest_entry.status == ManifestEntryStatus.ADDED
assert manifest_entry.snapshot_id == 8744736658442914487
assert manifest_entry.data_sequence_number == -1 if format_version == 1 else 3
assert manifest_entry.sequence_number == -1 if format_version == 1 else 3
assert isinstance(manifest_entry.data_file, DataFile)

data_file = manifest_entry.data_file
Expand Down Expand Up @@ -556,7 +556,7 @@ def test_write_manifest_list(

entry = entries[0]

assert entry.data_sequence_number == 0 if format_version == 1 else 3
assert entry.sequence_number == 0 if format_version == 1 else 3
assert entry.file_sequence_number == 0 if format_version == 1 else 3
assert entry.snapshot_id == 8744736658442914487
assert entry.status == ManifestEntryStatus.ADDED