Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix the CI
  • Loading branch information
Fokko committed Apr 4, 2024
commit cf65c2d6601012dda178daa0b5b047f73bb411b3
33 changes: 29 additions & 4 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3291,7 +3291,8 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
pa.field(schema.find_column_name(field.field_id), _readable_metrics_struct(field.field_type), nullable=False)
)

pa_record_struct = schema_to_pyarrow(self.tbl.metadata.specs_struct())
partition_record = self.tbl.metadata.specs_struct()
pa_record_struct = schema_to_pyarrow(partition_record)

entries_schema = pa.schema([
pa.field('status', pa.int8(), nullable=False),
Expand Down Expand Up @@ -3325,8 +3326,8 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:

entries = []
if snapshot := self.tbl.metadata.current_snapshot():
for manifests in snapshot.manifests(self.tbl.io):
for entry in manifests.fetch_manifest_entry(io=self.tbl.io):
for manifest in snapshot.manifests(self.tbl.io):
for entry in manifest.fetch_manifest_entry(io=self.tbl.io):
column_sizes = entry.data_file.column_sizes or {}
value_counts = entry.data_file.value_counts or {}
null_value_counts = entry.data_file.null_value_counts or {}
Expand All @@ -3350,12 +3351,36 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
for field in self.tbl.metadata.schema().fields
}

partition = entry.data_file.partition
partition_record_dict = {
field.name: partition[pos]
for pos, field in enumerate(self.tbl.metadata.specs()[manifest.partition_spec_id].fields)
}

entries.append({
'status': entry.status.value,
'snapshot_id': entry.snapshot_id,
'sequence_number': entry.data_sequence_number,
'file_sequence_number': entry.file_sequence_number,
'data_file': entry.data_file.__dict__,
'data_file': {
"content": entry.data_file.content,
"file_path": entry.data_file.file_path,
"file_format": entry.data_file.file_format,
"partition": partition_record_dict,
"record_count": entry.data_file.record_count,
"file_size_in_bytes": entry.data_file.file_size_in_bytes,
"column_sizes": dict(entry.data_file.column_sizes),
"value_counts": dict(entry.data_file.value_counts),
"null_value_counts": dict(entry.data_file.null_value_counts),
"nan_value_counts": entry.data_file.nan_value_counts,
"lower_bounds": entry.data_file.lower_bounds,
"upper_bounds": entry.data_file.upper_bounds,
"key_metadata": entry.data_file.key_metadata,
"split_offsets": entry.data_file.split_offsets,
"equality_ids": entry.data_file.equality_ids,
"sort_order_id": entry.data_file.sort_order_id,
"spec_id": entry.data_file.spec_id,
},
'readable_metrics': readable_metrics,
})

Expand Down
4 changes: 4 additions & 0 deletions pyiceberg/utils/lazydict.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,7 @@ def __len__(self) -> int:
"""Return the number of items in the dictionary."""
source = self._dict or self._build_dict()
return len(source)

def __dict__(self) -> Dict[K, V]:  # type: ignore
    """Return the backing dictionary of this lazy dict.

    The already-built ``_dict`` is returned when it is truthy; otherwise
    the result of ``_build_dict()`` is returned instead.
    """
    # NOTE(review): the ``type: ignore`` is presumably needed because this
    # method reuses the name of the built-in ``__dict__`` attribute - confirm.
    cached = self._dict
    if cached:
        return cached
    return self._build_dict()