Fix the CI

apache · Fokko · Apr 4, 2024 · Mar 27, 2024 · Mar 28, 2024 · Apr 3, 2024
commit cf65c2d6601012dda178daa0b5b047f73bb411b3
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
@@ -3291,7 +3291,8 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
                 pa.field(schema.find_column_name(field.field_id), _readable_metrics_struct(field.field_type), nullable=False)
             )
 
-        pa_record_struct = schema_to_pyarrow(self.tbl.metadata.specs_struct())
+        partition_record = self.tbl.metadata.specs_struct()
+        pa_record_struct = schema_to_pyarrow(partition_record)
 
         entries_schema = pa.schema([
             pa.field('status', pa.int8(), nullable=False),
@@ -3325,8 +3326,8 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
 
         entries = []
         if snapshot := self.tbl.metadata.current_snapshot():
-            for manifests in snapshot.manifests(self.tbl.io):
-                for entry in manifests.fetch_manifest_entry(io=self.tbl.io):
+            for manifest in snapshot.manifests(self.tbl.io):
+                for entry in manifest.fetch_manifest_entry(io=self.tbl.io):
                     column_sizes = entry.data_file.column_sizes or {}
                     value_counts = entry.data_file.value_counts or {}
                     null_value_counts = entry.data_file.null_value_counts or {}
@@ -3350,12 +3351,36 @@ def _readable_metrics_struct(bound_type: PrimitiveType) -> pa.StructType:
                         for field in self.tbl.metadata.schema().fields
                     }
 
+                    partition = entry.data_file.partition
+                    partition_record_dict = {
+                        field.name: partition[pos]
+                        for pos, field in enumerate(self.tbl.metadata.specs()[manifest.partition_spec_id].fields)
+                    }
+
                     entries.append({
                         'status': entry.status.value,
                         'snapshot_id': entry.snapshot_id,
                         'sequence_number': entry.data_sequence_number,
                         'file_sequence_number': entry.file_sequence_number,
-                        'data_file': entry.data_file.__dict__,
+                        'data_file': {
+                            "content": entry.data_file.content,
+                            "file_path": entry.data_file.file_path,
+                            "file_format": entry.data_file.file_format,
+                            "partition": partition_record_dict,
+                            "record_count": entry.data_file.record_count,
+                            "file_size_in_bytes": entry.data_file.file_size_in_bytes,
+                            "column_sizes": dict(entry.data_file.column_sizes),
+                            "value_counts": dict(entry.data_file.value_counts),
+                            "null_value_counts": dict(entry.data_file.null_value_counts),
+                            "nan_value_counts": entry.data_file.nan_value_counts,
+                            "lower_bounds": entry.data_file.lower_bounds,
+                            "upper_bounds": entry.data_file.upper_bounds,
+                            "key_metadata": entry.data_file.key_metadata,
+                            "split_offsets": entry.data_file.split_offsets,
+                            "equality_ids": entry.data_file.equality_ids,
+                            "sort_order_id": entry.data_file.sort_order_id,
+                            "spec_id": entry.data_file.spec_id,
+                        },
                         'readable_metrics': readable_metrics,
                     })
 

diff --git a/pyiceberg/utils/lazydict.py b/pyiceberg/utils/lazydict.py
@@ -66,3 +66,7 @@ def __len__(self) -> int:
         """Return the number of items in the dictionary."""
         source = self._dict or self._build_dict()
         return len(source)
+
+    def __dict__(self) -> Dict[K, V]:  # type: ignore
+        """Convert the lazy dict into a dict."""
+        return self._dict or self._build_dict()