Changes from 1 commit

save
jqin61 committed Apr 5, 2024
commit e320b73f9548743d9d9c24b6a6e3155011e9d3a9
16 changes: 16 additions & 0 deletions pyiceberg/expressions/__init__.py
@@ -383,6 +383,10 @@ def __repr__(self) -> str:
    @abstractmethod
    def as_bound(self) -> Type[BoundUnaryPredicate[Any]]: ...

    def __hash__(self) -> int:
        """Return hash value of the UnaryPredicate class."""
        return hash(str(self))


class BoundUnaryPredicate(BoundPredicate[L], ABC):
    def __repr__(self) -> str:
@@ -412,6 +416,10 @@ def __invert__(self) -> BoundNotNull[L]:
    def as_unbound(self) -> Type[IsNull]:
        return IsNull

    def __hash__(self) -> int:
        """Return hash value of the BoundIsNull class."""
        return hash(str(self))


class BoundNotNull(BoundUnaryPredicate[L]):
    def __new__(cls, term: BoundTerm[L]):  # type: ignore # pylint: disable=W0221
@@ -698,6 +706,10 @@ def __repr__(self) -> str:
    @abstractmethod
    def as_bound(self) -> Type[BoundLiteralPredicate[L]]: ...

    def __hash__(self) -> int:
        """Return hash value of the LiteralPredicate class."""
        return hash(str(self))


class BoundLiteralPredicate(BoundPredicate[L], ABC):
    literal: Literal[L]
@@ -731,6 +743,10 @@ def __invert__(self) -> BoundNotEqualTo[L]:
    def as_unbound(self) -> Type[EqualTo[L]]:
        return EqualTo

    def __hash__(self) -> int:
        """Return hash value of the BoundEqualTo class."""
        return hash(str(self))


class BoundNotEqualTo(BoundLiteralPredicate[L]):
    def __invert__(self) -> BoundEqualTo[L]:
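
For context on these additions: in Python, a class that defines __eq__ without also defining __hash__ becomes unhashable, and the str(self)-based __hash__ methods above restore hashability so predicates can be deduplicated in sets or used as dict keys. A minimal sketch of the effect, assuming pyiceberg's public predicate constructors:

from pyiceberg.expressions import EqualTo, IsNull

# Structurally identical predicates now collapse to a single set entry,
# e.g. when deduplicating filters before planning a scan or write.
filters = {EqualTo("region", "us"), EqualTo("region", "us"), IsNull("region")}
assert len(filters) == 2
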
4 changes: 4 additions & 0 deletions pyiceberg/io/pyarrow.py
@@ -1777,6 +1777,10 @@ def write_parquet(task: WriteTask) -> DataFile:
    with fo.create(overwrite=True) as fos:
        with pq.ParquetWriter(fos, schema=arrow_file_schema, **parquet_writer_kwargs) as writer:
            writer.write(pa.Table.from_batches(task.record_batches), row_group_size=row_group_size)
            arrow_table = pa.Table.from_batches(task.record_batches)
            # align columns with the iceberg table schema in case the input arrow table has them in a different order

[Review comment from a Contributor]
Can you provide an example of when this would happen? This only handles top-level columns.

            df_to_write = arrow_table.select(arrow_file_schema.names)
            writer.write_table(df_to_write, row_group_size=row_group_size)

    statistics = data_file_statistics_from_parquet_metadata(
        parquet_metadata=writer.writer.metadata,
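
On the reviewer's question above: a minimal sketch of the top-level reordering that arrow_table.select(arrow_file_schema.names) performs. The schema and column names here are hypothetical, not taken from the PR:

import pyarrow as pa

# Hypothetical case: the iceberg file schema expects (id, name), but the
# caller built its record batches with the columns in the opposite order.
arrow_file_schema = pa.schema([("id", pa.int64()), ("name", pa.string())])
arrow_table = pa.table({"name": ["a", "b"], "id": [1, 2]})

# Table.select() reorders (and projects) top-level columns to match the
# target schema; fields nested inside a struct column keep their original
# order, which is the limitation the reviewer points out.
df_to_write = arrow_table.select(arrow_file_schema.names)
assert df_to_write.column_names == ["id", "name"]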