Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
add ListPacker + tests
  • Loading branch information
HonahX committed Jun 3, 2024
commit 6803ebad1caca83da0898a11920c3c7366da8953
26 changes: 26 additions & 0 deletions pyiceberg/utils/bin_packing.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,29 @@ def remove_bin(self) -> Bin[T]:
return bin_
else:
return self.bins.pop(0)


class ListPacker(Generic[T]):
    """Eagerly pack a list of items into bins by driving a ``PackingIterator``.

    The packing parameters are fixed at construction time and reused for
    every call to :meth:`pack` or :meth:`pack_end`.
    """

    # Weight limit handed to PackingIterator as ``target_weight``.
    _target_weight: int
    # Lookback setting handed to PackingIterator as ``lookback``.
    _lookback: int
    # Flag handed to PackingIterator as ``largest_bin_first``.
    _largest_bin_first: bool

    def __init__(self, target_weight: int, lookback: int, largest_bin_first: bool) -> None:
        """Store the packing configuration used by later pack calls."""
        self._target_weight = target_weight
        self._lookback = lookback
        self._largest_bin_first = largest_bin_first

    def pack(self, items: List[T], weight_func: Callable[[T], int]) -> List[List[T]]:
        """Pack ``items`` in the given order and return all bins as a list.

        Args:
            items: The items to distribute into bins.
            weight_func: Returns the integer weight of a single item.

        Returns:
            Every bin produced by the underlying ``PackingIterator``, in the
            order the iterator yields them.
        """
        bins = PackingIterator(
            items=items,
            target_weight=self._target_weight,
            lookback=self._lookback,
            weight_func=weight_func,
            largest_bin_first=self._largest_bin_first,
        )
        # Exhaust the iterator so callers get a concrete list.
        return list(bins)

    def pack_end(self, items: List[T], weight_func: Callable[[T], int]) -> List[List[T]]:
        """Pack ``items`` starting from the end of the list.

        The input is reversed before packing; afterwards both the order of
        the bins and the order of the items inside each bin are reversed
        again, so the returned bins list items in their original relative
        order.

        Args:
            items: The items to distribute into bins.
            weight_func: Returns the integer weight of a single item.

        Returns:
            The bins obtained by packing from the last item backwards.
        """
        back_to_front = list(reversed(items))
        packed_backwards = self.pack(items=back_to_front, weight_func=weight_func)
        # Undo the reversal at both levels: bin order first, then each bin's contents.
        bins_in_input_order = packed_backwards[::-1]
        return [list(reversed(group)) for group in bins_in_input_order]
46 changes: 45 additions & 1 deletion tests/utils/test_bin_packing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@

import pytest

from pyiceberg.utils.bin_packing import PackingIterator
from pyiceberg.utils.bin_packing import ListPacker, PackingIterator

# 2**31 - 1 (largest 32-bit signed integer); passed as ``lookback`` in the
# "Unlimited Lookback" parametrized cases.
INT_MAX = 2147483647


@pytest.mark.parametrize(
Expand Down Expand Up @@ -83,4 +85,46 @@ def test_bin_packing_lookback(
def weight_func(x: int) -> int:
return x

packer: ListPacker[int] = ListPacker(target_weight, lookback, largest_bin_first)

assert list(PackingIterator(splits, target_weight, lookback, weight_func, largest_bin_first)) == expected_lists
assert list(packer.pack(splits, weight_func)) == expected_lists


@pytest.mark.parametrize(
    "splits, target_weight, lookback, largest_bin_first, expected_lists",
    [
        # Single Lookback Tests
        ([1, 2, 3, 4, 5], 3, 1, False, [[1, 2], [3], [4], [5]]),
        ([1, 2, 3, 4, 5], 4, 1, False, [[1, 2], [3], [4], [5]]),
        ([1, 2, 3, 4, 5], 5, 1, False, [[1], [2, 3], [4], [5]]),
        ([1, 2, 3, 4, 5], 6, 1, False, [[1, 2, 3], [4], [5]]),
        ([1, 2, 3, 4, 5], 7, 1, False, [[1, 2], [3, 4], [5]]),
        ([1, 2, 3, 4, 5], 8, 1, False, [[1, 2], [3, 4], [5]]),
        ([1, 2, 3, 4, 5], 9, 1, False, [[1, 2, 3], [4, 5]]),
        ([1, 2, 3, 4, 5], 11, 1, False, [[1, 2, 3], [4, 5]]),
        ([1, 2, 3, 4, 5], 12, 1, False, [[1, 2], [3, 4, 5]]),
        ([1, 2, 3, 4, 5], 14, 1, False, [[1], [2, 3, 4, 5]]),
        ([1, 2, 3, 4, 5], 15, 1, False, [[1, 2, 3, 4, 5]]),
        # Unlimited Lookback Tests
        ([1, 2, 3, 4, 5], 3, INT_MAX, False, [[1, 2], [3], [4], [5]]),
        ([1, 2, 3, 4, 5], 4, INT_MAX, False, [[2], [1, 3], [4], [5]]),
        ([1, 2, 3, 4, 5], 5, INT_MAX, False, [[2, 3], [1, 4], [5]]),
        ([1, 2, 3, 4, 5], 6, INT_MAX, False, [[3], [2, 4], [1, 5]]),
        ([1, 2, 3, 4, 5], 7, INT_MAX, False, [[1], [3, 4], [2, 5]]),
        ([1, 2, 3, 4, 5], 8, INT_MAX, False, [[1, 2, 4], [3, 5]]),
        ([1, 2, 3, 4, 5], 9, INT_MAX, False, [[1, 2, 3], [4, 5]]),
        ([1, 2, 3, 4, 5], 10, INT_MAX, False, [[2, 3], [1, 4, 5]]),
        ([1, 2, 3, 4, 5], 11, INT_MAX, False, [[1, 3], [2, 4, 5]]),
        ([1, 2, 3, 4, 5], 12, INT_MAX, False, [[1, 2], [3, 4, 5]]),
        ([1, 2, 3, 4, 5], 13, INT_MAX, False, [[2], [1, 3, 4, 5]]),
        ([1, 2, 3, 4, 5], 14, INT_MAX, False, [[1], [2, 3, 4, 5]]),
        ([1, 2, 3, 4, 5], 15, INT_MAX, False, [[1, 2, 3, 4, 5]]),
    ],
)
def test_reverse_bin_packing_lookback(
    splits: List[int], target_weight: int, lookback: int, largest_bin_first: bool, expected_lists: List[List[int]]
) -> None:
    """Check ``ListPacker.pack_end`` against the expected bins for each case.

    Every item's weight is its own value, so the target weights in the table
    are expressed directly in terms of the integers being packed.
    """

    def weight_func(x: int) -> int:
        # Identity weight: each integer weighs itself.
        return x

    reverse_packer: ListPacker[int] = ListPacker(target_weight, lookback, largest_bin_first)
    assert reverse_packer.pack_end(splits, weight_func) == expected_lists