Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
fccb74b
test
mattmartin14 Jan 14, 2025
7298589
unit testing
mattmartin14 Jan 14, 2025
25bc9cf
adding unit tests
mattmartin14 Jan 14, 2025
af6c868
adding unit tests
mattmartin14 Jan 14, 2025
94be807
adding unit tests
mattmartin14 Jan 14, 2025
269d9f5
adding unit tests
mattmartin14 Jan 15, 2025
f44c61a
adding unit tests
mattmartin14 Jan 15, 2025
a96fdf9
finished unit tests
mattmartin14 Jan 16, 2025
fa5ab35
removed unnecessary return
mattmartin14 Jan 16, 2025
cfa2277
updated poetry manifest list for datafusion package dependency
mattmartin14 Jan 17, 2025
35f29be
added license headers, cleaned up dead code
mattmartin14 Jan 22, 2025
6c68d0d
updated the merge function to use bools for matched and not matched rows
mattmartin14 Jan 29, 2025
2d1e8ae
incorporated changes for boolExpression. It simplified the filters a lot
mattmartin14 Jan 31, 2025
f988f25
moved the filter build function to a separate function to accommodate …
mattmartin14 Jan 31, 2025
43393b4
removed unnecessary comment
mattmartin14 Jan 31, 2025
9a561b4
removed test files
mattmartin14 Jan 31, 2025
9ef39a6
bug fixes and removed some more dependency on datafusion
mattmartin14 Feb 3, 2025
2ba1ed6
updated various items including adding a dataclass return result
mattmartin14 Feb 4, 2025
a42eecd
updated merge_rows to remove dependency from datafusion! wahoo
mattmartin14 Feb 4, 2025
1305f58
renamed merge_rows to upsert, removed unnecessary code. will put in f…
mattmartin14 Feb 5, 2025
b2be3db
adding params to unit testing for pytest; having some errors
mattmartin14 Feb 5, 2025
f5688ad
fixed bugs on unit testing; added context wrapper for txn; fixed vari…
mattmartin14 Feb 5, 2025
7d55a4e
bug fixes
mattmartin14 Feb 6, 2025
2e14767
updated some error throwing items
mattmartin14 Feb 6, 2025
85c5848
moved datafusion to just a dev dependency in poetry toml
mattmartin14 Feb 6, 2025
6472071
updated UpsertRow class to be recognized in the return statement
mattmartin14 Feb 6, 2025
51c34da
removed some spaces and streamlined assert statements in unit testing
mattmartin14 Feb 6, 2025
862a69a
updated test cases to use an InMemory catalog
mattmartin14 Feb 7, 2025
3731b86
updated some formatting; added more commentary on the rows_to_update …
mattmartin14 Feb 7, 2025
bbb35d6
rebased poetry lock file and pyproject.toml file; removed sf repo info
mattmartin14 Feb 10, 2025
c8189c9
Merge branch 'main' into main
mattmartin14 Feb 10, 2025
02af4d4
updated equality checks with not instead of == false
mattmartin14 Feb 10, 2025
cc75192
ran ruff check --fix
mattmartin14 Feb 10, 2025
998d98b
manually added lint fixes and updated poetry toml and lock files. tha…
mattmartin14 Feb 11, 2025
513c839
added formatting fixes
mattmartin14 Feb 11, 2025
0fd6446
remove the node_modules
mattmartin14 Feb 11, 2025
5fc3478
updated code for another round of fixes
mattmartin14 Feb 11, 2025
6cef789
removed npm unneeded files
mattmartin14 Feb 11, 2025
40b69b8
fixed formatting on upsert function for docs build
mattmartin14 Feb 12, 2025
804c526
Merge branch 'main' into main
mattmartin14 Feb 12, 2025
09e0347
rebased for poetry lock files
mattmartin14 Feb 12, 2025
ca2d904
updated lock files. thanks kevin
mattmartin14 Feb 12, 2025
77375fb
fixed other changes
mattmartin14 Feb 12, 2025
ba4db49
fixed gitignore file
mattmartin14 Feb 12, 2025
622e66c
no whitespace
mattmartin14 Feb 12, 2025
9e79dad
fixed vendor fb file from kevins changes
mattmartin14 Feb 12, 2025
4cbf3e3
reverting vendor changes
mattmartin14 Feb 12, 2025
5333a1e
removing node modules
mattmartin14 Feb 12, 2025
11a25be
updating vendor files
mattmartin14 Feb 12, 2025
03a8d10
Update vendor/fb303/FacebookService.py
mattmartin14 Feb 12, 2025
8a2143c
updated vendor files
mattmartin14 Feb 12, 2025
e719cf8
updated vendor files
mattmartin14 Feb 12, 2025
245b4a9
attempting to update poetry files
mattmartin14 Feb 12, 2025
e3e9611
Merge branch 'main' into main
mattmartin14 Feb 12, 2025
e575b3c
restore vendor/
kevinjqliu Feb 13, 2025
e4e530f
restore pyproject.toml
kevinjqliu Feb 13, 2025
2ff2083
poetry lock
kevinjqliu Feb 13, 2025
8585d2d
add datafusion to tool.mypy.overrides
kevinjqliu Feb 13, 2025
f673b70
Merge remote-tracking branch 'apache/main' into StateFarmIns/main
kevinjqliu Feb 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
updated some error throwing items
  • Loading branch information
mattmartin14 committed Feb 6, 2025
commit 2e14767e2231c134beb4a15cbb0fc7f18d04e9a9
47 changes: 23 additions & 24 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1111,11 +1111,11 @@ def upsert(self, df: pa.Table, join_cols: list
from pyiceberg.table import upsert_util

if when_matched_update_all == False and when_not_matched_insert_all == False:
raise Exception('no upsert options selected...exiting')
raise ValueError('no upsert options selected...exiting')

if upsert_util.has_duplicate_rows(df, join_cols):

raise Exception('Duplicate rows found in source dataset based on the key columns. No upsert executed')
raise ValueError('Duplicate rows found in source dataset based on the key columns. No upsert executed')

#get list of rows that exist so we don't have to load the entire target table
matched_predicate = upsert_util.create_match_filter(df, join_cols)
Expand All @@ -1124,38 +1124,37 @@ def upsert(self, df: pa.Table, join_cols: list
update_row_cnt = 0
insert_row_cnt = 0

try:


with self.transaction() as txn:
if when_matched_update_all:
#function get_rows_to_update is doing a check on non-key columns to see if any of the values have actually changed
rows_to_update = upsert_util.get_rows_to_update(df, matched_iceberg_table, join_cols)
with self.transaction() as txn:

if when_matched_update_all:

#function get_rows_to_update is doing a check on non-key columns to see if any of the values have actually changed
rows_to_update = upsert_util.get_rows_to_update(df, matched_iceberg_table, join_cols)

update_row_cnt = len(rows_to_update)
update_row_cnt = len(rows_to_update)

#build the match predicate filter
overwrite_mask_predicate = upsert_util.create_match_filter(rows_to_update, join_cols)
#build the match predicate filter
overwrite_mask_predicate = upsert_util.create_match_filter(rows_to_update, join_cols)

txn.overwrite(rows_to_update, overwrite_filter=overwrite_mask_predicate)
txn.overwrite(rows_to_update, overwrite_filter=overwrite_mask_predicate)


if when_not_matched_insert_all:
rows_to_insert = upsert_util.get_rows_to_insert(df, matched_iceberg_table, join_cols)
if when_not_matched_insert_all:

rows_to_insert = upsert_util.get_rows_to_insert(df, matched_iceberg_table, join_cols)

insert_row_cnt = len(rows_to_insert)
insert_row_cnt = len(rows_to_insert)

txn.append(rows_to_insert)
txn.append(rows_to_insert)

return {
"rows_updated": update_row_cnt,
"rows_inserted": insert_row_cnt
}
return {
"rows_updated": update_row_cnt,
"rows_inserted": insert_row_cnt
}

except Exception as e:
raise e
#return UpsertResult(rows_updated=update_row_cnt, rows_inserted=insert_row_cnt)

def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None:
"""
Expand Down
1 change: 0 additions & 1 deletion pyiceberg/table/upsert_util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
Expand Down