Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions python/pyspark/pandas/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2872,6 +2872,10 @@ def to_markdown(self, buf=None, mode=None) -> str:
str
Series or DataFrame in Markdown-friendly format.

Notes
-----
Requires the `tabulate <https://pypi.org/project/tabulate>`_ package.

Examples
--------
>>> kser = ps.Series(["elk", "pig", "dog", "quetzal"], name="animal")
Expand Down
19 changes: 19 additions & 0 deletions python/pyspark/pandas/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1697,6 +1697,25 @@ def _select_cols_else(
)

def __setitem__(self, key, value):
if is_list_like(value) and not isinstance(value, spark.Column):
iloc_item = self[key]
if not is_list_like(key) or not is_list_like(iloc_item):
raise ValueError("setting an array element with a sequence.")
else:
shape_iloc_item = iloc_item.shape
len_iloc_item = shape_iloc_item[0]
len_value = len(value)
if len_iloc_item != len_value:
if self._is_series:
raise ValueError(
"cannot set using a list-like indexer with a different length than "
"the value"
)
else:
raise ValueError(
"shape mismatch: value array of shape ({},) could not be broadcast "
"to indexing result of shape {}".format(len_value, shape_iloc_item)
)
super().__setitem__(key, value)
# Update again with resolved_copy to drop extra columns.
self._kdf._update_internal_frame(
Expand Down
76 changes: 33 additions & 43 deletions python/pyspark/pandas/tests/test_ops_on_diff_frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -1151,25 +1151,17 @@ def test_frame_iloc_setitem(self):
pdf.iloc[[0, 1, 2], 1] = -pdf.max_speed
self.assert_eq(kdf, pdf)

# TODO: matching the behavior with pandas 1.2 and uncomment below test
# with self.assertRaisesRegex(
# ValueError,
# "shape mismatch: value array of shape (3,) could not be broadcast to indexing "
# "result of shape (2,1)",
# ):
# kdf.iloc[[1, 2], [1]] = -another_kdf.max_speed
with self.assertRaisesRegex(
ValueError, "shape mismatch",
):
kdf.iloc[[1, 2], [1]] = -another_kdf.max_speed

kdf.iloc[[0, 1, 2], 1] = 10 * another_kdf.max_speed
pdf.iloc[[0, 1, 2], 1] = 10 * pdf.max_speed
self.assert_eq(kdf, pdf)

# TODO: matching the behavior with pandas 1.2 and uncomment below test
# with self.assertRaisesRegex(
# ValueError,
# "shape mismatch: value array of shape (3,) could not be broadcast to indexing "
# "result of shape (1,)",
# ):
# kdf.iloc[[0], 1] = 10 * another_kdf.max_speed
with self.assertRaisesRegex(ValueError, "shape mismatch"):
kdf.iloc[[0], 1] = 10 * another_kdf.max_speed

def test_series_loc_setitem(self):
pdf = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=["cobra", "viper", "sidewinder"])
Expand Down Expand Up @@ -1269,36 +1261,35 @@ def test_series_iloc_setitem(self):
self.assert_eq(kdf, pdf)
self.assert_eq(ksery, psery)

# TODO: matching the behavior with pandas 1.2 and uncomment below test.
# with self.assertRaisesRegex(
# ValueError,
# "cannot set using a list-like indexer with a different length than the value",
# ):
# kser.iloc[[1, 2]] = -kser_another
with self.assertRaisesRegex(
ValueError,
"cannot set using a list-like indexer with a different length than the value",
):
kser.iloc[[1, 2]] = -kser_another

kser.iloc[[0, 1, 2]] = 10 * kser_another
pser.iloc[[0, 1, 2]] = 10 * pser_another
self.assert_eq(kser, pser)
self.assert_eq(kdf, pdf)
self.assert_eq(ksery, psery)

# with self.assertRaisesRegex(
# ValueError,
# "cannot set using a list-like indexer with a different length than the value",
# ):
# kser.iloc[[0]] = 10 * kser_another
with self.assertRaisesRegex(
ValueError,
"cannot set using a list-like indexer with a different length than the value",
):
kser.iloc[[0]] = 10 * kser_another

kser1.iloc[[0, 1, 2]] = -kser_another
pser1.iloc[[0, 1, 2]] = -pser_another
self.assert_eq(kser1, pser1)
self.assert_eq(kdf, pdf)
self.assert_eq(ksery, psery)

# with self.assertRaisesRegex(
# ValueError,
# "cannot set using a list-like indexer with a different length than the value",
# ):
# kser1.iloc[[1, 2]] = -kser_another
with self.assertRaisesRegex(
ValueError,
"cannot set using a list-like indexer with a different length than the value",
):
kser1.iloc[[1, 2]] = -kser_another

pdf = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}, index=["cobra", "viper", "sidewinder"])
kdf = ps.from_pandas(pdf)
Expand All @@ -1317,24 +1308,23 @@ def test_series_iloc_setitem(self):
self.assert_eq(kdf, pdf)
self.assert_eq(ksery, psery)

# TODO: matching the behavior with pandas 1.2 and uncomment below test.
# with self.assertRaisesRegex(
# ValueError,
# "cannot set using a list-like indexer with a different length than the value",
# ):
# kiloc[[1, 2]] = -kser_another
with self.assertRaisesRegex(
ValueError,
"cannot set using a list-like indexer with a different length than the value",
):
kiloc[[1, 2]] = -kser_another

kiloc[[0, 1, 2]] = 10 * kser_another
piloc[[0, 1, 2]] = 10 * pser_another
self.assert_eq(kser, pser)
self.assert_eq(kdf, pdf)
self.assert_eq(ksery, psery)

# with self.assertRaisesRegex(
# ValueError,
# "cannot set using a list-like indexer with a different length than the value",
# ):
# kiloc[[0]] = 10 * kser_another
with self.assertRaisesRegex(
ValueError,
"cannot set using a list-like indexer with a different length than the value",
):
kiloc[[0]] = 10 * kser_another

def test_update(self):
pdf = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
Expand Down Expand Up @@ -1863,7 +1853,7 @@ def test_frame_iloc_setitem(self):
another_kdf = ps.DataFrame(pdf)

with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
kdf.iloc[[1, 2], [1]] = another_kdf.max_speed
kdf.iloc[[1, 2], [1]] = another_kdf.max_speed.iloc[[1, 2]]

def test_series_loc_setitem(self):
pser = pd.Series([1, 2, 3], index=["cobra", "viper", "sidewinder"])
Expand All @@ -1889,7 +1879,7 @@ def test_series_iloc_setitem(self):
kser_another = ps.from_pandas(pser_another)

with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
kser.iloc[[1]] = -kser_another
kser.iloc[[1]] = -kser_another.iloc[[1]]

def test_where(self):
pdf1 = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [100, 200, 300, 400, 500]})
Expand Down