From f757f62df93f69c53e412f362a12a1b67f27fd77 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 9 Sep 2020 02:28:46 -0700 Subject: [PATCH 01/38] Backport PR #36183: DOC: Start 1.1.3 (#36242) Co-authored-by: Simon Hawkins --- doc/source/whatsnew/index.rst | 1 + doc/source/whatsnew/v1.1.2.rst | 2 +- doc/source/whatsnew/v1.1.3.rst | 42 ++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 doc/source/whatsnew/v1.1.3.rst diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index 1b5e63dfcf359..33c0750c1dc16 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -16,6 +16,7 @@ Version 1.1 .. toctree:: :maxdepth: 2 + v1.1.3 v1.1.2 v1.1.1 v1.1.0 diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index c6a08f4fb852a..81b8e7df11625 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -61,4 +61,4 @@ Other Contributors ~~~~~~~~~~~~ -.. contributors:: v1.1.1..v1.1.2|HEAD +.. contributors:: v1.1.1..v1.1.2 diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst new file mode 100644 index 0000000000000..e3161012da5d1 --- /dev/null +++ b/doc/source/whatsnew/v1.1.3.rst @@ -0,0 +1,42 @@ +.. _whatsnew_113: + +What's new in 1.1.3 (??) +------------------------ + +These are the changes in pandas 1.1.3. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_113.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- + +.. --------------------------------------------------------------------------- + +.. _whatsnew_113.bug_fixes: + +Bug fixes +~~~~~~~~~ +- + +.. --------------------------------------------------------------------------- + +.. _whatsnew_113.other: + +Other +~~~~~ +- + +.. 
--------------------------------------------------------------------------- + +.. _whatsnew_113.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.1.2..v1.1.3|HEAD From 019733ed3e6b859fb81bbcd1ffb2e5f26292af86 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sun, 13 Sep 2020 05:32:35 -0700 Subject: [PATCH 02/38] Backport PR #36323: CI: install numpy from pip #36296 (#36328) Co-authored-by: Fangchen Li --- ci/build39.sh | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/ci/build39.sh b/ci/build39.sh index f85e1c7def206..b9c76635df99b 100755 --- a/ci/build39.sh +++ b/ci/build39.sh @@ -3,16 +3,9 @@ sudo apt-get install build-essential gcc xvfb pip install --no-deps -U pip wheel setuptools -pip install python-dateutil pytz pytest pytest-xdist hypothesis +pip install numpy python-dateutil pytz pytest pytest-xdist hypothesis pip install cython --pre # https://github.com/cython/cython/issues/3395 -git clone https://github.com/numpy/numpy -cd numpy -python setup.py build_ext --inplace -python setup.py install -cd .. -rm -rf numpy - python setup.py build_ext -inplace python -m pip install --no-build-isolation -e . 
From ffc0092b69047b63089d7b0272247eb86a0e5d68 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sun, 13 Sep 2020 06:07:49 -0700 Subject: [PATCH 03/38] Backport PR #36249: BUG: na parameter for str.startswith and str.endswith not propagating for Series with categorical dtype (#36331) Co-authored-by: Asish Mahapatra --- doc/source/whatsnew/v1.1.3.rst | 2 +- pandas/core/strings.py | 2 +- pandas/tests/test_strings.py | 40 +++++++++++++++++++++++++++------- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index e3161012da5d1..c06990e3f2051 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -22,7 +22,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 6702bf519c52e..4decd86764ccc 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2050,7 +2050,7 @@ def wrapper2(self, pat, flags=0, **kwargs): @forbid_nonstring_types(forbidden_types, name=name) def wrapper3(self, pat, na=np.nan): result = f(self._parent, pat, na=na) - return self._wrap_result(result, returns_string=returns_string) + return self._wrap_result(result, returns_string=returns_string, fill_value=na) wrapper = wrapper3 if na else wrapper2 if flags else wrapper1 diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index d9396d70f9112..c792a48d3ef08 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -29,6 +29,8 @@ def assert_series_or_index_equal(left, right): ("decode", ("UTF-8",), {}), ("encode", ("UTF-8",), {}), ("endswith", ("a",), {}), + ("endswith", ("a",), {"na": True}), + ("endswith", ("a",), 
{"na": False}), ("extract", ("([a-z]*)",), {"expand": False}), ("extract", ("([a-z]*)",), {"expand": True}), ("extractall", ("([a-z]*)",), {}), @@ -58,6 +60,8 @@ def assert_series_or_index_equal(left, right): ("split", (" ",), {"expand": False}), ("split", (" ",), {"expand": True}), ("startswith", ("a",), {}), + ("startswith", ("a",), {"na": True}), + ("startswith", ("a",), {"na": False}), # translating unicode points of "a" to "d" ("translate", ({97: 100},), {}), ("wrap", (2,), {}), @@ -838,15 +842,23 @@ def test_contains_for_object_category(self): expected = Series([True, False, False, True, False]) tm.assert_series_equal(result, expected) - def test_startswith(self): - values = Series(["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"]) + @pytest.mark.parametrize("dtype", [None, "category"]) + @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) + @pytest.mark.parametrize("na", [True, False]) + def test_startswith(self, dtype, null_value, na): + # add category dtype parametrizations for GH-36241 + values = Series( + ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], + dtype=dtype, + ) result = values.str.startswith("foo") exp = Series([False, np.nan, True, False, False, np.nan, True]) tm.assert_series_equal(result, exp) - result = values.str.startswith("foo", na=True) - tm.assert_series_equal(result, exp.fillna(True).astype(bool)) + result = values.str.startswith("foo", na=na) + exp = Series([False, na, True, False, False, na, True]) + tm.assert_series_equal(result, exp) # mixed mixed = np.array( @@ -867,15 +879,23 @@ def test_startswith(self): ) tm.assert_series_equal(rs, xp) - def test_endswith(self): - values = Series(["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"]) + @pytest.mark.parametrize("dtype", [None, "category"]) + @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA]) + @pytest.mark.parametrize("na", [True, False]) + def test_endswith(self, dtype, null_value, na): + # add category dtype 
parametrizations for GH-36241 + values = Series( + ["om", null_value, "foo_nom", "nom", "bar_foo", null_value, "foo"], + dtype=dtype, + ) result = values.str.endswith("foo") exp = Series([False, np.nan, False, False, True, np.nan, True]) tm.assert_series_equal(result, exp) - result = values.str.endswith("foo", na=False) - tm.assert_series_equal(result, exp.fillna(False).astype(bool)) + result = values.str.endswith("foo", na=na) + exp = Series([False, na, False, False, True, na, True]) + tm.assert_series_equal(result, exp) # mixed mixed = np.array( @@ -3552,6 +3572,10 @@ def test_string_array(any_string_method): assert result.dtype == "boolean" result = result.astype(object) + elif expected.dtype == "bool": + assert result.dtype == "boolean" + result = result.astype("bool") + elif expected.dtype == "float" and expected.isna().any(): assert result.dtype == "Int64" result = result.astype("float") From 20003347f81d76b3cbaa8259b577e19f76603120 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sun, 13 Sep 2020 06:27:21 -0700 Subject: [PATCH 04/38] Backport PR #36147: REGR: Series access with Index of tuples/frozenset (#36332) Co-authored-by: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 2 ++ pandas/core/series.py | 22 +++++++++---------- pandas/tests/series/indexing/test_indexing.py | 21 +++++++++++++++++- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index c06990e3f2051..25d223418fc92 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -14,6 +14,8 @@ including other versions of pandas. 
Fixed regressions ~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) +- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/series.py b/pandas/core/series.py index ef3be854bc3bb..b6ff7b33d27cb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -881,21 +881,19 @@ def __getitem__(self, key): elif key_is_scalar: return self._get_value(key) - if ( - isinstance(key, tuple) - and is_hashable(key) - and isinstance(self.index, MultiIndex) - ): + if is_hashable(key): # Otherwise index.get_value will raise InvalidIndexError try: + # For labels that don't resolve as scalars like tuples and frozensets result = self._get_value(key) return result except KeyError: - # We still have the corner case where this tuple is a key - # in the first level of our MultiIndex - return self._get_values_tuple(key) + if isinstance(key, tuple) and isinstance(self.index, MultiIndex): + # We still have the corner case where a tuple is a key + # in the first level of our MultiIndex + return self._get_values_tuple(key) if is_iterator(key): key = list(key) @@ -955,7 +953,7 @@ def _get_values_tuple(self, key): return result if not isinstance(self.index, MultiIndex): - raise ValueError("Can only tuple-index with a MultiIndex") + raise ValueError("key of type tuple not found and not a MultiIndex") # If key is contained, would have returned by now indexer, new_index = self.index.get_loc_level(key) @@ -1009,9 +1007,11 @@ def __setitem__(self, key, value): # GH#12862 adding an new key to the Series self.loc[key] = value - except TypeError as e: + except TypeError as err: if isinstance(key, tuple) and not isinstance(self.index, MultiIndex): - raise ValueError("Can only tuple-index with a MultiIndex") from e + raise ValueError( + "key of type tuple 
not found and not a MultiIndex" + ) from err if com.is_bool_indexer(key): key = check_bool_indexer(self.index, key) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 3ed25b8bca566..1fafdf00393e1 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -383,7 +383,7 @@ def test_2d_to_1d_assignment_raises(): @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") def test_basic_getitem_setitem_corner(datetime_series): # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2] - msg = "Can only tuple-index with a MultiIndex" + msg = "key of type tuple not found and not a MultiIndex" with pytest.raises(ValueError, match=msg): datetime_series[:, 2] with pytest.raises(ValueError, match=msg): @@ -942,3 +942,22 @@ def assert_slices_equivalent(l_slc, i_slc): for key2 in [keystr2, box(keystr2)]: assert_slices_equivalent(SLC[key2:key:-1], SLC[13:8:-1]) assert_slices_equivalent(SLC[key:key2:-1], SLC[0:0:-1]) + + +def test_tuple_index(): + # GH 35534 - Selecting values when a Series has an Index of tuples + s = pd.Series([1, 2], index=[("a",), ("b",)]) + assert s[("a",)] == 1 + assert s[("b",)] == 2 + s[("b",)] = 3 + assert s[("b",)] == 3 + + +def test_frozenset_index(): + # GH35747 - Selecting values when a Series has an Index of frozenset + idx0, idx1 = frozenset("a"), frozenset("b") + s = pd.Series([1, 2], index=[idx0, idx1]) + assert s[idx0] == 1 + assert s[idx1] == 2 + s[idx1] = 3 + assert s[idx1] == 3 From 21ed2e8efe31309397f15680ddfa08320efab447 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sun, 13 Sep 2020 08:52:01 -0700 Subject: [PATCH 05/38] Backport PR #36316: BUG: Don't overflow with large int scalar (#36334) Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/dtypes/cast.py | 5 +++++ 
pandas/tests/series/test_constructors.py | 7 +++++++ 3 files changed, 13 insertions(+) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 25d223418fc92..5cbd160f29d66 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -25,6 +25,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ - Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`) +- Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e6b4cb598989b..a87bddef481b5 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -697,6 +697,11 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, else: dtype = np.dtype(np.int64) + try: + np.array(val, dtype=dtype) + except OverflowError: + dtype = np.array(val).dtype + elif is_float(val): if isinstance(val, np.floating): dtype = np.dtype(type(val)) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index ce078059479b4..f811806a897ee 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1474,3 +1474,10 @@ def test_construction_from_ordered_collection(self): result = Series({"a": 1, "b": 2}.values()) expected = Series([1, 2]) tm.assert_series_equal(result, expected) + + def test_construction_from_large_int_scalar_no_overflow(self): + # https://github.com/pandas-dev/pandas/issues/36291 + n = 1_000_000_000_000_000_000_000 + result = Series(n, index=[0]) + expected = Series(n) + tm.assert_series_equal(result, expected) From ecb5e408c6d510f6de4ec764775b4de707228c9c Mon Sep 17 00:00:00 2001 From: MeeseeksMachine 
<39504233+meeseeksmachine@users.noreply.github.com> Date: Sun, 13 Sep 2020 08:52:27 -0700 Subject: [PATCH 06/38] Backport PR #36303: REGR: Fix IntegerArray unary ops regression (#36333) Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/conftest.py | 13 ++++++ pandas/core/arrays/integer.py | 9 ++++ pandas/core/generic.py | 5 ++- .../tests/arrays/integer/test_arithmetic.py | 38 +++++++++++++++++ pandas/tests/frame/test_operators.py | 2 +- pandas/tests/series/test_operators.py | 41 +++++++++++++++++++ 7 files changed, 107 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 5cbd160f29d66..2457d00eb2173 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -14,6 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ +- Fixed regression in :class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) - diff --git a/pandas/conftest.py b/pandas/conftest.py index e0adb37e7d2f5..74cab2e038a27 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1032,6 +1032,19 @@ def any_nullable_int_dtype(request): return request.param +@pytest.fixture(params=tm.SIGNED_EA_INT_DTYPES) +def any_signed_nullable_int_dtype(request): + """ + Parameterized fixture for any signed nullable integer dtype. 
+ + * 'Int8' + * 'Int16' + * 'Int32' + * 'Int64' + """ + return request.param + + @pytest.fixture(params=tm.ALL_REAL_DTYPES) def any_real_dtype(request): """ diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 57df067c7b16e..0cbcd905e4a8c 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -364,6 +364,15 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): ) super().__init__(values, mask, copy=copy) + def __neg__(self): + return type(self)(-self._data, self._mask) + + def __pos__(self): + return self + + def __abs__(self): + return type(self)(np.abs(self._data), self._mask) + @classmethod def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "IntegerArray": return integer_array(scalars, dtype=dtype, copy=copy) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 935bad2624637..be85ab251c0c3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1310,7 +1310,10 @@ def __pos__(self): ): arr = operator.pos(values) else: - raise TypeError(f"Unary plus expects numeric dtype, not {values.dtype}") + raise TypeError( + "Unary plus expects bool, numeric, timedelta, " + f"or object dtype, not {values.dtype}" + ) return self.__array_wrap__(arr) def __invert__(self): diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index d309f6423e0c1..f549a7caeab1d 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -261,3 +261,41 @@ def test_reduce_to_float(op): index=pd.Index(["a", "b"], name="A"), ) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "source, target", + [ + ([1, 2, 3], [-1, -2, -3]), + ([1, 2, None], [-1, -2, None]), + ([-1, 0, 1], [1, 0, -1]), + ], +) +def test_unary_minus_nullable_int(any_signed_nullable_int_dtype, source, target): + dtype = any_signed_nullable_int_dtype + arr = pd.array(source, 
dtype=dtype) + result = -arr + expected = pd.array(target, dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "source", [[1, 2, 3], [1, 2, None], [-1, 0, 1]], +) +def test_unary_plus_nullable_int(any_signed_nullable_int_dtype, source): + dtype = any_signed_nullable_int_dtype + expected = pd.array(source, dtype=dtype) + result = +expected + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "source, target", + [([1, 2, 3], [1, 2, 3]), ([1, -2, None], [1, 2, None]), ([-1, 0, 1], [1, 0, 1])], +) +def test_abs_nullable_int(any_signed_nullable_int_dtype, source, target): + dtype = any_signed_nullable_int_dtype + s = pd.array(source, dtype=dtype) + result = abs(s) + expected = pd.array(target, dtype=dtype) + tm.assert_extension_array_equal(result, expected) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index fede1ca23a8ce..8cf66e2737249 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -119,7 +119,7 @@ def test_pos_object(self, df): "df", [pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])})] ) def test_pos_raises(self, df): - msg = re.escape("Unary plus expects numeric dtype, not datetime64[ns]") + msg = "Unary plus expects .* dtype, not datetime64\\[ns\\]" with pytest.raises(TypeError, match=msg): (+df) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index e1c9682329271..aee947e738525 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -536,3 +536,44 @@ def test_invert(self): ser = tm.makeStringSeries() ser.name = "series" tm.assert_series_equal(-(ser < 0), ~(ser < 0)) + + @pytest.mark.parametrize( + "source, target", + [ + ([1, 2, 3], [-1, -2, -3]), + ([1, 2, None], [-1, -2, None]), + ([-1, 0, 1], [1, 0, -1]), + ], + ) + def test_unary_minus_nullable_int( + 
self, any_signed_nullable_int_dtype, source, target + ): + dtype = any_signed_nullable_int_dtype + s = pd.Series(source, dtype=dtype) + result = -s + expected = pd.Series(target, dtype=dtype) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "source", [[1, 2, 3], [1, 2, None], [-1, 0, 1]], + ) + def test_unary_plus_nullable_int(self, any_signed_nullable_int_dtype, source): + dtype = any_signed_nullable_int_dtype + expected = pd.Series(source, dtype=dtype) + result = +expected + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "source, target", + [ + ([1, 2, 3], [1, 2, 3]), + ([1, -2, None], [1, 2, None]), + ([-1, 0, 1], [1, 0, 1]), + ], + ) + def test_abs_nullable_int(self, any_signed_nullable_int_dtype, source, target): + dtype = any_signed_nullable_int_dtype + s = pd.Series(source, dtype=dtype) + result = abs(s) + expected = pd.Series(target, dtype=dtype) + tm.assert_series_equal(result, expected) From 4bebdbc58276405fa7c02b5d1ece5f86c9d7312e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 13 Sep 2020 20:21:21 +0100 Subject: [PATCH 07/38] Backport PR #36231: BUG: Fixe unintentionally added suffix in DataFrame.apply/agg and Series.apply/agg (#36340) Co-authored-by: Kaiqi Dong --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/aggregation.py | 3 +-- pandas/core/groupby/generic.py | 2 ++ pandas/tests/frame/apply/test_frame_apply.py | 11 +++++++++++ pandas/tests/groupby/aggregate/test_aggregate.py | 15 +++++++++++++++ pandas/tests/series/apply/test_series_apply.py | 8 ++++++++ 6 files changed, 38 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 2457d00eb2173..d789518f93f6d 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -14,6 +14,7 @@ including other versions of pandas. 
Fixed regressions ~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.agg`, :meth:`DataFrame.apply`, :meth:`Series.agg`, and :meth:`Series.apply` where internal suffix is exposed to the users when no relabelling is applied (:issue:`36189`) - Fixed regression in :class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 891048ae82dfd..73e470e4f2610 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -63,7 +63,7 @@ def reconstruct_func( Examples -------- >>> reconstruct_func(None, **{"foo": ("col", "min")}) - (True, defaultdict(None, {'col': ['min']}), ('foo',), array([0])) + (True, defaultdict(<class 'list'>, {'col': ['min']}), ('foo',), array([0])) >>> reconstruct_func("min") (False, 'min', None, None) @@ -87,7 +87,6 @@ def reconstruct_func( if relabeling: func, columns, order = normalize_keyword_aggregation(kwargs) - func = maybe_mangle_lambdas(func) return relabeling, func, columns, order diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f5858c5c54f1d..08c988fa05b6a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -946,6 +946,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs ) + func = maybe_mangle_lambdas(func) + result, how = self._aggregate(func, *args, **kwargs) if how is None: return result diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py index 5a1e448beb40f..adcd54fd7a7dc 100644 --- a/pandas/tests/frame/apply/test_frame_apply.py +++ b/pandas/tests/frame/apply/test_frame_apply.py @@ 
-1550,3 +1550,14 @@ def test_apply_empty_list_reduce(): result = df.apply(lambda x: [], result_type="reduce") expected = pd.Series({"a": [], "b": []}, dtype=object) tm.assert_series_equal(result, expected) + + +def test_apply_no_suffix_index(): + # GH36189 + pdf = pd.DataFrame([[4, 9]] * 3, columns=["A", "B"]) + result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) + expected = pd.DataFrame( + {"A": [12, 12, 12], "B": [27, 27, 27]}, index=["sum", "<lambda>", "<lambda>"] + ) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index ce9d4b892d775..127d3fadee555 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1074,3 +1074,18 @@ def test_nonagg_agg(): expected = g.agg("cumsum") tm.assert_frame_equal(result, expected) + + +def test_agg_no_suffix_index(): + # GH36189 + df = pd.DataFrame([[4, 9]] * 3, columns=["A", "B"]) + result = df.agg(["sum", lambda x: x.sum(), lambda x: x.sum()]) + expected = pd.DataFrame( + {"A": [12, 12, 12], "B": [27, 27, 27]}, index=["sum", "<lambda>", "<lambda>"] + ) + tm.assert_frame_equal(result, expected) + + # test Series case + result = df["A"].agg(["sum", lambda x: x.sum(), lambda x: x.sum()]) + expected = pd.Series([12, 12, 12], index=["sum", "<lambda>", "<lambda>"], name="A") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/apply/test_series_apply.py b/pandas/tests/series/apply/test_series_apply.py index 308398642895c..803f4e5e54d2b 100644 --- a/pandas/tests/series/apply/test_series_apply.py +++ b/pandas/tests/series/apply/test_series_apply.py @@ -471,6 +471,14 @@ def test_transform_none_to_type(self): with pytest.raises(TypeError, match=msg): df.transform({"a": int}) + def test_series_apply_no_suffix_index(self): + # GH36189 + s = pd.Series([4] * 3) + result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) + expected = pd.Series([12, 12, 12], index=["sum", "<lambda>", 
"<lambda>"]) + + tm.assert_series_equal(result, expected) + class TestSeriesMap: def test_map(self, datetime_series): From 2dd1f080641ea97e8b85a1ed3526f91f8db4867f Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sun, 13 Sep 2020 14:42:33 -0700 Subject: [PATCH 08/38] Backport PR #36174: BUG: Ensure read_spss accepts pathlib Paths (GH33666) (#36347) Co-authored-by: Dan Moore <9156191+drmrd@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/io/spss.py | 4 +++- pandas/tests/io/test_spss.py | 7 +++++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index d789518f93f6d..8e283aec39786 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -26,6 +26,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Bug in :func:`read_spss` where passing a ``pathlib.Path`` as ``path`` would raise a ``TypeError`` (:issue:`33666`) - Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`) - Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index 9605faeb36590..79cdfbf15392a 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -7,6 +7,8 @@ from pandas.core.api import DataFrame +from pandas.io.common import stringify_path + def read_spss( path: Union[str, Path], @@ -40,6 +42,6 @@ def read_spss( usecols = list(usecols) # pyreadstat requires a list df, _ = pyreadstat.read_sav( - path, usecols=usecols, apply_value_formats=convert_categoricals + stringify_path(path), usecols=usecols, apply_value_formats=convert_categoricals ) return df diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py index 013f56f83c5ec..a4894ff66ab9f 100644 --- a/pandas/tests/io/test_spss.py +++ 
b/pandas/tests/io/test_spss.py @@ -1,3 +1,5 @@ +from pathlib import Path + import numpy as np import pytest @@ -7,9 +9,10 @@ pyreadstat = pytest.importorskip("pyreadstat") -def test_spss_labelled_num(datapath): +@pytest.mark.parametrize("path_klass", [lambda p: p, Path]) +def test_spss_labelled_num(path_klass, datapath): # test file from the Haven project (https://haven.tidyverse.org/) - fname = datapath("io", "data", "spss", "labelled-num.sav") + fname = path_klass(datapath("io", "data", "spss", "labelled-num.sav")) df = pd.read_spss(fname, convert_categoricals=True) expected = pd.DataFrame({"VAR00002": "This is one"}, index=[0]) From 7361ccb3c93976a6cff7646f9ef5c3fe383e3c80 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 14 Sep 2020 14:47:58 +0100 Subject: [PATCH 09/38] Backport PR #36175: BUG: read_excel for ods files raising UnboundLocalError in certain cases (#36355) Co-authored-by: Asish Mahapatra --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/io/excel/_odfreader.py | 26 +++++++++++++----------- pandas/tests/io/data/excel/gh-35802.ods | Bin 0 -> 12692 bytes pandas/tests/io/data/excel/gh-36122.ods | Bin 0 -> 8974 bytes pandas/tests/io/excel/test_readers.py | 17 ++++++++++++++++ 5 files changed, 32 insertions(+), 12 deletions(-) create mode 100755 pandas/tests/io/data/excel/gh-35802.ods create mode 100755 pandas/tests/io/data/excel/gh-36122.ods diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 8e283aec39786..7f5340b7022ce 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -18,6 +18,7 @@ Fixed regressions - Fixed regression in :class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) +- Fixed regression in :meth:`read_excel` 
with `engine="odf"` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, and :issue:`35802`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 44abaf5d3b3c9..40e2665306d03 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -191,22 +191,24 @@ def _get_cell_string_value(self, cell) -> str: Find and decode OpenDocument text:s tags that represent a run length encoded sequence of space characters. """ - from odf.element import Element, Text + from odf.element import Element from odf.namespaces import TEXTNS - from odf.text import P, S + from odf.text import S - text_p = P().qname text_s = S().qname - p = cell.childNodes[0] - value = [] - if p.qname == text_p: - for k, fragment in enumerate(p.childNodes): - if isinstance(fragment, Text): - value.append(fragment.data) - elif isinstance(fragment, Element): - if fragment.qname == text_s: - spaces = int(fragment.attributes.get((TEXTNS, "c"), 1)) + + for fragment in cell.childNodes: + if isinstance(fragment, Element): + if fragment.qname == text_s: + spaces = int(fragment.attributes.get((TEXTNS, "c"), 1)) value.append(" " * spaces) + else: + # recursive impl needed in case of nested fragments + # with multiple spaces + # https://github.com/pandas-dev/pandas/pull/36175#discussion_r484639704 + value.append(self._get_cell_string_value(fragment)) + else: + value.append(str(fragment)) return "".join(value) diff --git a/pandas/tests/io/data/excel/gh-35802.ods b/pandas/tests/io/data/excel/gh-35802.ods new file mode 100755 index 0000000000000000000000000000000000000000..f3ad061f1d995488bf029f926dc3eba60fdded2f GIT binary patch literal 12692 zcmdseWmp|c)-DoULU4C?cMnc*cXx-4ySozzPH=a3cZZ+>g1ft4nEB4+%(-Xg`R?Dl z_S3zqclUa$YgKiv)vMkuD+&4m83+gx2q@G5MJ~{iBa9XZ2L-YWd9BG4d!2h z{#N>IV{L3^;%NU5GzUgHfQ^l%zMehZ|C^SvrHvlI=>Jvg?M@y4dtI1+bBDGzwvM*% 
zs{iF4|4z>Vpa*bt_+1|)B;-G={4aC770CXkK0N~iBTJ(<1KHTq893V8znj;^$`TYv zRuba9SAam?cW?5aO@{q;_OFJvwYM>`H*)yX4gJyW{{Z`+-QP9jy^C!1tc@)HivjTd zrbaU>Jrg4bIzcmlm7cA`|0w^R6=2_#t@NzTjEx)swDyL^WAS6w{`3d|r`&-?;|u(B zU|{y-!%gOyoh(7BV00HLh^y^U@yc?!#Rr+Qo8=-*Yf-hS$9mX<;njs*ip&NSB*S8+ zb^BDDEwoK}%bZIK^;CE%IOpd6d6|^W&eoJ1I*EZaY_OISHQD4YDVN+bt%O}a*zLq% z`)i{hp>QJw&91jRFgeCIpo~@I0EbyIuDAF+;VYa)S=G>b_3~}O_4u*A{E<*!h64cp z*v-J27x`oasAX zBTCRsV?2>LXyI~w-F=~*c*WSLsC5#u^^(7Fb7+$r72iC)-jP-cZ$2kp(?V1j@PAH2s;WS}92kX;!lb!>h{-nVYW&Lna{ zc~oYce_}WQXhXtrNK0;;9D%%vF!Cy+;H50`)=CWu&-Q6==Ew5tK6WqRE?$=74hWiVtNIdhWtK?kbFS-bbi)0iEqq;EyeLdmU9UXsiLtP!P-ss5qx zpdIGq<)tbesXjnO0O^P5X9s$8I!ZrVkjb-ri!??-3{ttcsa&xd?#xF`#^IovWC;X* z4t$qL0}=b6OGqR3oPZs1D6|D;CFUO5W-uum&x!-dv^E{23*Tv(1g~_mHbJepdz*40 z6%>wcI!Fg2zXm~=LzK0VGL+0VclyYUbpp0q`P+f9Tke%yJ6rIDqq2L7Mt_E8*Llew zK#U(c5T< z=(h*V0pMzBTUz2fA1weO(S zW31pHXH8cVo*B<|(UsBFGSzX7==8e4Lu2I%I9iVUh{-znym`4ot*(#u^kgy%*RadY znG(($06&_sIXDYvzUSqn=x+0eoAu1qh2^sE>yg$L1EBRDU6(piM^a)&Bg^cxH-o}k zHLOwPbycw@E?6))l?~^0k;l4J#myWQHG0u-PR=!S z9blr)C2hNysS7kG~LH@ubo$g`B6DY^Z9ffmzj79Hp10 zF@Yz)7^oqCO%BIdzRsCebX^Ues;00i8fv`G3yiP(xFFj4)ojsl1#PC%Yoey8R=dLF z$XXL{iZ{BEePyi(u(%w)+(_=8qpi$r+qUXojdQcNbf!U_;bzU0*q}PZX??@nj{h$LWZ6iucf|5d{%ln zVQ#m9%3VFXJsS7E1)3_Y)C%Wg^ul&MMAfsJ4Usu-($ec93nWPBhPs)Zd3J*R_Dfwx zm7pBlO!+!KL2)RmJizbCQ-Dop%!`C=@zv{jdvQ$0gwIYF z7;3p@j88(sAA|1Q_?~e&>TebDHEr!U#aX0AV7M$ljUeEiXTVKJezk?wA{auH0?!~c-(49#^?oLpar6_E0Kco zs=5v0bB)f733SSpu2M0AcJiAbTW$cYX?+~={=(Zql|ZY3@TFNzP6n<5g1{y>FsfnC zG|1O>)}EZ4NJgM1pCdAiYfzXAjl;c*U=!8J(_d(!i$*@BR-eny+J(1zuTl7$K@pC0 z%#`d&D%(bd-xs2f9Bjo|95e?XMsuW6Kd7%+H4xqpQBrJCxoZ4^Sip%va;BDK^9y}IVvN2E z#_L$S=UW+|4aXjUl_JR1bekKEiF0Yvm=*lBONWN=%cmVJ$ux-6rJ<4LjG!6JVSglu z8aI(xy!|Hyy7Grj^}SG|z0gQ4HfI3!54M~ndip;8pMugY!bUMWY&*!8;F_%Tu>4OZ ztCQXdiO&Vw=atRvvbl^~RU&44$On-eflHu8u#4Wa#xFz;0jF*B)@nYB{A!^kHM1#t zQ819nFd?C``>~~xJACvjmHZwn1Bu=OeC@dMO9ET;-*!!hTYo0>&MUfR8tsui96_G`-rwInx1j&&gx5XJ{OnTH$ zY=Z5l9*qS47U;^)QFGT1ybH>~2P}T%d{u>(9FNkPnNnXjf@v3jDvX<-jmARH6(9Jy 
zRrpA&H-`r9UbT{@%d7ijF8AJ1!_W;%bZuF5eiO+lWQj5zgGg;t226`l^okKzUZ&eU zMSwG>iiP=mnp!TSajZnLQATsP4)$`k+A$O+yn57WWq`u;chw)iR_B~pN;4_l`^U}O zZ8>%!PcWvv1!N6?Yy#l!dcTZhR_fJt*4x_s1wL&?22UtuN<|bOKRMop-+ZY)~exh&w}vsu?QI2^ZpU+#f5Dm7@~1cUF$? zL5fuF`Fv1;-UM|m?WURWNR!argon5LQ_^|lT3%~n=t@6t*x|mXW3*4Hh=oz(R>@cu zrY<$BoxPVfGco}VeZ=~+ThUrfdm7|C&``w!=S0WT^6q{QpAK=SwpjU8XVnRBo1@{G zUj))&K2&KKb5Lze&+E%aS>#eNoQ|l;kK733eVRF4wG!)M{>OMNhEI!=bvrz#n=F$U zy*ilrZIswGbp-kDKQfqlUW4IKwAvAW}|==ytjFgNLtabl4!sZvHe4ic)5o|nU%$)^4yhcwc31- z1{;e9G@!h_S%v-FVxVen4BJAj9ut=zvClblt(!}uhZ)T6fo9s{C}Y^L{G%DM&xNd? zhF!iQ2*o;W>c&^z48SJBm&9L9H_*K?=V=NL#FTU{Jpph*?K)yD-5hEPF8d{WB<(G}V;xW=fm#CSeH|3trp z-?@T7(hcHlkp_uMT3+44--9m<7KMz0FRw@&r@x+9Mm|*FWyb-`T_=zV?4D?Yi_><2 zLcLg57T?Ct9B*OjL~AXOv@sDvZki=92RUW6nrY2KqCIXiWN4c(leuZnn!5H7O?KU0(0Y75&MF&GmKtpK+<69^d4txzehb*zgvI ztdVQ=WUoy9!@<_(iRhAn?eEPSNr7%wBrG)iv-ZxLn|il*@)T0z{Ml{X0xjj9lyXvj za$}6RsS-_wO)DJufnS9ZQ))?FN+ME&Qz-&o?Lu{t!=M^)ZzleyD?CEYOa*M30 zy|V)6pA@N6UPG5<_o9TTjcI@;rZXsSreaNJMEiLN|9!KEUL{8bS4iA9t>Xqas5?+p z;v52j6E4~?<2@M`}ReHC0#@~$!?dI#y&Z) z)#Q{~^sFk@guR?C>r@y|WmB#W^Rtk+=4Azdnc_&^{)g4Sn#?56wr2f^&O2-0&p&VjKF2&xR z=Ho41P%RIT6D^Z}KKO?=A-v~|5o}kv=TV_0!Si(+w$or&n-XL}gxT`eO*ePijwPaFB0BS(V5L>xCI-GycQ|8&Sq}50ZC9lHy=zHzJ{#f9 zID@F0csRxN*1!N>l=32xRmyG>7Plva4=5ZqUf$wcv8}N(KSomLZnIZ`_~-=AiaRrV3prNm|dvV_S9O-A+)H1ZR&{{dIdAyW}wq(E1 zU&GlnocMfZ;ur(3Ve&wKl&r3yy>L`hSsO@c^YLdQnsUmF*7CHV2K@3-0O(S;t1M~NM3`|DEX)-lGDqJ@I0r+7X1#nXLAJikeQXnI_yvnufyyyR zJ%7^F)ywjA!4lW>ww&=M>A)>cl!(JwV@;KP)#P;t9Mh1>!VB}&#vypjh7tp=&Pm!^ z+3fFADlPen+SZ>?pP}QSuj(AZ45ssAUx80!Ujvo4m==wl@I9Cw%+kOtl zjX~iA9?@b!-2NiqT>c_Eoc{SgANiGEDxyB?eI}2g2`C|wGmU^4$&Oh!K5i%p63mH7 z#e}JpiWKgVf&(*D*gW8kg2Eu9z;`E7xy6vu4GEUWZmYoP4D6OzS;_it+X&L6kn3}d zOabQ{h8&ZgU9xFANiPMwXy43<7~Fq{72eQsk?IaPTxg1IkMsq2`GW$cwI2n(31I|4 z8V(b#Ty)?EQ2=^G@uT*-5+zRK6xwbX0f!A-zw4oKS^)?xUqtz5KX93YJ7tXi5W9&| z2z2mbgTZ4(0+S=kg$OMxb_10Xk8d@SOIXX@c)<3SrEG#Bk0foYyQ{X}^VN67zK(OK z_-<)}ML2v7N`v`EPtPgn1LxnLgi0?GVUy-a@Og!ym8D3K6unRv3fx@WPeAq*`|{Q$ 
zJSrcLrBxWBhj{N{FP6DXZ-&}Kn75Q))`SppkO?97y3GRXVT-A{bx71OC0gv!Gld$9 zn}G6}$KLX^AdRytvNeUvQg41T8?{wsVjx&iVBoCI6YvVv1g>Q4Mp|i%hmF+nQ--e@ zeI|I9C$dDASYESHwy;r%8nsTPo4qgbfh_{iZK`8*eyHMleZ8l5ThjW1$s@$ZJ?*Iz zsJK4#v34PvWDf3>eb8vvfsC8-0yc^WQRs##FX>x(#yNx;zc#zq2By`Pm-!;CUdqh2 ziRIDF%k$L~>$SS7*fx+@9e}R8nH6=q7zB0wn$vV}+jJ0A;Is9oNIIbu5qLf->&N)x zO?4}#@A|D9m>9F<=Tn1y(}6SlM9XFQ>7`*#^}-?tq6%kPVUd9oC-+-U9?|vYEu5ri zy)7jBu}BlFP>Ws-i2IrG)g&TmHJMCAVKkWMIaZITunT=|fe2@NMt6hE%s%B05?l0e zhquZoIiB#F65x`x;yfZwXrF!j> zW7$$pPt6%syxsks~7A-4Gf`FX`VX^w7f=|Rh^iMNcB2AYj60#lZB#e;-d{dCZ z5NbKw5eTPZ=@gG~$rhmVaS1H1jd9_0OAyv>pqE>b>s{w7_m6^gV%I8F+C64N;4TEu z=7^MTWN%LM(-3_WCaz~X4+cf|uX!InmBz8Nrq%SIlg@i1A|Uo0QFqlumx~nJL?#v5 zp>d)2N!CoUK^s&J)l7{GkMkPbDCFEK91Jbc91jdhmneU0{h{t)UYpemR%8+1*;Ppc ztj0z8qP~T8QPCeiX1WdI3G_U`wOm)3D#KWM6@ZeSJ0c)cGQR!5r8Uz|tz4}7DDTN! zS)`LlsZ{D&v`T#xr7_?f#4ULlFZ6Ct_jjc1BO7le9dpX)_bw>!vr{9jA7 z&y&>9^#oFe`x!xLjV;_QK{6sN>$-}rR^pb~^;CTS!NH{=!bIp6p0qs<;rja_#~8&AK$esF`$T6?k)U64u| zEF{K=0#p9LU`-cKC#lziiRGOO;)}>OygdhQ?w)U`g$bjORh}%}S3M{2EfP!bo~Yx* z&s3QLSb-c|oMqoZl!Un&>D})L?viBu>xHI%-QrCEiSsZxhF<4IAYVPI1zuV)hDXCd zQ4<6M801jF>_`TClZB>J_q?;wB?Mr2pUe=UJQ{y8Kl-UJ)YdMDjf{CXvL&Ix2rJKm zdG-c9qfw>r2Ae&*0mcUnYYf2>+pjCN$*4ZetK09W@p=m-Icjj>ZVb-2-+8~M^PWhx zlar8ufH-i0|0liwcL`sZw{)I?jWytZRtDNsHEfnyk-gR`=$UmFa1HyUxtO&#vL%(X zG^T+ixsGVE@`ysINT7+gCB2{FDPr>thm_{a=<0hN9^oefgPXmY=oTzt1m(3?KZM^4 z)B7{-K+)8s{(5eiUl0Ga{g7&rkhw?HZ+ni7$;nT~O9TBIVKqXE5 zwImK1UxiZp99n{^h4ZT#M$4ER^>$t}lzm>Y67sQA%l(P{xG-wlBhhrNY?rohA{UZ) zcTi&z8d{PZDv406t)$*<+1G{LV)O)6O)V|qpaWGB9J#iT7q>?52nTX8G07q^41WiF zGC`>ou%4q9x`stXHN!r3WEAW5Am0k&Pljwt;X8f0j~m&X;QW%zT@CGg(H|5w$w$gI z8Gd}*Id&<5hb4V1N%q_aK~APDb@g22$=MD*XP#6XNx3Kzxrqy;z=ML8DJWj27^cyp zH-M-q24ljHX9odS7ZM4-@BRw9`}AciLwM|wL5enh-^@PwZZ%gE`B8ZR!{(TpxJv7LY*!cM(0n!I|~Dlivos%h9=- z6Ha!N!!FVdCF2)mzjNo1F_`{3kP1mFwC;U>&~6zvC&fd*QLW@b~4(-P>e(BN$5i& zg(Pe0u2Ft z!7CZQ3tBCiTP`{?PmQ^8=)5$B!1%BEJHRhrQ0cqBY&cYgR(T?M%!39|@fHW=qB&%# 
zMePpvJuP06VDf!zOhmYlnDVQfOj6Dc+HXX`=(j?HNh9G8KpDcX)nX=LQQ#@y4oy^kPV?@Xy6}0~| z2Ut{J;kVY6Q)y=`oR9rZT{R`!G)G;RW5?*vAwj5UUpZIG5eG0gy3I@VboqY;!XGIo zwFZ2OV)YTpb3Hl3K>cb^TEndvJiLFz)2H^JRuNgbIj}`;4$UJ}#QNI>_g8cB#C- zy;}dymh}a%m#3w0WW@!t;ApFJaeGSYm>$<1jG~{S;c{ z*Yja;U2@*r&`C04<#1XBgwx6DU z=x^gD2{U)Ff9j=U7j3?Fi@zarH2HM+<~M~8Xq zWU!js$zM8MG+i1Y(qmoc74+SM=&aSVLV*JT5xy<9|8s%(?>xx+imQVW0AOZq^3Nq9 zbt~s=PJ|aPZ{cX2Q!}L{Hx-#f+}d`4N*PG|ISqe8DZx6iKyAkh`0Q#qRawbojr+-i z09c$!(2(IyvTw@-)khs_g3XY)N6qu%m=>Sje0ajk$Dfm?+Frw+9zpfRIDUS#AF{L6v=;us6d|KS3tH1dWt#CP*#Yl=o;kR52!eQrpTiMB|3^UKG)5YZ5*sYloI_ z{o|4FkPyu}+oab|TLnH`wxrbpxJ06j!C5GIiriRja8o~d3SFCQJ*gk^Af67*~ADRmafOC0P6#zESTiJ!0cP zvv^+X=CcbguIHCT&#~|sond*AUQ6(wgIAV>rZCya$7*#hWTE_UEw9NLFFz4OUPP<* zffIy%L3D*&o&5*6E_}|f2U53vA={Naz3}hPjaG7D1o%zGy1?9a4hR~UNSxrBp=3jp z#@~W)*dcB>#phskGw_c(hvu6o_U+SzqE-g%+{6cUmbvYnvT88;7C@+D>5y$k$RzGJ zr(RtF#95@FGuacmLOjxEjRw8nVy0BRGhS{&&)gLa!P3VvqGb<}6`Uru5~l02e&SPJcSEn8^NiXv^1CKuCOZSD*Wl zJsW|Mn^g>%18HSHLh5GF5yZpa+y|!dRM8A_0cga130Wgqd=fut{Hv&Il9*VQ_L%iJwF>0#a*d(Jx^0*N;*pXru7 z)Ueal@}^kOo^e)uO?A||920N&i8J^*9414Cc@ix(Z+>gL5`J`Wmsrr`6OoawEVltW zyyAxLgsZr!w8?>})8nL(38k zK%MX09!;JXIx8U#mQHXPDT62vo?Fj!Cwif}JF>)RYC+%e^hHoHZ#!IisIh99{W_Gz z;l5#2*l0GZo2P+dNo2})zp3J(Igzt5> zW_B*k=GV&-`Q=hc-5ve zxe%@gKUz%cAUa@+Ruk4!wAt(e3z&?zd$@T3kK@4f(rX5($Y8iew=em&6z^VTQ0ynr zAw(}KF#DPMXG}lV5^RoAU>?5I&Z|MZ`ik8BX;&uKkSu-b70M+)okAsu-XO9MMZN9j zFTreE7viVz%CBJWPQsfkulx`U2*{KC4=3>l7pR}4rcdVW%=`9E*HScfw9>cMGqZG{ z1N>2>wY4@0m6aBSgT{O(7Ql&%3CX{GzrJn1Am4ZhK#$7&^=~vKZP_o1!q6Xp;o(s~ zBI6;!!F+;8MnXVBM8L+v!a~C!#>2(OC8a|lWI`q3AVGyD!9u0NK_Vl>W5!3}AVuRQ z!{DVrB_YHoC#NK&;i8~p!lL5Gp%*5i6Chv_qhS-I;#FYc7Uty<;1d$(mDc5w)#p_( z6=fojWF-*eW0w?QQ~gY*EI_Xy#I7j8ryCc~g|E6zfUaYhkz2I6qrbC$m{t*PgZw~H202G_5P^p zTdW;g?iihI9^Y)A+3qg;Hdve4Szj_-_kE&+X&I-HX-3+l~2= zt%aeDg~_9}>62d*+rQ?H*QXA**Uq=5uMSt&_f9tt&vwpkk1sBdt{yH9H_lE^FV1(L zFLxd;4=*k+&u$*C?w_6?FJE3>-Ur<4tIoKD(c4HX6&DgvbXhu{Qb`5?`jEBO 
zndJU*Nk5d_phN0l%$@D*ayEb0roe|$)3yrAc$+J>7BXR9YMD>RrO3S9LPl8E^7Ak|N8huR#O=+sFsX%IA%2FuxPz(oIL<(4dAAyv9%nU89>MMZbyv^!r!Q|*A z^m0vF!-W(x*}3E{s^z)Qq|epyyW-a3nFCmHwQDq{??Ii0GIKa4~ zfSTLYL*=WYk{P324l5t8$;zC+Qu|e*F9VOKYy8ibS00J_<%>-Z&kD`*63KSE@?vMv ziYbnTt+dL#vh?S-^?PZrLmm4L??V0StK)R<%>4uje}NLK$Xe~0+*dk6LYd;j;}e%| z%C#6Jw8^m-D&S=`GmFhr9sPP%+F@=f9C?UJ>WQ$LFNVvaR!?+f!JlbXP`DUbhBIrp zfMZRQc%OAFdA*&7!dUQ{%FQLM?osITF8py{Y-k9hd!TBF(kdf7&0nb8BV_3l)W7}! zyn3#rj(o(bU3oo~`l`LW^w8vezBv$$$LIBQ_y29-wcrjEm;`a#+0g3bZ3F{}3rh=? z3+np)=SUsp?J6l@MFARdX%V`AktE+(TxAJT*4^|7fwyUd7xiH))y32xFu55%xSe$a zU2#nHPyWAd90@^+C=tvST{q5eS5te{tfJD(;1ROz7Gl^QBq;g!6T0dNGFze_(k~sD6rx{?a8u3v4tUG$%fJ)6s33I>CZEtJ!G2|kw7KwMKmA=VzJbD z{c1-Fg(z1{2-w{q^Vbj)xym9x@DjjE911%|W<>@z*9Oo0@EM<*)6Vt$ydb4BPvFCt zElu_j?$fFU5ya;spD=i)teHLL2Oz;d9)3d0sIO(8t(LIC-82~qNL0!5V7}}ixVeP7 z^(?;D;znRG-JqJXDoR6L_70dZlRx{|?z`wK!{oZ50kg{+6+~(DFAK z^{;w=&R@LexqnL<#=m8~|El=snD*ZrxcX+iKPNl>(ELv^@9!!>N&j;M{0GV(X8Q9) i@;+(*mJq7{FonxXg1u#GfPg-|eQe+6A9$MgqyGa7Jc#%J literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/gh-36122.ods b/pandas/tests/io/data/excel/gh-36122.ods new file mode 100755 index 0000000000000000000000000000000000000000..3dfdaf976da4589b48754606c8136196e68132c4 GIT binary patch literal 8974 zcmdUVby$>Z_wFE|ASu#FmvnbXhvd*8HFVdIBLdRW-Cfe%Fd*Hapn!CDOGy7*@3_v+}#v?jb+>h=73b*LJ!a%zq8}u4WCgF*h*=J2E;LnG8h_ z+4wM{`|k7m84phiF~P$-JnwI?NdLg$uK~|=l!P(Y5*Don%qiST|GZo(!8RXOoxBTr z)*D=z-=@NDNJH5#WmdCE$J@l%kUPygHCanXoJ4qN;gg$A+vs9L%cB?P$H0YbHBy!J z+%@SNzd|!v+ZS$oDdg_zFa$)RP!aQ!6?c4|;Uz>9b)e&RMugiLi9qlyZ$U=oV^FOU zG@zCw@~u!PqHMpfqtCNOd6_P@xNkt7w#?n79)0MTPZq!a0bSV0x`fO1J3w;+hD7WOZ za*mpX+7<)Z>8tS)L&=DxE?UA2KOu<_whI!HQ)m*6kv>R<8{CysvWd3Ytj~b!XQ98tQ5gWN4 z-KfMFwZ%WVLp+N53=#T@ZY;g_MZdwTl~3^cukD;acknd4-ieJ19NwEhrgc9-J}A*E zy@9=7>-RZV1GRT+UH-3Y{rK)t0Xw=`8H4Xvdsllie3}#cw#CzL5l;L;u|$ACbOqZo zNwSU0wDm*r=q?X>ZVbw&v0`ttdf2YF<8x4z-3VgVTiC{gC z0Ma3Lu6!I!A)9dioX(0Hv+}o>C)B%;3(!d&!A@hZzs#fQWPra)@MbjHGNK{svay){ 
zAU+HhD<_4v0eZG8o;RRd#)8Qnt|P=rb5%146z$9xW_s-0n8&WQb>Z9uog8LlK0GdK z-ELOMK8W1nIl)ISj@&}SJjm${K_4e&~hdi!j}ZbM_RYQyA37xLoDXI^B@;$^W~EbkBGu8jnnIGL8vR9+7>DR#8H* zDep2YrWqQ7jozvjXa)ltEYL95wJ-<_Z%b5`;$E0;$|uuJTV|Z|m5)k)$NwEy*xME7 z0DiiXBr^rGget***xW&nY{L|LCC90QHB$@S@_A0gf*FlH&YS-8FW0mZ(cfw6)fKRW z>A?ZX3q%;kp3L*=m7$3Z)+e`dB>rAvHg!>89IOz_@G?KfKvq;@7la5*RcVpT6hNbm z)Hlm(wr$MY^Q&#fuhJPo&)+j(3MOYrHCR!7_S5ec7TrtY&HlImtMGl3qDCW0wq%nw znMY^PL=I><*dJW!ZG_OO4^sQad+aQgd$M5A^yqHUYOOMbOzrT6MX#Px-qglf7h}<1OdVs4? zWkV@~wVGz|*hg?`78C@J8LiNIWqP!#ob7Df-c~)9EW%)|Fm9EIh3eSWt)T9ZxS|4# zyiP)S-V)GAj`;J`fRk^6G|CCnQO_Epja*!8-y+7yBaKx!fnDOuR5e&0%-gCePgDqAxjCV`{TWeR!Hgq!`bJe9+4;TA127s;i;5EOR}S+-)`%SeYH>> z-+t2ku2bGG3MD3#VJg?F$efex)%dWL^6X4Dgef3lt(qoV4DvmJfls%gw9UgkM}_LW z4z#ZaD}H3Kta8g%CT{1@jW285Rw6DRZPFAgkm;&3e*`5fjX5Ed*&VIE<3VXjuf~}7 zhPyYiP>cdjrcV?f4cdqK4_vmM2OF2u{4a$u&o=|LNQ1r@ogZHd0xyGAlKrZ#5h)ej zb%=>}hdHNGtdW;?qLS|#8)efh%tEgS3h1Fj&Vx)Abs zA2w9f#Th}3BvW4Xkcu^e%|mv64WuQU30GUDw=^pD3i%9U|P z1=7@HB2ST79yOiQEJ;d#PRm){kdbZ@mFshPJVJrNJaTd~tbaaps*x4g8LVI?!;z_n z7W-+uj}=nigBQo4DDeu)sw2@}fRTcf!5$r0{`U}8bv@YTh#=M%n&OFQ>cBNv)tK|SAQ2#7 z`)v%-7R#;m^AI+d$L-Sv;i6?boyKo4yCRC_D1^#3vNGmWiFD{wLn|RD7QHxrKk{^rei^m%GrGwc6N8FkJD2oyk+FAG--1{DjnX$M!Jd`*_|)SKnQ+ zQ$9Axpn+7l1UY}7QG1lrHeWaLOyla5)H=vV+@D z6M{$S=QM|#`QKKnPR)ZqS?8{9<-i}=IqSwYN}eo`QmJ>rP16p)pdwyaCB9aD@%D3a z;w=iy3z}>{p51qVzVM-sL9leqTl#_N-ybTu$KYnSCIBpnfCUV1NjfPO<`GPXUIjLD zKvU-o=OYFAW?)}hOc2K%O16}{C=OyLDQLYlFmSFE=qRb$T|0rPeZQ+yTsKuxCvODh z$U4zWA`*0Jz|4l@48cMm+SPT6|c3{_AD|^-=f!IUk z8!On~_)U-z>uEsxy1sU^As*Meitv=aB1@lJ^P5ghhb7r7T6uN}$0Y^aF+1Gy@$pmO0nDp8jzc}j(M)T;TF&ewOm4&K=2bcco~9*oZ8D02cpk0^ zQ9`?c_3TXe@-;-w4yjTN{_9GR5ZWR|H5xph`A2ozsN3wo3s!T7>)HoWpiYr!soW^+ zD|rup%op_PX(juUQ_rbKJHe1RkiW56w+P$X7}f}*+?8C-f^Si)0;^vlmVX7NCpdSz zJwJbb(|}G?J07pID&va*DI0|~c~_Qh`ZeSlxL9=i*Ez9KN=fJd5dg5z{F%6p zXlvtmAAa8*6rmb2;Au|umNRv`7;{@@N}j#R;u2ZvM4~C~fT`3?EV6itZjv?_A>dZo z{(}YB9I;sIv9*PS4<1ePg5^lx4?8Lr+F)`FvfQ<-ZN`fL{TdX-gK285&>OKmbz 
zULrI2c5r|`HLI&qf%hf(;B0+_$sQG+vREZ4j^!A2MP(}&Sl-@pxfQ|CmGyirGoRV_ zHEK(g*r-VXHhFO0>0%{5hCVvudFsAPrteFsS?OeVdJYpK$+@kk0bMuvAYvV_Jef9$ z@g5=`y7{$%4?E6T1sWDB`P|Z7vPxo&Dxp$O>UN{?&e)5z9AsGEi7x1q(9X^ix~8xk$j0jLMDBfVHS}U^bkH9tl-A4 z5;b_C8y(artDRh*G4~-W(tlI99zS}Bmz@==j_g`FNGJT5!>KQ$hlcC>s(IZwe{h@xD{Metvw{GS|jE*C9PiIcCF;_Qf;`SHTx+-3+(qtFtc7UD^V z*f7IQ&u6h=#{)#v>Kr_SwFWu3_y|54l&)6LnYbB9^oje`dHllaGJR9Qg1RBjaI997 zyd}dyU{!*Kz#EjQbaFj|ip|iP8SDKv(C~Jawv&`+q{`%_u*E}KDe>xfq4{2-1EWk4 z!^W}6ATD~AS(DdQ(57&tf%$B<5xHUe=&VHa4y^RpY1Uw#a{8XNs+NXnkMQoYr=?iv zKyx)kdH3o#jw`~s>9XF->)BJ!rIO86SC?RT)Rz%CS$OCH-{ELV8q&B|c~<BgY~+|s3pwbB7E4hZ*A9g&gRu+;E!Ju(NnFX-^)DyAhQ}ffm1CchYMyD6KdK6^ za#vbq&=L(Hwe@_PV@mq|#I=d60 zhJS&`M1exN)xn!xkDlb~@rEt9p{8o_zyLFCe%zWirUH*&3r z7!>UpU7S?ZIvo6_j~``A>Xyepje6A8-b#Sl;t{$@CC@5Ts`n|=`x+t;r!=~2dp@Oz zCp!ZxUf9`Jl~mV_<3)BcLrU;Mn58T&#d2Udcg+dNU-QB3^H=W_%W(~5zw4-&)pZBx zj%*VrmaZA>mObv$7S6}7^J%)Y9U1bk^2wQ<3|?W^eoMO}`>r46c<&RT8+f^-p@#xn*+YjX@QL$9nEb_{}ZZf>VT(s(ckQs z^?px1i1eR!#o+bj#L=2!PRN12d3yRFsaGqU!f>Ok%t+K`X$lHQpO@O8p#O@uNtZBn zHkN6=RK&Rwj+Ewz?!DWToxIm2uA*S8!yXk^4cGAWaFI;PP(GTCm&daQ>KuqKH%X7g z6xK&iB1iJm(2@4YB2^kwd`Waxn{Z*~<-zG;&dP>gQ?atI7W*}yV>O&WqA1??eWeL< zgSy$<0~OM30?3w@g?v)o;!tfk@XNNpo-A)1Td4PN+9vYWp5IciGrX?~?KnN*i%)jx z^RQ46HZuTMHdu$~@Jt!INRzsSBq_7+pQpU0-HZ`d2PEpRSP>UG-mvus zdj&8nJ4dvB6k-UHxGgg+Ou`#TZRRfH{$UTS>cH3@xJeC zh`*1|2XR;;@i8jLz}gpm*<;w^p!Ea!eV9k@cqdU1^XcJnS~+)k67Q7N4goCdqwJDa zG24B_JqG6US09_L6rSLr?>WW`tRTxq=di7&8hSiWjKXEO+RwUjq;W6lM?htW~e-0~1#=o;RaNkwjH z${KyOtt3YBettmn!gPWsxwOUA3$Es}K+c&QyR&?gvWIKGgU0L-jr<{<&iY5G*^Q(g z@4;OE#wZM%`9Z~iMFbNYo>%kI2p;CyB2Nl6D8r&$iwpg_+E<_Z`E^-*vKe9>-Hp{$ zQ?$RC)PG4)t*56f-#JxqOO-foQlQ_IyCDIjWm_Tyt{@Op_iHV>Zn_6j;BLPO(u!uF zci>W+Bp(dN1LjijGN(?yBq!2rH+mAFEdTs6`eHP+-+dgXYn_j;ci^F+7#TX(gAS3t zLXuu6{FBa!I9Uctp`=um^M>JwqkyL8A2I1)e0gyxK;R_We!25>m0_PHhBIdpvdj}f zEPPNMMA$W>&O-B@9q5;76^jC*&aed2WwAx(f_V!~%s z_|Iq<>Dbtq*m>Dl`8oJ`xdr(I#QCK(BzU-G1bJjc1;m9!#6(|8NlL0o2nzH zloe$)On`dUnmUF$7H$?AFYUEu?e!FG^pzoIY97|QHl~L9md@r5u3#%;4{Lo-7h7{k 
zupPwL-8;}TFx<;h&DQ}G_S!ty*FE0H&iAeNJ0JhBu(05;=$NR;gqY}r#Kge3tl*U5 z#MI1)%<{PW`mFe%_i6FDnaM?2@kO~AWo2dWODalB%kwK5tBbR%%ZkctTbi32p=}+F zJtK|19~+04TSk}q+v+CYu3KfAKMI@z}}^J#H*>S%3pX?J^V=V<-(a`*Ur_x$E?XXET> z`|{iR)$!)R@yYSk^~LGo_0`GE&CUHh-`@6X6B6D{@{qKcu!`%{?iU#aWgw>4r^kWM zRovnHo_lA625J%H1Mv;vwN^;Yl%KFK1qZy&d8gC%fCam>7+uKfJ9y5TxI7hht{ z3;bmMx-&Z~hx}P+%-pjx&jQak>NJ%0Dk*s1*_}Ku4Age1amX$_PsNez9*)^MxEDdj zQD3LEDAaOV%t|}&QmZRnN)@?U@HeVm6SPh|Ow(iFfS^@gnA(gCW1ae&t~0qXLf^SE ztCa2En7_D4%Kq|g2uCNe#`cRYR`l?IX;Z}ouR*Cc<9r2n+qMiTWtRZEd>yh5ZA-0qH-4X+&#pT6HMfAP@EXZKp-6bckBFrEyFTwO*Ve2jo zD~Xn~=^#OuIOFl_s%5R#RDCXw9SQM127E?F#UMgQ20C0C(8+77MjFuYdHA0RJgoSd z`dU4eglfA2M0}O1>Qyp<-Q@p5pFmBKP%J5==D^4yf;?n&&G5N0ZC z^)J_tyLt0v+?r?LWt~e=gThdrqCxlM0M@E<@c^ywnh&T>5^&ZYVA`I`s#PBCF_ zPLbtK7@@y&jEXWa4=@3L7Nzgk=coOUsQ-!jc?SS+ujBqSmAmS{i_m{>|0favxR;@S zTG(CnpG^Ntl>Rf>y-57i2JWi=B>PpQ{xi?thX;E%jsN8NyJ-DqroZ<{;5SUair9bV zxqo~8G^gM2+>h)(g7Ne^`F# zxW6jA_aVbiQ@CUKukW}M#eZz#AIM+z!g~eyr-j|U>+k-X@s}R_Pt>p3qx(a`PlMcr zG(VNaf8u@(dhT;eKP?OGZ<(fl0)IWy{C$ArcW(062OJoL@=qhadrkk=ODoF2KfJ%~$=%2J&gn!G-Cz9=7`lA` literal 0 HcmV?d00001 diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index b610c5ec3a838..3410e957f893a 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -519,6 +519,23 @@ def test_reader_spaces(self, read_ext): ) tm.assert_frame_equal(actual, expected) + # gh-36122, gh-35802 + @pytest.mark.parametrize( + "basename,expected", + [ + ("gh-35802", DataFrame({"COLUMN": ["Test (1)"]})), + ("gh-36122", DataFrame(columns=["got 2nd sa"])), + ], + ) + def test_read_excel_ods_nested_xml(self, read_ext, basename, expected): + # see gh-35802 + engine = pd.read_excel.keywords["engine"] + if engine != "odf": + pytest.skip(f"Skipped for engine: {engine}") + + actual = pd.read_excel(basename + read_ext) + tm.assert_frame_equal(actual, expected) + def test_reading_all_sheets(self, read_ext): # Test reading all sheet names by setting 
sheet_name to None, # Ensure a dict is returned. From d5e233335b0c467cf4b1a174cd78d912fccb760e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 18 Sep 2020 16:30:53 +0100 Subject: [PATCH 10/38] Backport PR #36378: DOC: move release note for #36175 (pt1) (#36398) --- doc/source/whatsnew/v1.1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 7f5340b7022ce..7efffffcf9d32 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -18,7 +18,7 @@ Fixed regressions - Fixed regression in :class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) -- Fixed regression in :meth:`read_excel` with `engine="odf"` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, and :issue:`35802`) +- Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`,:issue:`35802`) - .. 
--------------------------------------------------------------------------- From 711f923976a54d5dfce1010e4e63f801211873b7 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Fri, 18 Sep 2020 16:58:00 -0500 Subject: [PATCH 11/38] BLD/CI: support for 3.9 (#36402) --- .travis.yml | 3 --- asv_bench/asv.conf.json | 2 +- ci/build39.sh | 3 +-- ci/deps/azure-36-32bit.yaml | 2 +- ci/deps/azure-36-locale.yaml | 2 +- ci/deps/azure-36-locale_slow.yaml | 2 +- ci/deps/azure-36-minimum_versions.yaml | 2 +- ci/deps/azure-36-slow.yaml | 2 +- ci/deps/azure-37-locale.yaml | 2 +- ci/deps/azure-37-numpydev.yaml | 2 +- ci/deps/azure-macos-36.yaml | 2 +- ci/deps/azure-windows-36.yaml | 2 +- ci/deps/azure-windows-37.yaml | 2 +- ci/deps/travis-36-cov.yaml | 2 +- ci/deps/travis-36-locale.yaml | 2 +- ci/deps/travis-37-arm64.yaml | 2 +- ci/deps/travis-37.yaml | 2 +- ci/deps/travis-38.yaml | 2 +- doc/source/getting_started/install.rst | 2 +- doc/source/whatsnew/v1.1.3.rst | 15 +++++++++++++++ environment.yml | 2 +- pandas/_libs/writers.pyx | 8 ++------ pyproject.toml | 2 +- requirements-dev.txt | 2 +- setup.py | 3 ++- 25 files changed, 40 insertions(+), 32 deletions(-) diff --git a/.travis.yml b/.travis.yml index b016cf386098e..1e5ea21b0f2d9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -62,9 +62,6 @@ matrix: - arch: arm64 env: - JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard)" - - dist: bionic - env: - - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)" before_install: diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 4583fac85b776..3a9c251968a3e 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -39,7 +39,7 @@ // followed by the pip installed packages). 
"matrix": { "numpy": [], - "Cython": ["0.29.16"], + "Cython": ["0.29.21"], "matplotlib": [], "sqlalchemy": [], "scipy": [], diff --git a/ci/build39.sh b/ci/build39.sh index b9c76635df99b..f2ef11d5a71f4 100755 --- a/ci/build39.sh +++ b/ci/build39.sh @@ -3,8 +3,7 @@ sudo apt-get install build-essential gcc xvfb pip install --no-deps -U pip wheel setuptools -pip install numpy python-dateutil pytz pytest pytest-xdist hypothesis -pip install cython --pre # https://github.com/cython/cython/issues/3395 +pip install cython numpy python-dateutil pytz pytest pytest-xdist hypothesis python setup.py build_ext -inplace python -m pip install --no-build-isolation -e . diff --git a/ci/deps/azure-36-32bit.yaml b/ci/deps/azure-36-32bit.yaml index 15704cf0d5427..6deb10a408ca4 100644 --- a/ci/deps/azure-36-32bit.yaml +++ b/ci/deps/azure-36-32bit.yaml @@ -22,5 +22,5 @@ dependencies: # see comment above - pip - pip: - - cython>=0.29.16 + - cython>=0.29.21 - pytest>=5.0.1 diff --git a/ci/deps/azure-36-locale.yaml b/ci/deps/azure-36-locale.yaml index 536bb6f899773..72a64332b65db 100644 --- a/ci/deps/azure-36-locale.yaml +++ b/ci/deps/azure-36-locale.yaml @@ -6,7 +6,7 @@ dependencies: - python=3.6.* # tools - - cython>=0.29.16 + - cython>=0.29.21 - pytest>=5.0.1 - pytest-xdist>=1.21 - pytest-asyncio>=0.12.0 diff --git a/ci/deps/azure-36-locale_slow.yaml b/ci/deps/azure-36-locale_slow.yaml index c086b3651afc3..0a151944cdceb 100644 --- a/ci/deps/azure-36-locale_slow.yaml +++ b/ci/deps/azure-36-locale_slow.yaml @@ -6,7 +6,7 @@ dependencies: - python=3.6.* # tools - - cython>=0.29.16 + - cython>=0.29.21 - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/ci/deps/azure-36-minimum_versions.yaml b/ci/deps/azure-36-minimum_versions.yaml index f5af7bcf36189..5dfde5fc89183 100644 --- a/ci/deps/azure-36-minimum_versions.yaml +++ b/ci/deps/azure-36-minimum_versions.yaml @@ -5,7 +5,7 @@ dependencies: - python=3.6.1 # tools - - cython=0.29.16 + - cython=0.29.21 - pytest=5.0.1 - 
pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/ci/deps/azure-36-slow.yaml b/ci/deps/azure-36-slow.yaml index 87bad59fa4873..7dd2f44b21c79 100644 --- a/ci/deps/azure-36-slow.yaml +++ b/ci/deps/azure-36-slow.yaml @@ -6,7 +6,7 @@ dependencies: - python=3.6.* # tools - - cython>=0.29.16 + - cython>=0.29.21 - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/ci/deps/azure-37-locale.yaml b/ci/deps/azure-37-locale.yaml index 6f64c81f299d1..359f1055ec4d3 100644 --- a/ci/deps/azure-37-locale.yaml +++ b/ci/deps/azure-37-locale.yaml @@ -5,7 +5,7 @@ dependencies: - python=3.7.* # tools - - cython>=0.29.16 + - cython>=0.29.21 - pytest>=5.0.1 - pytest-xdist>=1.21 - pytest-asyncio diff --git a/ci/deps/azure-37-numpydev.yaml b/ci/deps/azure-37-numpydev.yaml index 5cb58756a6ac1..7248b1740058f 100644 --- a/ci/deps/azure-37-numpydev.yaml +++ b/ci/deps/azure-37-numpydev.yaml @@ -14,7 +14,7 @@ dependencies: - pytz - pip - pip: - - cython==0.29.16 # GH#34014 + - cython==0.29.21 # GH#34014 - "git+git://github.com/dateutil/dateutil.git" - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple" - "--pre" diff --git a/ci/deps/azure-macos-36.yaml b/ci/deps/azure-macos-36.yaml index eeea249a19ca1..e2e47fe16e34e 100644 --- a/ci/deps/azure-macos-36.yaml +++ b/ci/deps/azure-macos-36.yaml @@ -31,6 +31,6 @@ dependencies: - xlwt - pip - pip: - - cython>=0.29.16 + - cython>=0.29.21 - pyreadstat - pyxlsb diff --git a/ci/deps/azure-windows-36.yaml b/ci/deps/azure-windows-36.yaml index 21b4e86918f3b..04fc1d8816712 100644 --- a/ci/deps/azure-windows-36.yaml +++ b/ci/deps/azure-windows-36.yaml @@ -6,7 +6,7 @@ dependencies: - python=3.6.* # tools - - cython>=0.29.16 + - cython>=0.29.21 - pytest>=5.0.1 - pytest-xdist>=1.21,<2.0.0 # GH 35737 - hypothesis>=3.58.0 diff --git a/ci/deps/azure-windows-37.yaml b/ci/deps/azure-windows-37.yaml index 4d134b43760fe..026d8e1d2844f 100644 --- a/ci/deps/azure-windows-37.yaml +++ b/ci/deps/azure-windows-37.yaml @@ -6,7 +6,7 @@ 
dependencies: - python=3.7.* # tools - - cython>=0.29.16 + - cython>=0.29.21 - pytest>=5.0.1 - pytest-xdist>=1.21,<2.0.0 # GH 35737 - hypothesis>=3.58.0 diff --git a/ci/deps/travis-36-cov.yaml b/ci/deps/travis-36-cov.yaml index 2457c04e67759..c380423c55f19 100644 --- a/ci/deps/travis-36-cov.yaml +++ b/ci/deps/travis-36-cov.yaml @@ -6,7 +6,7 @@ dependencies: - python=3.6.* # tools - - cython>=0.29.16 + - cython>=0.29.21 - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/ci/deps/travis-36-locale.yaml b/ci/deps/travis-36-locale.yaml index 8f7e29abc5f3b..21176054ae0d7 100644 --- a/ci/deps/travis-36-locale.yaml +++ b/ci/deps/travis-36-locale.yaml @@ -6,7 +6,7 @@ dependencies: - python=3.6.* # tools - - cython>=0.29.16 + - cython>=0.29.21 - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/ci/deps/travis-37-arm64.yaml b/ci/deps/travis-37-arm64.yaml index 5cb53489be225..dcea6b7bd32e8 100644 --- a/ci/deps/travis-37-arm64.yaml +++ b/ci/deps/travis-37-arm64.yaml @@ -6,7 +6,7 @@ dependencies: - python=3.7.* # tools - - cython>=0.29.13 + - cython>=0.29.21 - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/ci/deps/travis-37.yaml b/ci/deps/travis-37.yaml index e896233aac63c..6ded5b73a5d95 100644 --- a/ci/deps/travis-37.yaml +++ b/ci/deps/travis-37.yaml @@ -6,7 +6,7 @@ dependencies: - python=3.7.* # tools - - cython>=0.29.16 + - cython>=0.29.21 - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/ci/deps/travis-38.yaml b/ci/deps/travis-38.yaml index b879c0f81dab2..874c8dd96d008 100644 --- a/ci/deps/travis-38.yaml +++ b/ci/deps/travis-38.yaml @@ -6,7 +6,7 @@ dependencies: - python=3.8.* # tools - - cython>=0.29.16 + - cython>=0.29.21 - pytest>=5.0.1 - pytest-xdist>=1.21 - hypothesis>=3.58.0 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index b79a9cd872c47..de6bfa62d1e7e 100644 --- a/doc/source/getting_started/install.rst +++ 
b/doc/source/getting_started/install.rst @@ -18,7 +18,7 @@ Instructions for installing from source, Python version support ---------------------- -Officially Python 3.6.1 and above, 3.7, and 3.8. +Officially Python 3.6.1 and above, 3.7, 3.8, and 3.9. Installing pandas ----------------- diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 7efffffcf9d32..c920e517ea303 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -10,6 +10,21 @@ including other versions of pandas. .. --------------------------------------------------------------------------- +Enhancements +~~~~~~~~~~~~ + +Added support for new Python version +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas 1.1.3 now supports Python 3.9 (:issue:`36296`). + +Development Changes +^^^^^^^^^^^^^^^^^^^ + +- The minimum version of Cython is now the most recent bug-fix version (0.29.21) (:issue:`36296`). + +.. --------------------------------------------------------------------------- + .. 
_whatsnew_113.regressions: Fixed regressions diff --git a/environment.yml b/environment.yml index 9efb995e29497..2df55d7e02e1b 100644 --- a/environment.yml +++ b/environment.yml @@ -13,7 +13,7 @@ dependencies: - asv # building - - cython>=0.29.16 + - cython>=0.29.21 # code checks - black=19.10b0 diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 40c39aabb7a7a..f6823c3cb0d3f 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -1,11 +1,7 @@ import cython -from cython import Py_ssize_t - -from cpython.bytes cimport PyBytes_GET_SIZE -from cpython.unicode cimport PyUnicode_GET_SIZE - import numpy as np +from cpython cimport PyBytes_GET_SIZE, PyUnicode_GET_LENGTH from numpy cimport ndarray, uint8_t ctypedef fused pandas_string: @@ -144,7 +140,7 @@ cpdef inline Py_ssize_t word_len(object val): Py_ssize_t l = 0 if isinstance(val, str): - l = PyUnicode_GET_SIZE(val) + l = PyUnicode_GET_LENGTH(val) elif isinstance(val, bytes): l = PyBytes_GET_SIZE(val) diff --git a/pyproject.toml b/pyproject.toml index f282f2a085000..098a38958b5cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ requires = [ "setuptools", "wheel", - "Cython>=0.29.16,<3", # Note: sync with setup.py + "Cython>=0.29.21,<3", # Note: sync with setup.py "numpy==1.15.4; python_version=='3.6' and platform_system!='AIX'", "numpy==1.15.4; python_version=='3.7' and platform_system!='AIX'", "numpy==1.17.3; python_version>='3.8' and platform_system!='AIX'", diff --git a/requirements-dev.txt b/requirements-dev.txt index c0dd77cd73ddc..19bc904265ea4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,7 +5,7 @@ numpy>=1.15,<1.19.0 python-dateutil>=2.7.3 pytz asv -cython>=0.29.16 +cython>=0.29.21 black==19.10b0 cpplint flake8<3.8.0 diff --git a/setup.py b/setup.py index 22da02360619e..5555592de45e0 100755 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ def is_platform_mac(): min_numpy_ver = "1.15.4" -min_cython_ver = "0.29.16" # note: sync with 
pyproject.toml +min_cython_ver = "0.29.21" # note: sync with pyproject.toml try: import Cython @@ -200,6 +200,7 @@ def build_extensions(self): "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Programming Language :: Cython", "Topic :: Scientific/Engineering", ] From eecc0d15f76d5634e7f34e2e259c2edf729af762 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sat, 19 Sep 2020 03:18:18 -0700 Subject: [PATCH 12/38] Backport PR #36371: BUG: Fix MultiIndex column stacking with dupe names (#36396) Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/reshape/reshape.py | 14 +++++--------- pandas/tests/frame/test_reshape.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index c920e517ea303..9bb063b2b1590 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -45,6 +45,7 @@ Bug fixes - Bug in :func:`read_spss` where passing a ``pathlib.Path`` as ``path`` would raise a ``TypeError`` (:issue:`33666`) - Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`) - Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`) +- Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 391313fbb5283..1d4c9a7826178 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -588,19 +588,15 @@ def _stack_multi_columns(frame, level_num=-1, dropna=True): def _convert_level_number(level_num, columns): """ Logic for converting the level number to something we can safely pass - to swaplevel: + to swaplevel. - We generally want to convert the level number into a level name, except - when columns do not have names, in which case we must leave as a level - number + If `level_num` matches a column name return the name from + position `level_num`, otherwise return `level_num`. """ if level_num in columns.names: return columns.names[level_num] - else: - if columns.names[level_num] is None: - return level_num - else: - return columns.names[level_num] + + return level_num this = frame.copy() diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 6a8f1e7c1aca2..1b452658cc219 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1302,3 +1302,16 @@ def test_unstacking_multi_index_df(): ), ) tm.assert_frame_equal(result, expected) + + +def test_stack_positional_level_duplicate_column_names(): + # https://github.com/pandas-dev/pandas/issues/36353 + columns = pd.MultiIndex.from_product([("x", "y"), ("y", "z")], names=["a", "a"]) + df = pd.DataFrame([[1, 1, 1, 1]], columns=columns) + result = df.stack(0) + + new_columns = pd.Index(["y", "z"], name="a") + new_index = pd.MultiIndex.from_tuples([(0, "x"), (0, "y")], names=[None, "a"]) + expected = pd.DataFrame([[1, 1], [1, 1]], index=new_index, columns=new_columns) + + tm.assert_frame_equal(result, expected) From be9b9dced1cb0e75c70c1bd64660e881b9c5170c Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sat, 19 Sep 
2020 03:20:13 -0700 Subject: [PATCH 13/38] Backport PR #36440: REGR: Series[numeric] comparison with str raising on numexpr path (#36473) Co-authored-by: jbrockmendel --- doc/source/whatsnew/v1.1.3.rst | 2 +- pandas/core/indexes/base.py | 6 +++++- pandas/core/ops/array_ops.py | 5 +++++ pandas/tests/arithmetic/test_numeric.py | 20 ++++++++++++++++++++ pandas/tests/indexes/test_numpy_compat.py | 15 --------------- 5 files changed, 31 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 9bb063b2b1590..7e52e4c53d6ff 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -34,7 +34,7 @@ Fixed regressions - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`,:issue:`35802`) -- +- Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`,:issue:`36377`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 32bbdf425acab..a18f7bdccd0d0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -135,10 +135,14 @@ def cmp_method(self, other): with np.errstate(all="ignore"): result = ops.comp_method_OBJECT_ARRAY(op, self._values, other) - else: + elif is_interval_dtype(self.dtype): with np.errstate(all="ignore"): result = op(self._values, np.asarray(other)) + else: + with np.errstate(all="ignore"): + result = ops.comparison_op(self._values, np.asarray(other), op) + if is_bool_dtype(result): return result return ops.invalid_comparison(self, other, op) diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 3379ee56b6ad0..31e8d007cae76 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -23,6 +23,7 @@ is_bool_dtype, is_integer_dtype, is_list_like, + is_numeric_v_string_like, is_object_dtype, is_scalar, ) @@ -235,6 +236,10 @@ def comparison_op(left: ArrayLike, right: Any, op) -> ArrayLike: else: res_values = np.zeros(lvalues.shape, dtype=bool) + elif is_numeric_v_string_like(lvalues, rvalues): + # GH#36377 going through the numexpr path would incorrectly raise + return invalid_comparison(lvalues, rvalues, op) + elif is_object_dtype(lvalues.dtype): res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 2155846b271fc..84ff6e6f29bca 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -89,6 +89,26 @@ def test_compare_invalid(self): b.name = pd.Timestamp("2000-01-01") tm.assert_series_equal(a / b, 1 / (b / a)) + def test_numeric_cmp_string_numexpr_path(self, box): + # GH#36377, GH#35700 + xbox = box if box is not pd.Index else np.ndarray + + obj = pd.Series(np.random.randn(10 ** 5)) + obj = 
tm.box_expected(obj, box, transpose=False) + + result = obj == "a" + + expected = pd.Series(np.zeros(10 ** 5, dtype=bool)) + expected = tm.box_expected(expected, xbox, transpose=False) + tm.assert_equal(result, expected) + + result = obj != "a" + tm.assert_equal(result, ~expected) + + msg = "Invalid comparison between dtype=float64 and str" + with pytest.raises(TypeError, match=msg): + obj < "a" + # ------------------------------------------------------------------ # Numeric dtypes Arithmetic with Datetime/Timedelta Scalar diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 043539c173427..4df23d43ec1e1 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -114,18 +114,3 @@ def test_numpy_ufuncs_other(index, func): else: with pytest.raises(Exception): func(index) - - -def test_elementwise_comparison_warning(): - # https://github.com/pandas-dev/pandas/issues/22698#issuecomment-458968300 - # np.array([1, 2]) == 'a' returns False, and produces a - # FutureWarning that it'll be [False, False] in the future. - # We just want to ensure that comes through. - # When NumPy dev actually enforces this change, we'll need to skip - # this test. 
- idx = Index([1, 2]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = idx == "a" - - expected = np.array([False, False]) - tm.assert_numpy_array_equal(result, expected) From 1aba960f6d91b4eeb6ed43c1bb7b0f1fd67b263e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 19 Sep 2020 12:14:40 +0100 Subject: [PATCH 14/38] Backport PR #36266:: BUG: fix isin with nans and large arrays (#36474) Co-authored-by: Hans --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/algorithms.py | 7 ++++++- pandas/tests/test_algos.py | 18 +++++++++++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 7e52e4c53d6ff..19ed4d171af13 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -46,6 +46,7 @@ Bug fixes - Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`) - Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`) - Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`) +- Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 856b4ead3f3cc..67ab3a8548f21 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -438,7 +438,12 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: # GH16012 # Ensure np.in1d doesn't get object types or it *may* throw an exception if len(comps) > 1_000_000 and not is_object_dtype(comps): - f = np.in1d + # If the the values include nan we need to check for nan explicitly + # since np.nan it not equal to np.nan + if np.isnan(values).any(): + f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c)) + else: + f = np.in1d elif is_integer_dtype(comps): try: values = values.astype("int64", copy=False) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 326c926238f89..a8a55418a619a 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -787,7 +787,6 @@ def test_i8(self): tm.assert_numpy_array_equal(result, expected) def test_large(self): - s = pd.date_range("20000101", periods=2000000, freq="s").values result = algos.isin(s, s[0:2]) expected = np.zeros(len(s), dtype=bool) @@ -827,6 +826,23 @@ def test_same_nan_is_in(self): result = algos.isin(comps, values) tm.assert_numpy_array_equal(expected, result) + def test_same_nan_is_in_large(self): + # https://github.com/pandas-dev/pandas/issues/22205 + s = np.tile(1.0, 1_000_001) + s[0] = np.nan + result = algos.isin(s, [np.nan, 1]) + expected = np.ones(len(s), dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + def test_same_nan_is_in_large_series(self): + # https://github.com/pandas-dev/pandas/issues/22205 + s = np.tile(1.0, 1_000_001) + series = pd.Series(s) + s[0] = np.nan + result = series.isin([np.nan, 1]) + expected = pd.Series(np.ones(len(s), dtype=bool)) + tm.assert_series_equal(result, expected) + def test_same_object_is_in(self): # GH 22160 # there could be special treatment 
for nans From d05a9caf8fd5ec2c4d39394b30c1c4f2451e7d3b Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sat, 19 Sep 2020 08:07:16 -0700 Subject: [PATCH 15/38] Backport PR #36385: BUG: Always cast to Categorical in lexsort_indexer (#36477) Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/sorting.py | 9 +-------- .../tests/frame/methods/test_sort_values.py | 20 +++++++++++++++++++ 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 19ed4d171af13..7d658215d7b76 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -45,6 +45,7 @@ Bug fixes - Bug in :func:`read_spss` where passing a ``pathlib.Path`` as ``path`` would raise a ``TypeError`` (:issue:`33666`) - Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`) - Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`) +- Bug in :meth:`DataFrame.sort_values` raising an ``AttributeError`` when sorting on a key that casts column to categorical dtype (:issue:`36383`) - Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`) - Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index ee73aa42701b0..c090531de4075 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -9,7 +9,6 @@ from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, - is_categorical_dtype, is_extension_array_dtype, ) from pandas.core.dtypes.generic import 
ABCMultiIndex @@ -227,13 +226,7 @@ def lexsort_indexer( keys = [ensure_key_mapped(k, key) for k in keys] for k, order in zip(keys, orders): - # we are already a Categorical - if is_categorical_dtype(k): - cat = k - - # create the Categorical - else: - cat = Categorical(k, ordered=True) + cat = Categorical(k, ordered=True) if na_position not in ["last", "first"]: raise ValueError(f"invalid na_position: {na_position}") diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index c60e7e3b1bdb6..0ca232ec433e7 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -691,3 +691,23 @@ def test_sort_values_key_dict_axis(self): result = df.sort_values(1, key=lambda col: -col, axis=1) expected = df.loc[:, ::-1] tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("ordered", [True, False]) + def test_sort_values_key_casts_to_categorical(self, ordered): + # https://github.com/pandas-dev/pandas/issues/36383 + categories = ["c", "b", "a"] + df = pd.DataFrame({"x": [1, 1, 1], "y": ["a", "b", "c"]}) + + def sorter(key): + if key.name == "y": + return pd.Series( + pd.Categorical(key, categories=categories, ordered=ordered) + ) + return key + + result = df.sort_values(by=["x", "y"], key=sorter) + expected = pd.DataFrame( + {"x": [1, 1, 1], "y": ["c", "b", "a"]}, index=pd.Index([2, 1, 0]) + ) + + tm.assert_frame_equal(result, expected) From 4d5ff7e18ed8d1e816f87a60671f481b772d8cbe Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 21 Sep 2020 10:47:09 +0200 Subject: [PATCH 16/38] BUG: Fix astype from float32 to string (#36464) (#36519) Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/_libs/lib.pyx | 3 ++- pandas/core/arrays/string_.py | 4 +--- pandas/tests/arrays/string_/test_string.py | 9 +++++++++ pandas/tests/series/methods/test_astype.py | 9 +++++++++ 5 files changed, 
22 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 7d658215d7b76..72937141c2870 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -47,6 +47,7 @@ Bug fixes - Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`) - Bug in :meth:`DataFrame.sort_values` raising an ``AttributeError`` when sorting on a key that casts column to categorical dtype (:issue:`36383`) - Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`) +- Bug in :meth:`Series.astype` showing too much precision when casting from ``np.float32`` to string dtype (:issue:`36451`) - Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index eadfcefaac73d..6bf0aba128e39 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -650,11 +650,12 @@ cpdef ndarray[object] ensure_string_array( Py_ssize_t i = 0, n = len(arr) result = np.asarray(arr, dtype="object") + if copy and result is arr: result = result.copy() for i in range(n): - val = result[i] + val = arr[i] if not checknull(val): result[i] = str(val) else: diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index a4778869aee24..b5a83c17a64f0 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -199,11 +199,9 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): if dtype: assert dtype == "string" - result = np.asarray(scalars, dtype="object") - # convert non-na-likes to str, and nan-likes to StringDtype.na_value result = lib.ensure_string_array( - result, na_value=StringDtype.na_value, 
copy=copy + scalars, na_value=StringDtype.na_value, copy=copy ) return cls(result) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index efd5d29ae0717..56a8e21edd004 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -336,3 +336,12 @@ def test_memory_usage(): series = pd.Series(["a", "b", "c"], dtype="string") assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True) + + +@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) +def test_astype_from_float_dtype(dtype): + # https://github.com/pandas-dev/pandas/issues/36451 + s = pd.Series([0.1], dtype=dtype) + result = s.astype("string") + expected = pd.Series(["0.1"], dtype="string") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index b9d90a9fc63dd..7449d8d65ef96 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -1,3 +1,4 @@ +import numpy as np import pytest from pandas import Interval, Series, Timestamp, date_range @@ -46,3 +47,11 @@ def test_astype_ignores_errors_for_extension_dtypes(self, values, errors): msg = "(Cannot cast)|(could not convert)" with pytest.raises((ValueError, TypeError), match=msg): values.astype(float, errors=errors) + + @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) + def test_astype_from_float_to_str(self, dtype): + # https://github.com/pandas-dev/pandas/issues/36451 + s = Series([0.1], dtype=dtype) + result = s.astype(str) + expected = Series(["0.1"]) + tm.assert_series_equal(result, expected) From db4d6be4307dee51af221d7800d49d86679381b3 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Tue, 22 Sep 2020 05:49:35 -0700 Subject: [PATCH 17/38] Backport PR #36532: BUG: Fix issue in preserving index name on empty 
DataFrame (#36545) Co-authored-by: Irv Lustig --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/frame.py | 3 ++- pandas/tests/indexing/test_partial.py | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 72937141c2870..e3a96c69918db 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -35,6 +35,7 @@ Fixed regressions - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`,:issue:`35802`) - Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`,:issue:`36377`) +- Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9af9c19392ef7..0cbcb0ce3d700 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3172,7 +3172,8 @@ def _ensure_valid_index(self, value): # GH31368 preserve name of index index_copy = value.index.copy() - index_copy.name = self.index.name + if self.index.name is not None: + index_copy.name = self.index.name self._mgr = self._mgr.reindex_axis(index_copy, axis=1, fill_value=np.nan) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 7afbbc2b9ab2b..72bc13e67c040 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -672,3 +672,11 @@ def test_index_name_empty(self): ) tm.assert_frame_equal(df, expected) + + # GH 36527 + df = pd.DataFrame() + series = pd.Series(1.23, index=pd.RangeIndex(4, name="series_index")) + df["series"] = series + expected = pd.DataFrame( + {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index") + ) From da2f3a866a26ccf80a11c31af6d8746c50789bd1 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Tue, 22 Sep 2020 08:30:53 -0700 Subject: [PATCH 18/38] Backport PR #36546: TST: add missing assert (#36547) Co-authored-by: Joris Van den Bossche --- pandas/tests/indexing/test_partial.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 72bc13e67c040..337ec683ee745 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -680,3 +680,4 @@ def test_index_name_empty(self): expected = pd.DataFrame( {"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index") ) + tm.assert_frame_equal(df, expected) From 6cf0735f398d218a60ca8650fed1d30dd114f4ad Mon Sep 17 00:00:00 2001 From: MeeseeksMachine 
<39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 23 Sep 2020 04:31:47 -0700 Subject: [PATCH 19/38] Backport PR #36535: Regr/period range large value/issue 36430 (#36572) Co-authored-by: nrebena --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/_libs/tslibs/period.pyx | 3 ++- pandas/tests/scalar/period/test_period.py | 7 +++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index e3a96c69918db..e3b0f59c3edcc 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -36,6 +36,7 @@ Fixed regressions - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`,:issue:`35802`) - Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`,:issue:`36377`) - Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) +- Fixed regression in :class:`Period` incorrect value for ordinal over the maximum timestamp (:issue:`36430`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 86b6533f5caf5..27402c8d255b6 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -861,6 +861,7 @@ cdef int64_t get_time_nanos(int freq, int64_t unix_date, int64_t ordinal) nogil: """ cdef: int64_t sub, factor + int64_t nanos_in_day = 24 * 3600 * 10**9 freq = get_freq_group(freq) @@ -886,7 +887,7 @@ cdef int64_t get_time_nanos(int freq, int64_t unix_date, int64_t ordinal) nogil: # We must have freq == FR_HR factor = 10**9 * 3600 - sub = ordinal - unix_date * 24 * 3600 * 10**9 / factor + sub = ordinal - unix_date * (nanos_in_day / factor) return sub * factor diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index dcef0615121c1..795021a260028 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -486,6 +486,13 @@ def test_period_cons_combined(self): with pytest.raises(ValueError, match=msg): Period("2011-01", freq="1D1W") + @pytest.mark.parametrize("hour", range(24)) + def test_period_large_ordinal(self, hour): + # Issue #36430 + # Integer overflow for Period over the maximum timestamp + p = pd.Period(ordinal=2562048 + hour, freq="1H") + assert p.hour == hour + class TestPeriodMethods: def test_round_trip(self): From 159e9ebdcd170b3f050791b18e651c69601d9ffd Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 23 Sep 2020 13:16:09 +0100 Subject: [PATCH 20/38] Backport PR #36523: DOC: a few sphinx fixes in release notes (#36573) --- doc/source/whatsnew/v1.1.3.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index e3b0f59c3edcc..c1effad34ab93 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -33,8 +33,8 @@ Fixed regressions - Fixed regression in 
:class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) -- Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`,:issue:`35802`) -- Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`,:issue:`36377`) +- Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`) +- Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`, :issue:`36377`) - Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) - Fixed regression in :class:`Period` incorrect value for ordinal over the maximum timestamp (:issue:`36430`) From 456f0195eee50a542e396b8ffaa8620af20f6ef8 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 25 Sep 2020 10:14:08 +0100 Subject: [PATCH 21/38] numpy version in py36_locale_slow_old_np (#36599) --- ci/deps/azure-36-32bit.yaml | 2 +- ci/deps/azure-36-locale_slow.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/deps/azure-36-32bit.yaml b/ci/deps/azure-36-32bit.yaml index 6deb10a408ca4..456ae4e33d742 100644 --- a/ci/deps/azure-36-32bit.yaml +++ b/ci/deps/azure-36-32bit.yaml @@ -15,7 +15,7 @@ dependencies: - attrs=19.1.0 - gcc_linux-32 - gxx_linux-32 - - numpy=1.14.* + - numpy=1.15.4 - python-dateutil - pytz=2017.2 diff --git a/ci/deps/azure-36-locale_slow.yaml b/ci/deps/azure-36-locale_slow.yaml index 0a151944cdceb..4f5b962d48fcd 100644 --- 
a/ci/deps/azure-36-locale_slow.yaml +++ b/ci/deps/azure-36-locale_slow.yaml @@ -17,7 +17,7 @@ dependencies: - bottleneck=1.2.* - lxml - matplotlib=2.2.2 - - numpy=1.14.* + - numpy=1.15.4 - openpyxl=2.5.7 - python-dateutil - python-blosc From 8a9238807f34729825e59f6bd3611c14350a81ed Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 25 Sep 2020 03:10:32 -0700 Subject: [PATCH 22/38] Backport PR #36610: REGR: DataFrame.apply() with raw option and func returning string (#36631) Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/apply.py | 18 +++++++++++++++++- pandas/tests/frame/apply/test_frame_apply.py | 8 ++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index c1effad34ab93..34595ea4ec50f 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -35,6 +35,7 @@ Fixed regressions - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`) - Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`, :issue:`36377`) +- Fixed regression in :meth:`DataFrame.apply` with ``raw=True`` and user-function returning string (:issue:`35940`) - Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) - Fixed regression in :class:`Period` incorrect value for ordinal over the maximum timestamp (:issue:`36430`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 99a9e1377563c..fd7ffd1b54a70 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -216,7 +216,23 @@ def apply_empty_result(self): def 
apply_raw(self): """ apply to the values as a numpy array """ - result = np.apply_along_axis(self.f, self.axis, self.values) + + def wrap_function(func): + """ + Wrap user supplied function to work around numpy issue. + + see https://github.com/numpy/numpy/issues/8352 + """ + + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + if isinstance(result, str): + result = np.array(result, dtype=object) + return result + + return wrapper + + result = np.apply_along_axis(wrap_function(self.f), self.axis, self.values) # TODO: mixed type case if result.ndim == 2: diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py index adcd54fd7a7dc..1657abcc96d76 100644 --- a/pandas/tests/frame/apply/test_frame_apply.py +++ b/pandas/tests/frame/apply/test_frame_apply.py @@ -1561,3 +1561,11 @@ def test_apply_no_suffix_index(): ) tm.assert_frame_equal(result, expected) + + +def test_apply_raw_returns_string(): + # https://github.com/pandas-dev/pandas/issues/35940 + df = pd.DataFrame({"A": ["aa", "bbb"]}) + result = df.apply(lambda x: x[0], axis=1, raw=True) + expected = pd.Series(["aa", "bbb"]) + tm.assert_series_equal(result, expected) From c6123ad32a40ec94a915ef9166076b3fb5e8cef1 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 25 Sep 2020 04:35:56 -0700 Subject: [PATCH 23/38] Backport PR #36613: BUG: Fix unordered cut with Series labels (#36633) Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/reshape/tile.py | 2 +- pandas/tests/reshape/test_cut.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 34595ea4ec50f..c63a78c76572f 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -52,6 +52,7 @@ Bug fixes - Bug in :meth:`DataFrame.stack` raising a ``ValueError`` 
when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`) - Bug in :meth:`Series.astype` showing too much precision when casting from ``np.float32`` to string dtype (:issue:`36451`) - Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`) +- Bug in :func:`cut` raising a ``ValueError`` when passed a :class:`Series` of labels with ``ordered=False`` (:issue:`36603`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index f7723bee532ff..aefc6eb4d20ae 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -381,7 +381,7 @@ def _bins_to_cuts( duplicates: str = "raise", ordered: bool = True, ): - if not ordered and not labels: + if not ordered and labels is None: raise ValueError("'labels' must be provided if 'ordered = False'") if duplicates not in ["raise", "drop"]: diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 60c80a8abdba6..4d2195da85a13 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -664,3 +664,13 @@ def test_cut_unordered_with_missing_labels_raises_error(): msg = "'labels' must be provided if 'ordered = False'" with pytest.raises(ValueError, match=msg): cut([0.5, 3], bins=[0, 1, 2], ordered=False) + + +def test_cut_unordered_with_series_labels(): + # https://github.com/pandas-dev/pandas/issues/36603 + s = pd.Series([1, 2, 3, 4, 5]) + bins = pd.Series([0, 2, 4, 6]) + labels = pd.Series(["a", "b", "c"]) + result = pd.cut(s, bins=bins, labels=labels, ordered=False) + expected = pd.Series(["a", "a", "b", "b", "c"], dtype="category") + tm.assert_series_equal(result, expected) From 52f8b9aa9a5ddcfab15fe7c2493379ced3786ee1 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sat, 26 Sep 2020 01:24:05 
-0700 Subject: [PATCH 24/38] Backport PR #36557: [BUG]: Fix bug with pre epoch normalization (#36652) Co-authored-by: patrick <61934744+phofl@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/tests/scalar/timestamp/test_unary_ops.py | 6 ++++++ pandas/tests/series/test_datetime_values.py | 8 ++++++++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index c63a78c76572f..4ad85fd6bafa6 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -38,6 +38,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.apply` with ``raw=True`` and user-function returning string (:issue:`35940`) - Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) - Fixed regression in :class:`Period` incorrect value for ordinal over the maximum timestamp (:issue:`36430`) +- Fixed regression in :meth:`Series.dt.normalize` when normalizing pre-epoch dates the result was shifted one day (:issue:`36294`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index adf1dfbc1ac72..3b52b4d499694 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -830,7 +830,7 @@ cpdef inline datetime localize_pydatetime(datetime dt, object tz): # ---------------------------------------------------------------------- # Normalization -@cython.cdivision +@cython.cdivision(False) cdef inline int64_t normalize_i8_stamp(int64_t local_val) nogil: """ Round the localized nanosecond timestamp down to the previous midnight. 
diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 8641bbd0a66f2..e8196cd8328e7 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -397,6 +397,12 @@ def test_normalize(self, tz_naive_fixture, arg): expected = Timestamp("2013-11-30", tz=tz) assert result == expected + def test_normalize_pre_epoch_dates(self): + # GH: 36294 + result = Timestamp("1969-01-01 09:00:00").normalize() + expected = Timestamp("1969-01-01 00:00:00") + assert result == expected + # -------------------------------------------------------------- @td.skip_if_windows diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 723bd303b1974..b0926089bd7b4 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -702,3 +702,11 @@ def test_week_and_weekofyear_are_deprecated(): series.dt.week with tm.assert_produces_warning(FutureWarning): series.dt.weekofyear + + +def test_normalize_pre_epoch_dates(): + # GH: 36294 + s = pd.to_datetime(pd.Series(["1969-01-01 09:00:00", "2016-01-01 09:00:00"])) + result = s.dt.normalize() + expected = pd.to_datetime(pd.Series(["1969-01-01", "2016-01-01"])) + tm.assert_series_equal(result, expected) From c5bd38dfdc86de5e0a947b71215c47b6d2f8e486 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sat, 26 Sep 2020 03:21:56 -0700 Subject: [PATCH 25/38] Backport PR #36444: BUG: inconsistent replace (#36658) Co-authored-by: Number42 <32516498+QuentinN42@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/internals/blocks.py | 5 ++++- pandas/tests/frame/methods/test_replace.py | 25 ++++++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 4ad85fd6bafa6..7c7e40e633acc 
100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -34,6 +34,7 @@ Fixed regressions - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`) +- Fixed regression in :meth:`DataFrame.replace` inconsistent replace when using a float in the replace method (:issue:`35376`) - Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`, :issue:`36377`) - Fixed regression in :meth:`DataFrame.apply` with ``raw=True`` and user-function returning string (:issue:`35940`) - Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ea2b0c972d9aa..12806170a9f6d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -36,6 +36,7 @@ is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, + is_float, is_float_dtype, is_integer, is_integer_dtype, @@ -1996,7 +1997,9 @@ def _can_hold_element(self, element: Any) -> bool: and not issubclass(tipo.type, (np.datetime64, np.timedelta64)) and self.dtype.itemsize >= tipo.itemsize ) - return is_integer(element) + # We have not inferred an integer from the dtype + # check if we have a builtin int or a float equal to an int + return is_integer(element) or (is_float(element) and element.is_integer()) class DatetimeLikeBlockMixin: diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 83dfd42ae2a6e..c42039bb92154 100644 --- 
a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -974,6 +974,31 @@ def test_replace_for_new_dtypes(self, datetime_frame): } ), ), + # GH 35376 + ( + DataFrame([[1, 1.0], [2, 2.0]]), + 1.0, + 5, + DataFrame([[5, 5.0], [2, 2.0]]), + ), + ( + DataFrame([[1, 1.0], [2, 2.0]]), + 1, + 5, + DataFrame([[5, 5.0], [2, 2.0]]), + ), + ( + DataFrame([[1, 1.0], [2, 2.0]]), + 1.0, + 5.0, + DataFrame([[5, 5.0], [2, 2.0]]), + ), + ( + DataFrame([[1, 1.0], [2, 2.0]]), + 1, + 5.0, + DataFrame([[5, 5.0], [2, 2.0]]), + ), ], ) def test_replace_dtypes(self, frame, to_replace, value, expected): From f9531124bf2fa510c397cf2d114edcacb47aabbf Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sat, 26 Sep 2020 03:24:51 -0700 Subject: [PATCH 26/38] Backport PR #36595: Partial Revert "ENH: infer freq in timedelta_range (#32377)" (#36659) Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v1.1.3.rst | 3 ++- pandas/core/arrays/timedeltas.py | 4 ---- pandas/core/indexes/timedeltas.py | 4 ++-- pandas/tests/arithmetic/test_timedelta64.py | 17 +++++++++++++++++ .../indexes/timedeltas/test_timedelta_range.py | 6 +++++- 5 files changed, 26 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 7c7e40e633acc..aeb9076617787 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -31,6 +31,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`DataFrame.agg`, :meth:`DataFrame.apply`, :meth:`Series.agg`, and :meth:`Series.apply` where internal suffix is exposed to the users when no relabelling is applied (:issue:`36189`) - Fixed regression in :class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`) +- Fixed regression when adding a :meth:`timedelta_range` to a :class:``Timestamp`` raised an ``ValueError`` (:issue:`35897`) - Fixed regression in 
:meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`) @@ -62,7 +63,7 @@ Bug fixes Other ~~~~~ -- +- Reverted enhancement added in pandas-1.1.0 where :func:`timedelta_range` infers a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a30e1060c64f1..92f1f7ea714ef 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -256,10 +256,6 @@ def _generate_range(cls, start, end, periods, freq, closed=None): index = generate_regular_range(start, end, periods, freq) else: index = np.linspace(start.value, end.value, periods).astype("i8") - if len(index) >= 2: - # Infer a frequency - td = Timedelta(index[1] - index[0]) - freq = to_offset(td) if not left_closed: index = index[1:] diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index dccc8369c5366..af3b2d1e5223f 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -323,8 +323,8 @@ def timedelta_range( >>> pd.timedelta_range(start='1 day', end='5 days', periods=4) TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00', - '5 days 00:00:00'], - dtype='timedelta64[ns]', freq='32H') + '5 days 00:00:00'], + dtype='timedelta64[ns]', freq=None) """ if freq is None and com.any_none(periods, start, end): freq = "D" diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index f94408d657ae5..a5d04759ac8c7 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ 
b/pandas/tests/arithmetic/test_timedelta64.py @@ -2119,3 +2119,20 @@ def test_td64arr_pow_invalid(self, scalar_td, box_with_array): with pytest.raises(TypeError, match=pattern): td1 ** scalar_td + + +def test_add_timestamp_to_timedelta(): + # GH: 35897 + timestamp = pd.Timestamp.now() + result = timestamp + pd.timedelta_range("0s", "1s", periods=31) + expected = pd.DatetimeIndex( + [ + timestamp + + ( + pd.to_timedelta("0.033333333s") * i + + pd.to_timedelta("0.000000001s") * divmod(i, 3)[0] + ) + for i in range(31) + ] + ) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 7d78fbf9ff190..dc3df4427f351 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -38,7 +38,6 @@ def test_linspace_behavior(self, periods, freq): result = timedelta_range(start="0 days", end="4 days", periods=periods) expected = timedelta_range(start="0 days", end="4 days", freq=freq) tm.assert_index_equal(result, expected) - assert result.freq == freq def test_errors(self): # not enough params @@ -79,3 +78,8 @@ def test_timedelta_range_freq_divide_end(self, start, end, freq, expected_period assert Timedelta(start) == res[0] assert Timedelta(end) >= res[-1] assert len(res) == expected_periods + + def test_timedelta_range_infer_freq(self): + # https://github.com/pandas-dev/pandas/issues/35897 + result = timedelta_range("0s", "1s", periods=31) + assert result.freq is None From bd3e37f9140c9fff3924098cd7754cd49015404a Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sat, 26 Sep 2020 04:32:17 -0700 Subject: [PATCH 27/38] Backport PR #36560: [BUG]: Fix regression in read_table with delim_whitespace=True (#36661) Co-authored-by: patrick <61934744+phofl@users.noreply.github.com> --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/io/parsers.py | 10 
++++++++++ pandas/tests/io/parser/test_common.py | 21 +++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index aeb9076617787..eded30ca45025 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -40,6 +40,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.apply` with ``raw=True`` and user-function returning string (:issue:`35940`) - Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) - Fixed regression in :class:`Period` incorrect value for ordinal over the maximum timestamp (:issue:`36430`) +- Fixed regression in :func:`read_table` raised ``ValueError`` when ``delim_whitespace`` was set to ``True`` (:issue:`35958`) - Fixed regression in :meth:`Series.dt.normalize` when normalizing pre-epoch dates the result was shifted one day (:issue:`36294`) .. --------------------------------------------------------------------------- diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index d4f346f8c1087..a02b059967e88 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -752,6 +752,16 @@ def read_table( memory_map=False, float_precision=None, ): + # TODO: validation duplicated in read_csv + if delim_whitespace and (delimiter is not None or sep != "\t"): + raise ValueError( + "Specified a delimiter with both sep and " + "delim_whitespace=True; you can only specify one." 
+ ) + if delim_whitespace: + # In this case sep is not used so we set it to the read_csv + # default to avoid a ValueError + sep = "," return read_csv(**locals()) diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 3d5f6ae3a4af9..c6a43d22ca155 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -2191,3 +2191,24 @@ def test_read_csv_with_use_inf_as_na(all_parsers): result = parser.read_csv(StringIO(data), header=None) expected = DataFrame([1.0, np.nan, 3.0]) tm.assert_frame_equal(result, expected) + + +def test_read_table_delim_whitespace_default_sep(all_parsers): + # GH: 35958 + f = StringIO("a b c\n1 -2 -3\n4 5 6") + parser = all_parsers + result = parser.read_table(f, delim_whitespace=True) + expected = DataFrame({"a": [1, 4], "b": [-2, 5], "c": [-3, 6]}) + tm.assert_frame_equal(result, expected) + + +def test_read_table_delim_whitespace_non_default_sep(all_parsers): + # GH: 35958 + f = StringIO("a b c\n1 -2 -3\n4 5 6") + parser = all_parsers + msg = ( + "Specified a delimiter with both sep and " + "delim_whitespace=True; you can only specify one." 
+ ) + with pytest.raises(ValueError, match=msg): + parser.read_table(f, delim_whitespace=True, sep=",") From 5f7f2b2ec1fd5f607d44ddcfe29f8f1586bfac40 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 26 Sep 2020 13:12:29 +0100 Subject: [PATCH 28/38] CLN: lint fixup on 1.1.x (#36663) --- pandas/tests/frame/methods/test_replace.py | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index c42039bb92154..a1d62a103b322 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -975,30 +975,15 @@ def test_replace_for_new_dtypes(self, datetime_frame): ), ), # GH 35376 + (DataFrame([[1, 1.0], [2, 2.0]]), 1.0, 5, DataFrame([[5, 5.0], [2, 2.0]]),), + (DataFrame([[1, 1.0], [2, 2.0]]), 1, 5, DataFrame([[5, 5.0], [2, 2.0]]),), ( DataFrame([[1, 1.0], [2, 2.0]]), 1.0, - 5, - DataFrame([[5, 5.0], [2, 2.0]]), - ), - ( - DataFrame([[1, 1.0], [2, 2.0]]), - 1, - 5, - DataFrame([[5, 5.0], [2, 2.0]]), - ), - ( - DataFrame([[1, 1.0], [2, 2.0]]), - 1.0, - 5.0, - DataFrame([[5, 5.0], [2, 2.0]]), - ), - ( - DataFrame([[1, 1.0], [2, 2.0]]), - 1, 5.0, DataFrame([[5, 5.0], [2, 2.0]]), ), + (DataFrame([[1, 1.0], [2, 2.0]]), 1, 5.0, DataFrame([[5, 5.0], [2, 2.0]]),), ], ) def test_replace_dtypes(self, frame, to_replace, value, expected): From cc21cf8e3cd15ae0e3d20c96085634d6adcd2fe2 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sat, 26 Sep 2020 09:56:13 -0700 Subject: [PATCH 29/38] Backport PR #36664: DOC: minor fix for 1.1.3 release notes (#36669) Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v1.1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index eded30ca45025..97db7a3e4862d 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ 
-31,7 +31,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`DataFrame.agg`, :meth:`DataFrame.apply`, :meth:`Series.agg`, and :meth:`Series.apply` where internal suffix is exposed to the users when no relabelling is applied (:issue:`36189`) - Fixed regression in :class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`) -- Fixed regression when adding a :meth:`timedelta_range` to a :class:``Timestamp`` raised an ``ValueError`` (:issue:`35897`) +- Fixed regression when adding a :meth:`timedelta_range` to a :class:`Timestamp` raised an ``ValueError`` (:issue:`35897`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`) From e99b4674428e1e3dc7525ce0a619ee40f13009d4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sat, 26 Sep 2020 13:12:25 -0500 Subject: [PATCH 30/38] Backport PR #36670: DOC: Fix release note typo (#36672) --- doc/source/whatsnew/v1.1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 97db7a3e4862d..91b9cf59687b3 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -31,7 +31,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`DataFrame.agg`, :meth:`DataFrame.apply`, :meth:`Series.agg`, and :meth:`Series.apply` where internal suffix is exposed to the users when no relabelling is applied (:issue:`36189`) - Fixed regression in :class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`) -- Fixed regression when adding a 
:meth:`timedelta_range` to a :class:`Timestamp` raised an ``ValueError`` (:issue:`35897`) +- Fixed regression when adding a :meth:`timedelta_range` to a :class:`Timestamp` raised a ``ValueError`` (:issue:`35897`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`) From 637bdc35ae542d3ff47393e878159e5c37339270 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Thu, 1 Oct 2020 09:48:38 -0700 Subject: [PATCH 31/38] Backport PR #36706: CI: npdev new exception message (#36751) Co-authored-by: jbrockmendel --- pandas/tests/arithmetic/common.py | 7 +++++++ pandas/tests/frame/test_arithmetic.py | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py index 755fbd0d9036c..cd8dd102dc27c 100644 --- a/pandas/tests/arithmetic/common.py +++ b/pandas/tests/arithmetic/common.py @@ -76,6 +76,13 @@ def assert_invalid_comparison(left, right, box): "Cannot compare type", "not supported between", "invalid type promotion", + ( + # GH#36706 npdev 1.20.0 2020-09-28 + r"The DTypes <class 'numpy.dtype\[datetime64\]'> and " + r"<class 'numpy.dtype\[int64\]'> do not have a common DType. " + "For example they cannot be stored in a single array unless the " + "dtype is `object`." 
+ ), ] ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index e17357e9845b5..166f26f668502 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -53,6 +53,11 @@ def check(df, df2): msgs = [ r"Invalid comparison between dtype=datetime64\[ns\] and ndarray", "invalid type promotion", + ( + # npdev 1.20.0 + r"The DTypes <class 'numpy.dtype\[.*\]'> and " + r"<class 'numpy.dtype\[.*\]'> do not have a common DType." + ), ] msg = "|".join(msgs) with pytest.raises(TypeError, match=msg): From 00ae553211c5ab66c14aa63faa7bc64b57e762a1 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Thu, 1 Oct 2020 09:49:47 -0700 Subject: [PATCH 32/38] Backport PR #36552: REGR: Series.__mod__ behaves different with numexpr (#36750) Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/computation/expressions.py | 5 +++- pandas/core/ops/methods.py | 2 -- pandas/tests/test_expressions.py | 40 +++++++++++++++++++++++++- 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 91b9cf59687b3..15777abcb8084 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -34,6 +34,7 @@ Fixed regressions - Fixed regression when adding a :meth:`timedelta_range` to a :class:`Timestamp` raised a ``ValueError`` (:issue:`35897`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) +- Fixed regression in modulo of :class:`Index`, :class:`Series` and :class:`DataFrame` using ``numexpr`` using C not Python semantics (:issue:`36047`, :issue:`36526`) - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child 
nodes (:issue:`36122`, :issue:`35802`) - Fixed regression in :meth:`DataFrame.replace` inconsistent replace when using a float in the replace method (:issue:`35376`) - Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`, :issue:`36377`) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 0e9077e6d557e..da290db362019 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -132,7 +132,10 @@ def _evaluate_numexpr(op, op_str, a, b): roperator.rtruediv: "/", operator.floordiv: "//", roperator.rfloordiv: "//", - operator.mod: "%", + # we require Python semantics for mod of negative for backwards compatibility + # see https://github.com/pydata/numexpr/issues/365 + # so sticking with unaccelerated for now + operator.mod: None, roperator.rmod: "%", operator.pow: "**", roperator.rpow: "**", diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index a4694a6e5134f..c60b67fa2f4f6 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -171,8 +171,6 @@ def _create_methods(cls, arith_method, comp_method, bool_method, special): mul=arith_method(cls, operator.mul, special), truediv=arith_method(cls, operator.truediv, special), floordiv=arith_method(cls, operator.floordiv, special), - # Causes a floating point exception in the tests when numexpr enabled, - # so for now no speedup mod=arith_method(cls, operator.mod, special), pow=arith_method(cls, operator.pow, special), # not entirely sure why this is necessary, but previously was included diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 2368e93ddc256..cc8a134ebcc9f 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -6,7 +6,7 @@ import pytest import pandas._testing as tm -from pandas.core.api import DataFrame +from pandas.core.api import DataFrame, Index, Series 
from pandas.core.computation import expressions as expr _frame = DataFrame(randn(10000, 4), columns=list("ABCD"), dtype="float64") @@ -380,3 +380,41 @@ def test_frame_series_axis(self, axis, arith): result = op_func(other, axis=axis) tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "op", + [ + "__mod__", + pytest.param("__rmod__", marks=pytest.mark.xfail(reason="GH-36552")), + "__floordiv__", + "__rfloordiv__", + ], + ) + @pytest.mark.parametrize("box", [DataFrame, Series, Index]) + @pytest.mark.parametrize("scalar", [-5, 5]) + def test_python_semantics_with_numexpr_installed(self, op, box, scalar): + # https://github.com/pandas-dev/pandas/issues/36047 + expr._MIN_ELEMENTS = 0 + data = np.arange(-50, 50) + obj = box(data) + method = getattr(obj, op) + result = method(scalar) + + # compare result with numpy + expr.set_use_numexpr(False) + expected = method(scalar) + expr.set_use_numexpr(True) + tm.assert_equal(result, expected) + + # compare result element-wise with Python + for i, elem in enumerate(data): + if box == DataFrame: + scalar_result = result.iloc[i, 0] + else: + scalar_result = result[i] + try: + expected = getattr(int(elem), op)(scalar) + except ZeroDivisionError: + pass + else: + assert scalar_result == expected From 5ad5d9e70411cc8f15e14247666335e771503324 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 1 Oct 2020 20:19:16 +0100 Subject: [PATCH 33/38] CI: troubleshoot travis ci on 1.1.x (#36770) --- ci/deps/travis-36-cov.yaml | 1 - ci/deps/travis-36-locale.yaml | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/deps/travis-36-cov.yaml b/ci/deps/travis-36-cov.yaml index c380423c55f19..8c8db106af05c 100644 --- a/ci/deps/travis-36-cov.yaml +++ b/ci/deps/travis-36-cov.yaml @@ -15,7 +15,6 @@ dependencies: # pandas dependencies - beautifulsoup4 - botocore>=1.11 - - cython>=0.29.16 - dask - fastparquet>=0.3.2 - fsspec>=0.7.4 diff --git a/ci/deps/travis-36-locale.yaml b/ci/deps/travis-36-locale.yaml index 
21176054ae0d7..31281ce0aa243 100644 --- a/ci/deps/travis-36-locale.yaml +++ b/ci/deps/travis-36-locale.yaml @@ -27,6 +27,7 @@ dependencies: - numpy - openpyxl - pandas-gbq=0.12.0 + - google-cloud-bigquery==1.21.0 - psycopg2=2.6.2 - pyarrow>=0.13.0 # GH #35813 - pymysql=0.7.11 From 2ea3fc803c9c92a2e3cdc0b1f591f110677434d4 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Sat, 3 Oct 2020 04:50:23 -0700 Subject: [PATCH 34/38] Backport PR #36675 on branch 1.1.x (REGR: Series.loc with a MultiIndex containing Timestamp raises InvalidIndexError) (#36818) Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/indexing.py | 2 +- pandas/tests/indexing/multiindex/test_loc.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 15777abcb8084..acf1dafc59885 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -37,6 +37,7 @@ Fixed regressions - Fixed regression in modulo of :class:`Index`, :class:`Series` and :class:`DataFrame` using ``numexpr`` using C not Python semantics (:issue:`36047`, :issue:`36526`) - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`) - Fixed regression in :meth:`DataFrame.replace` inconsistent replace when using a float in the replace method (:issue:`35376`) +- Fixed regression in :meth:`Series.loc` on a :class:`Series` with a :class:`MultiIndex` containing :class:`Timestamp` raising ``InvalidIndexError`` (:issue:`35858`) - Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`, :issue:`36377`) - Fixed regression in :meth:`DataFrame.apply` with ``raw=True`` and user-function returning string (:issue:`35940`) - Fixed regression when setting empty :class:`DataFrame` 
column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 04d1dbceb3342..5a24addf46d93 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1064,7 +1064,7 @@ def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): try: # fast path for series or for tup devoid of slices return self._get_label(tup, axis=axis) - except TypeError: + except (TypeError, InvalidIndexError): # slices are unhashable pass except KeyError as ek: diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 63983f45d7832..95a23a9bcf63b 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -493,6 +493,16 @@ def test_loc_datetime_mask_slicing(): tm.assert_series_equal(result, expected) +def test_loc_datetime_series_tuple_slicing(): + # https://github.com/pandas-dev/pandas/issues/35858 + date = pd.Timestamp("2000") + ser = pd.Series( + 1, index=pd.MultiIndex.from_tuples([("a", date)], names=["a", "b"]), name="c", + ) + result = ser.loc[:, [date]] + tm.assert_series_equal(result, ser) + + def test_loc_with_mi_indexer(): # https://github.com/pandas-dev/pandas/issues/35351 df = DataFrame( From b8cd5e85c4f546340c97213a9f642e381cce1449 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Mon, 5 Oct 2020 05:03:14 -0700 Subject: [PATCH 35/38] Backport PR #36864: CI: Update error message for np_dev (#36886) Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> --- pandas/tests/series/indexing/test_indexing.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 1fafdf00393e1..fbdac2bb2d8e8 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ 
b/pandas/tests/series/indexing/test_indexing.py @@ -367,14 +367,17 @@ def test_2d_to_1d_assignment_raises(): x = np.random.randn(2, 2) y = pd.Series(range(2)) - msg = ( - r"shape mismatch: value array of shape \(2,2\) could not be " - r"broadcast to indexing result of shape \(2,\)" + msg = "|".join( + [ + r"shape mismatch: value array of shape \(2,2\) could not be " + r"broadcast to indexing result of shape \(2,\)", + r"cannot reshape array of size 4 into shape \(2,\)", + ] ) with pytest.raises(ValueError, match=msg): y.loc[range(2)] = x - msg = r"could not broadcast input array from shape \(2,2\) into shape \(2\)" + msg = r"could not broadcast input array from shape \(2,2\) into shape \(2,?\)" with pytest.raises(ValueError, match=msg): y.loc[:] = x From 4b2f51a82dd92da03e6ca35492f489f70ffd4f9e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 5 Oct 2020 13:04:41 +0100 Subject: [PATCH 36/38] DOC: sync release notes on 1.1.x with master (#36883) --- doc/source/whatsnew/v1.1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index acf1dafc59885..af714b1bb2ab1 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -16,7 +16,7 @@ Enhancements Added support for new Python version ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Pandas 1.1.3 now supports Python 3.9 (:issue:`36296`). +pandas 1.1.3 now supports Python 3.9 (:issue:`36296`). 
Development Changes ^^^^^^^^^^^^^^^^^^^ From aeef2bf93bf787ce2a3dc802ea83adfaa5930ee4 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Mon, 5 Oct 2020 08:17:11 -0700 Subject: [PATCH 37/38] Backport PR #36887: DOC: 1.1.3 release date (#36891) Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v1.1.3.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index af714b1bb2ab1..2323afbe00e5d 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -1,7 +1,7 @@ .. _whatsnew_113: -What's new in 1.1.3 (??) ------------------------- +What's new in 1.1.3 (October 5, 2020) +------------------------------------- These are the changes in pandas 1.1.3. See :ref:`release` for a full changelog including other versions of pandas. From db08276bc116c438d3fdee492026f8223584c477 Mon Sep 17 00:00:00 2001 From: Pandas Development Team Date: Mon, 5 Oct 2020 15:27:32 +0000 Subject: [PATCH 38/38] RLS: 1.1.3