Skip to content

Commit cadcc3e

Browse files
committed
Merge remote-tracking branch 'upstream/master' into styler_to_html_sparse_args
2 parents 71c9dae + 8924277 commit cadcc3e

File tree

17 files changed

+290
-77
lines changed

17 files changed

+290
-77
lines changed

ci/code_checks.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
121121
pandas/io/parsers/ \
122122
pandas/io/sas/ \
123123
pandas/io/sql.py \
124-
pandas/tseries/ \
125-
pandas/io/formats/style_render.py
124+
pandas/tseries/
126125
RET=$(($RET + $?)) ; echo $MSG "DONE"
127126

128127
fi

doc/source/whatsnew/v1.4.0.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ Numeric
205205
^^^^^^^
206206
- Bug in :meth:`DataFrame.rank` raising ``ValueError`` with ``object`` columns and ``method="first"`` (:issue:`41931`)
207207
- Bug in :meth:`DataFrame.rank` treating missing values and extreme values as equal (for example ``np.nan`` and ``np.inf``), causing incorrect results when ``na_option="bottom"`` or ``na_option="top"`` is used (:issue:`41931`)
208-
-
208+
- Bug in ``numexpr`` engine still being used when the option ``compute.use_numexpr`` is set to ``False`` (:issue:`32556`)
209209

210210
Conversion
211211
^^^^^^^^^^
@@ -262,10 +262,11 @@ Groupby/resample/rolling
262262
^^^^^^^^^^^^^^^^^^^^^^^^
263263
- Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`)
264264
- Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
265-
-
265+
- Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`)
266266

267267
Reshaping
268268
^^^^^^^^^
269+
- Improved error message when creating a :class:`DataFrame` column from a multi-dimensional :class:`numpy.ndarray` (:issue:`42463`)
269270
- :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`)
270271
- Bug in :meth:`pandas.cut` on :class:`Series` with duplicate indices (:issue:`42185`) and non-exact :meth:`pandas.CategoricalIndex` (:issue:`42425`)
271272
-
@@ -287,6 +288,7 @@ Styler
287288

288289
Other
289290
^^^^^
291+
- Bug in :meth:`CustomBusinessMonthBegin.__add__` (:meth:`CustomBusinessMonthEnd.__add__`) not applying the extra ``offset`` parameter when beginning (end) of the target month is already a business day (:issue:`41356`)
290292

291293
.. ***DO NOT USE THIS SECTION***
292294

pandas/_libs/algos.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,8 @@ def groupsort_indexer(const intp_t[:] index, Py_ssize_t ngroups):
217217
This is a reverse of the label factorization process.
218218
"""
219219
cdef:
220-
Py_ssize_t i, loc, label, n
221-
ndarray[intp_t] indexer, where, counts
220+
Py_ssize_t i, label, n
221+
intp_t[::1] indexer, where, counts
222222

223223
counts = np.zeros(ngroups + 1, dtype=np.intp)
224224
n = len(index)
@@ -241,7 +241,7 @@ def groupsort_indexer(const intp_t[:] index, Py_ssize_t ngroups):
241241
indexer[where[label]] = i
242242
where[label] += 1
243243

244-
return indexer, counts
244+
return indexer.base, counts.base
245245

246246

247247
cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:

pandas/_libs/tslibs/offsets.pyx

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3370,7 +3370,10 @@ cdef class _CustomBusinessMonth(BusinessMixin):
33703370
"""
33713371
Define default roll function to be called in apply method.
33723372
"""
3373-
cbday = CustomBusinessDay(n=self.n, normalize=False, **self.kwds)
3373+
cbday_kwds = self.kwds.copy()
3374+
cbday_kwds['offset'] = timedelta(0)
3375+
3376+
cbday = CustomBusinessDay(n=1, normalize=False, **cbday_kwds)
33743377

33753378
if self._prefix.endswith("S"):
33763379
# MonthBegin
@@ -3414,6 +3417,9 @@ cdef class _CustomBusinessMonth(BusinessMixin):
34143417

34153418
new = cur_month_offset_date + n * self.m_offset
34163419
result = self.cbday_roll(new)
3420+
3421+
if self.offset:
3422+
result = result + self.offset
34173423
return result
34183424

34193425

pandas/_libs/window/aggregations.pyx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,10 @@ cdef inline void add_var(float64_t val, float64_t *nobs, float64_t *mean_x,
310310
t = y - mean_x[0]
311311
compensation[0] = t + mean_x[0] - y
312312
delta = t
313-
mean_x[0] = mean_x[0] + delta / nobs[0]
313+
if nobs[0]:
314+
mean_x[0] = mean_x[0] + delta / nobs[0]
315+
else:
316+
mean_x[0] = 0
314317
ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0])
315318

316319

pandas/core/computation/eval.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,10 @@ def _check_engine(engine: str | None) -> str:
4343
Engine name.
4444
"""
4545
from pandas.core.computation.check import NUMEXPR_INSTALLED
46+
from pandas.core.computation.expressions import USE_NUMEXPR
4647

4748
if engine is None:
48-
engine = "numexpr" if NUMEXPR_INSTALLED else "python"
49+
engine = "numexpr" if USE_NUMEXPR else "python"
4950

5051
if engine not in ENGINES:
5152
valid_engines = list(ENGINES.keys())

pandas/core/groupby/groupby.py

Lines changed: 44 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2897,16 +2897,15 @@ def _get_cythonized_result(
28972897

28982898
ids, _, ngroups = grouper.group_info
28992899
output: dict[base.OutputKey, np.ndarray] = {}
2900-
base_func = getattr(libgroupby, how)
2901-
2902-
error_msg = ""
2903-
for idx, obj in enumerate(self._iterate_slices()):
2904-
name = obj.name
2905-
values = obj._values
29062900

2907-
if numeric_only and not is_numeric_dtype(values.dtype):
2908-
continue
2901+
base_func = getattr(libgroupby, how)
2902+
base_func = partial(base_func, labels=ids)
2903+
if needs_ngroups:
2904+
base_func = partial(base_func, ngroups=ngroups)
2905+
if min_count is not None:
2906+
base_func = partial(base_func, min_count=min_count)
29092907

2908+
def blk_func(values: ArrayLike) -> ArrayLike:
29102909
if aggregate:
29112910
result_sz = ngroups
29122911
else:
@@ -2915,54 +2914,31 @@ def _get_cythonized_result(
29152914
result = np.zeros(result_sz, dtype=cython_dtype)
29162915
if needs_2d:
29172916
result = result.reshape((-1, 1))
2918-
func = partial(base_func, result)
2917+
func = partial(base_func, out=result)
29192918

29202919
inferences = None
29212920

29222921
if needs_counts:
29232922
counts = np.zeros(self.ngroups, dtype=np.int64)
2924-
func = partial(func, counts)
2923+
func = partial(func, counts=counts)
29252924

29262925
if needs_values:
29272926
vals = values
29282927
if pre_processing:
2929-
try:
2930-
vals, inferences = pre_processing(vals)
2931-
except TypeError as err:
2932-
error_msg = str(err)
2933-
howstr = how.replace("group_", "")
2934-
warnings.warn(
2935-
"Dropping invalid columns in "
2936-
f"{type(self).__name__}.{howstr} is deprecated. "
2937-
"In a future version, a TypeError will be raised. "
2938-
f"Before calling .{howstr}, select only columns which "
2939-
"should be valid for the function.",
2940-
FutureWarning,
2941-
stacklevel=3,
2942-
)
2943-
continue
2928+
vals, inferences = pre_processing(vals)
2929+
29442930
vals = vals.astype(cython_dtype, copy=False)
29452931
if needs_2d:
29462932
vals = vals.reshape((-1, 1))
2947-
func = partial(func, vals)
2948-
2949-
func = partial(func, ids)
2950-
2951-
if min_count is not None:
2952-
func = partial(func, min_count)
2933+
func = partial(func, values=vals)
29532934

29542935
if needs_mask:
29552936
mask = isna(values).view(np.uint8)
2956-
func = partial(func, mask)
2957-
2958-
if needs_ngroups:
2959-
func = partial(func, ngroups)
2937+
func = partial(func, mask=mask)
29602938

29612939
if needs_nullable:
29622940
is_nullable = isinstance(values, BaseMaskedArray)
29632941
func = partial(func, nullable=is_nullable)
2964-
if post_processing:
2965-
post_processing = partial(post_processing, nullable=is_nullable)
29662942

29672943
func(**kwargs) # Call func to modify indexer values in place
29682944

@@ -2973,9 +2949,38 @@ def _get_cythonized_result(
29732949
result = algorithms.take_nd(values, result)
29742950

29752951
if post_processing:
2976-
result = post_processing(result, inferences)
2952+
pp_kwargs = {}
2953+
if needs_nullable:
2954+
pp_kwargs["nullable"] = isinstance(values, BaseMaskedArray)
29772955

2978-
key = base.OutputKey(label=name, position=idx)
2956+
result = post_processing(result, inferences, **pp_kwargs)
2957+
2958+
return result
2959+
2960+
error_msg = ""
2961+
for idx, obj in enumerate(self._iterate_slices()):
2962+
values = obj._values
2963+
2964+
if numeric_only and not is_numeric_dtype(values.dtype):
2965+
continue
2966+
2967+
try:
2968+
result = blk_func(values)
2969+
except TypeError as err:
2970+
error_msg = str(err)
2971+
howstr = how.replace("group_", "")
2972+
warnings.warn(
2973+
"Dropping invalid columns in "
2974+
f"{type(self).__name__}.{howstr} is deprecated. "
2975+
"In a future version, a TypeError will be raised. "
2976+
f"Before calling .{howstr}, select only columns which "
2977+
"should be valid for the function.",
2978+
FutureWarning,
2979+
stacklevel=3,
2980+
)
2981+
continue
2982+
2983+
key = base.OutputKey(label=obj.name, position=idx)
29792984
output[key] = result
29802985

29812986
# error_msg is "" on an frame/series with no rows or columns

pandas/core/indexes/base.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,11 @@ def _format_duplicate_message(self) -> DataFrame:
672672
assert len(duplicates)
673673

674674
out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
675+
if self._is_multi:
676+
# test_format_duplicate_labels_message_multi
677+
# error: "Type[Index]" has no attribute "from_tuples" [attr-defined]
678+
out.index = type(self).from_tuples(out.index) # type: ignore[attr-defined]
679+
675680
if self.nlevels == 1:
676681
out = out.rename_axis("label")
677682
return out.to_frame(name="positions")
@@ -5400,22 +5405,15 @@ def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]
54005405

54015406
self._raise_if_missing(keyarr, indexer, axis_name)
54025407

5403-
if (
5404-
needs_i8_conversion(self.dtype)
5405-
or is_categorical_dtype(self.dtype)
5406-
or is_interval_dtype(self.dtype)
5407-
):
5408-
# For CategoricalIndex take instead of reindex to preserve dtype.
5409-
# For IntervalIndex this is to map integers to the Intervals they match to.
5410-
keyarr = self.take(indexer)
5411-
if keyarr.dtype.kind in ["m", "M"]:
5412-
# DTI/TDI.take can infer a freq in some cases when we dont want one
5413-
if isinstance(key, list) or (
5414-
isinstance(key, type(self))
5415-
# "Index" has no attribute "freq"
5416-
and key.freq is None # type: ignore[attr-defined]
5417-
):
5418-
keyarr = keyarr._with_freq(None)
5408+
keyarr = self.take(indexer)
5409+
if keyarr.dtype.kind in ["m", "M"]:
5410+
# DTI/TDI.take can infer a freq in some cases when we dont want one
5411+
if isinstance(key, list) or (
5412+
isinstance(key, type(self))
5413+
# "Index" has no attribute "freq"
5414+
and key.freq is None # type: ignore[attr-defined]
5415+
):
5416+
keyarr = keyarr._with_freq(None)
54195417

54205418
return keyarr, indexer
54215419

pandas/core/internals/construction.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,8 @@ def _extract_index(data) -> Index:
615615
elif is_list_like(val) and getattr(val, "ndim", 1) == 1:
616616
have_raw_arrays = True
617617
raw_lengths.append(len(val))
618+
elif isinstance(val, np.ndarray) and val.ndim > 1:
619+
raise ValueError("Per-column arrays must each be 1-dimensional")
618620

619621
if not indexes and not raw_lengths:
620622
raise ValueError("If using all scalar values, you must pass an index")

pandas/tests/computation/test_eval.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1865,6 +1865,35 @@ def test_invalid_engine():
18651865
pd.eval("x + y", local_dict={"x": 1, "y": 2}, engine="asdf")
18661866

18671867

1868+
@td.skip_if_no_ne
1869+
@pytest.mark.parametrize(
1870+
("use_numexpr", "expected"),
1871+
(
1872+
(True, "numexpr"),
1873+
(False, "python"),
1874+
),
1875+
)
1876+
def test_numexpr_option_respected(use_numexpr, expected):
1877+
# GH 32556
1878+
from pandas.core.computation.eval import _check_engine
1879+
1880+
with pd.option_context("compute.use_numexpr", use_numexpr):
1881+
result = _check_engine(None)
1882+
assert result == expected
1883+
1884+
1885+
@td.skip_if_no_ne
1886+
def test_numexpr_option_incompatible_op():
1887+
# GH 32556
1888+
with pd.option_context("compute.use_numexpr", False):
1889+
df = DataFrame(
1890+
{"A": [True, False, True, False, None, None], "B": [1, 2, 3, 4, 5, 6]}
1891+
)
1892+
result = df.query("A.isnull()")
1893+
expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=[4, 5])
1894+
tm.assert_frame_equal(result, expected)
1895+
1896+
18681897
@td.skip_if_no_ne
18691898
def test_invalid_parser():
18701899
msg = "Invalid parser 'asdf' passed"

0 commit comments

Comments
 (0)