Skip to content

Commit f538868

Browse files
committed
Merge branch 'master' into enh-masked-2d
2 parents f0957b3 + be35ea2 commit f538868

File tree

28 files changed

+1065
-1074
lines changed

28 files changed

+1065
-1074
lines changed

doc/source/getting_started/install.rst

Lines changed: 84 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -255,47 +255,52 @@ For example, :func:`pandas.read_hdf` requires the ``pytables`` package, while
255255
optional dependency is not installed, pandas will raise an ``ImportError`` when
256256
the method requiring that dependency is called.
257257

258+
Visualization
259+
^^^^^^^^^^^^^
260+
258261
========================= ================== =============================================================
259262
Dependency Minimum Version Notes
260263
========================= ================== =============================================================
261-
BeautifulSoup4 4.6.0 HTML parser for read_html (see :ref:`note <optional_html>`)
264+
matplotlib 2.2.3 Plotting library
262265
Jinja2 2.10 Conditional formatting with DataFrame.style
263-
PyQt4 Clipboard I/O
264-
PyQt5 Clipboard I/O
265-
PyTables 3.5.1 HDF5-based reading / writing
266-
SQLAlchemy 1.3.0 SQL support for databases other than sqlite
266+
tabulate 0.8.7 Printing in Markdown-friendly format (see `tabulate`_)
267+
========================= ================== =============================================================
268+
269+
Computation
270+
^^^^^^^^^^^
271+
272+
========================= ================== =============================================================
273+
Dependency Minimum Version Notes
274+
========================= ================== =============================================================
267275
SciPy 1.2.0 Miscellaneous statistical functions
268-
xlsxwriter 1.0.2 Excel writing
269-
blosc 1.17.0 Compression for HDF5
270-
fsspec 0.7.4 Handling files aside from local and HTTP
271-
fastparquet 0.4.0 Parquet reading / writing
272-
gcsfs 0.6.0 Google Cloud Storage access
273-
html5lib 1.0.1 HTML parser for read_html (see :ref:`note <optional_html>`)
274-
lxml 4.3.0 HTML parser for read_html (see :ref:`note <optional_html>`)
275-
matplotlib 2.2.3 Visualization
276276
numba 0.46.0 Alternative execution engine for rolling operations
277+
(see :ref:`Enhancing Performance <enhancingperf.numba>`)
278+
xarray 0.12.3 pandas-like API for N-dimensional data
279+
========================= ================== =============================================================
280+
281+
Excel files
282+
^^^^^^^^^^^
283+
284+
========================= ================== =============================================================
285+
Dependency Minimum Version Notes
286+
========================= ================== =============================================================
287+
xlrd 1.2.0 Reading Excel
288+
xlwt 1.3.0 Writing Excel
289+
xlsxwriter 1.0.2 Writing Excel
277290
openpyxl 3.0.0 Reading / writing for xlsx files
278-
pandas-gbq 0.12.0 Google Big Query access
279-
psycopg2 2.7 PostgreSQL engine for sqlalchemy
280-
pyarrow 0.15.0 Parquet, ORC, and feather reading / writing
281-
pymysql 0.8.1 MySQL engine for sqlalchemy
282-
pyreadstat SPSS files (.sav) reading
283291
pyxlsb 1.0.6 Reading for xlsb files
284-
qtpy Clipboard I/O
285-
s3fs 0.4.0 Amazon S3 access
286-
tabulate 0.8.7 Printing in Markdown-friendly format (see `tabulate`_)
287-
xarray 0.12.3 pandas-like API for N-dimensional data
288-
xclip Clipboard I/O on linux
289-
xlrd 1.2.0 Excel reading
290-
xlwt 1.3.0 Excel writing
291-
xsel Clipboard I/O on linux
292-
zlib Compression for HDF5
293292
========================= ================== =============================================================
294293

295-
.. _optional_html:
294+
HTML
295+
^^^^
296296

297-
Optional dependencies for parsing HTML
298-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
297+
========================= ================== =============================================================
298+
Dependency Minimum Version Notes
299+
========================= ================== =============================================================
300+
BeautifulSoup4 4.6.0 HTML parser for read_html
301+
html5lib 1.0.1 HTML parser for read_html
302+
lxml 4.3.0 HTML parser for read_html
303+
========================= ================== =============================================================
299304

300305
One of the following combinations of libraries is needed to use the
301306
top-level :func:`~pandas.read_html` function:
@@ -320,3 +325,52 @@ top-level :func:`~pandas.read_html` function:
320325
.. _BeautifulSoup4: https://www.crummy.com/software/BeautifulSoup
321326
.. _lxml: https://lxml.de
322327
.. _tabulate: https://github.com/astanin/python-tabulate
328+
329+
SQL databases
330+
^^^^^^^^^^^^^
331+
332+
========================= ================== =============================================================
333+
Dependency Minimum Version Notes
334+
========================= ================== =============================================================
335+
SQLAlchemy 1.3.0 SQL support for databases other than sqlite
336+
psycopg2 2.7 PostgreSQL engine for sqlalchemy
337+
pymysql 0.8.1 MySQL engine for sqlalchemy
338+
========================= ================== =============================================================
339+
340+
Other data sources
341+
^^^^^^^^^^^^^^^^^^
342+
343+
========================= ================== =============================================================
344+
Dependency Minimum Version Notes
345+
========================= ================== =============================================================
346+
PyTables 3.5.1 HDF5-based reading / writing
347+
blosc 1.17.0 Compression for HDF5
348+
zlib Compression for HDF5
349+
fastparquet 0.4.0 Parquet reading / writing
350+
pyarrow 0.15.0 Parquet, ORC, and feather reading / writing
351+
pyreadstat SPSS files (.sav) reading
352+
========================= ================== =============================================================
353+
354+
Access data in the cloud
355+
^^^^^^^^^^^^^^^^^^^^^^^^
356+
357+
========================= ================== =============================================================
358+
Dependency Minimum Version Notes
359+
========================= ================== =============================================================
360+
fsspec 0.7.4 Handling files aside from simple local and HTTP
361+
gcsfs 0.6.0 Google Cloud Storage access
362+
pandas-gbq 0.12.0 Google Big Query access
363+
s3fs 0.4.0 Amazon S3 access
364+
========================= ================== =============================================================
365+
366+
Clipboard
367+
^^^^^^^^^
368+
369+
========================= ================== =============================================================
370+
Dependency Minimum Version Notes
371+
========================= ================== =============================================================
372+
PyQt4/PyQt5 Clipboard I/O
373+
qtpy Clipboard I/O
374+
xclip Clipboard I/O on Linux
375+
xsel Clipboard I/O on Linux
376+
========================= ================== =============================================================

doc/source/user_guide/cookbook.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1410,7 +1410,7 @@ Often it's useful to obtain the lower (or upper) triangular form of a correlatio
14101410
14111411
corr_mat.where(mask)
14121412
1413-
The ``method`` argument within ``DataFrame.corr`` can accept a callable in addition to the named correlation types. Here we compute the ``distance correlation <https://en.wikipedia.org/wiki/Distance_correlation>``__ matrix for a ``DataFrame`` object.
1413+
The ``method`` argument within ``DataFrame.corr`` can accept a callable in addition to the named correlation types. Here we compute the `distance correlation <https://en.wikipedia.org/wiki/Distance_correlation>`__ matrix for a ``DataFrame`` object.
14141414

14151415
.. ipython:: python
14161416

doc/source/user_guide/style.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1154,7 +1154,7 @@
11541154
"metadata": {},
11551155
"outputs": [],
11561156
"source": [
1157-
"from IPython.html import widgets\n",
1157+
"from ipywidgets import widgets\n",
11581158
"@widgets.interact\n",
11591159
"def f(h_neg=(0, 359, 1), h_pos=(0, 359), s=(0., 99.9), l=(0., 99.9)):\n",
11601160
" return df.style.background_gradient(\n",

doc/source/whatsnew/v1.2.3.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717

1818
- Fixed regression in :meth:`~DataFrame.to_excel` raising ``KeyError`` when giving duplicate columns with ``columns`` attribute (:issue:`39695`)
19-
-
19+
- Fixed regression in :meth:`DataFrame.__setitem__` not aligning :class:`DataFrame` on right-hand side for boolean indexer (:issue:`39931`)
2020

2121
.. ---------------------------------------------------------------------------
2222

doc/source/whatsnew/v1.3.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,7 @@ Other
482482
- Bug in :class:`Styler` where rendered HTML was missing a column class identifier for certain header cells (:issue:`39716`)
483483
- Bug in :meth:`Styler.background_gradient` where text-color was not determined correctly (:issue:`39888`)
484484
- Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`)
485+
- Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`)
485486

486487

487488
.. ---------------------------------------------------------------------------

pandas/core/frame.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3264,6 +3264,9 @@ def _setitem_array(self, key, value):
32643264
key = check_bool_indexer(self.index, key)
32653265
indexer = key.nonzero()[0]
32663266
self._check_setitem_copy()
3267+
if isinstance(value, DataFrame):
3268+
# GH#39931 reindex since iloc does not align
3269+
value = value.reindex(self.index.take(indexer))
32673270
self.iloc[indexer] = value
32683271
else:
32693272
if isinstance(value, DataFrame):

pandas/core/internals/blocks.py

Lines changed: 20 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
from pandas._libs import (
1919
Interval,
20-
NaT,
2120
Period,
2221
Timestamp,
2322
algos as libalgos,
@@ -45,6 +44,7 @@
4544
maybe_downcast_numeric,
4645
maybe_downcast_to_dtype,
4746
maybe_upcast,
47+
sanitize_to_nanoseconds,
4848
soft_convert_objects,
4949
)
5050
from pandas.core.dtypes.common import (
@@ -72,6 +72,7 @@
7272
from pandas.core.dtypes.missing import (
7373
is_valid_na_for_dtype,
7474
isna,
75+
na_value_for_dtype,
7576
)
7677

7778
import pandas.core.algorithms as algos
@@ -95,11 +96,13 @@
9596
DatetimeArray,
9697
ExtensionArray,
9798
PandasArray,
98-
TimedeltaArray,
9999
)
100100
from pandas.core.base import PandasObject
101101
import pandas.core.common as com
102-
from pandas.core.construction import extract_array
102+
from pandas.core.construction import (
103+
ensure_wrapped_if_datetimelike,
104+
extract_array,
105+
)
103106
from pandas.core.indexers import (
104107
check_setitem_lengths,
105108
is_empty_indexer,
@@ -2095,8 +2098,6 @@ class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock):
20952098

20962099
is_numeric = False
20972100
_can_hold_na = True
2098-
_dtype: np.dtype
2099-
_holder: Type[Union[DatetimeArray, TimedeltaArray]]
21002101

21012102
@classmethod
21022103
def _maybe_coerce_values(cls, values):
@@ -2112,25 +2113,26 @@ def _maybe_coerce_values(cls, values):
21122113
Returns
21132114
-------
21142115
values : ndarray[datetime64ns/timedelta64ns]
2115-
2116-
Overridden by DatetimeTZBlock.
21172116
"""
2118-
if values.dtype != cls._dtype:
2119-
# non-nano we will convert to nano
2120-
if values.dtype.kind != cls._dtype.kind:
2121-
# caller is responsible for ensuring td64/dt64 dtype
2122-
raise TypeError(values.dtype) # pragma: no cover
2123-
2124-
values = cls._holder._from_sequence(values)._data
2125-
2126-
if isinstance(values, cls._holder):
2117+
values = extract_array(values, extract_numpy=True)
2118+
if isinstance(values, np.ndarray):
2119+
values = sanitize_to_nanoseconds(values)
2120+
elif isinstance(values.dtype, np.dtype):
2121+
# i.e. not datetime64tz
21272122
values = values._data
21282123

2129-
assert isinstance(values, np.ndarray), type(values)
21302124
return values
21312125

21322126
def array_values(self):
2133-
return self._holder._simple_new(self.values)
2127+
return ensure_wrapped_if_datetimelike(self.values)
2128+
2129+
@property
2130+
def _holder(self):
2131+
return type(self.array_values())
2132+
2133+
@property
2134+
def fill_value(self):
2135+
return na_value_for_dtype(self.dtype)
21342136

21352137
def to_native_types(self, na_rep="NaT", **kwargs):
21362138
""" convert to our native types format """
@@ -2142,9 +2144,6 @@ def to_native_types(self, na_rep="NaT", **kwargs):
21422144

21432145
class DatetimeBlock(DatetimeLikeBlockMixin):
21442146
__slots__ = ()
2145-
fill_value = np.datetime64("NaT", "ns")
2146-
_dtype = fill_value.dtype
2147-
_holder = DatetimeArray
21482147

21492148
def set_inplace(self, locs, values):
21502149
"""
@@ -2165,42 +2164,16 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
21652164
_can_hold_na = True
21662165
is_numeric = False
21672166

2168-
_holder = DatetimeArray
2169-
21702167
internal_values = Block.internal_values
21712168
_can_hold_element = DatetimeBlock._can_hold_element
21722169
to_native_types = DatetimeBlock.to_native_types
21732170
diff = DatetimeBlock.diff
2174-
fill_value = NaT
21752171
where = DatetimeBlock.where
21762172
putmask = DatetimeLikeBlockMixin.putmask
21772173
fillna = DatetimeLikeBlockMixin.fillna
21782174

21792175
array_values = ExtensionBlock.array_values
21802176

2181-
@classmethod
2182-
def _maybe_coerce_values(cls, values):
2183-
"""
2184-
Input validation for values passed to __init__. Ensure that
2185-
we have datetime64TZ, coercing if necessary.
2186-
2187-
Parameters
2188-
----------
2189-
values : array-like
2190-
Must be convertible to datetime64
2191-
2192-
Returns
2193-
-------
2194-
values : DatetimeArray
2195-
"""
2196-
if not isinstance(values, cls._holder):
2197-
values = cls._holder(values)
2198-
2199-
if values.tz is None:
2200-
raise ValueError("cannot create a DatetimeTZBlock without a tz")
2201-
2202-
return values
2203-
22042177
@property
22052178
def is_view(self) -> bool:
22062179
""" return a boolean if I am possibly a view """
@@ -2216,9 +2189,6 @@ def external_values(self):
22162189

22172190
class TimeDeltaBlock(DatetimeLikeBlockMixin):
22182191
__slots__ = ()
2219-
_holder = TimedeltaArray
2220-
fill_value = np.timedelta64("NaT", "ns")
2221-
_dtype = fill_value.dtype
22222192

22232193

22242194
class ObjectBlock(Block):

pandas/io/clipboard/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
EXCEPT_MSG = """
6767
Pyperclip could not find a copy/paste mechanism for your system.
6868
For more information, please visit
69-
https://pyperclip.readthedocs.io/en/latest/introduction.html#not-implemented-error
69+
https://pyperclip.readthedocs.io/en/latest/#not-implemented-error
7070
"""
7171

7272
ENCODING = "utf-8"

pandas/tests/frame/indexing/test_setitem.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,3 +575,12 @@ def test_setitem_boolean_mask(self, mask_type, float_frame):
575575
expected = df.copy()
576576
expected.values[np.array(mask)] = np.nan
577577
tm.assert_frame_equal(result, expected)
578+
579+
@pytest.mark.parametrize("indexer", [tm.setitem, tm.loc])
580+
def test_setitem_boolean_mask_aligning(self, indexer):
581+
# GH#39931
582+
df = DataFrame({"a": [1, 4, 2, 3], "b": [5, 6, 7, 8]})
583+
expected = df.copy()
584+
mask = df["a"] >= 3
585+
indexer(df)[mask] = indexer(df)[mask].sort_values("a")
586+
tm.assert_frame_equal(df, expected)

pandas/tests/frame/methods/test_astype.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,3 +648,32 @@ def test_astype_bytes(self):
648648
# GH#39474
649649
result = DataFrame(["foo", "bar", "baz"]).astype(bytes)
650650
assert result.dtypes[0] == np.dtype("S3")
651+
652+
653+
class TestAstypeCategorical:
654+
def test_astype_from_categorical3(self):
655+
df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], "vals": [1, 2, 3, 4, 5, 6]})
656+
cats = Categorical([1, 2, 3, 4, 5, 6])
657+
exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
658+
df["cats"] = df["cats"].astype("category")
659+
tm.assert_frame_equal(exp_df, df)
660+
661+
def test_astype_from_categorical4(self):
662+
df = DataFrame(
663+
{"cats": ["a", "b", "b", "a", "a", "d"], "vals": [1, 2, 3, 4, 5, 6]}
664+
)
665+
cats = Categorical(["a", "b", "b", "a", "a", "d"])
666+
exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]})
667+
df["cats"] = df["cats"].astype("category")
668+
tm.assert_frame_equal(exp_df, df)
669+
670+
def test_categorical_astype_to_int(self, any_int_or_nullable_int_dtype):
671+
# GH#39402
672+
673+
df = DataFrame(data={"col1": pd.array([2.0, 1.0, 3.0])})
674+
df.col1 = df.col1.astype("category")
675+
df.col1 = df.col1.astype(any_int_or_nullable_int_dtype)
676+
expected = DataFrame(
677+
{"col1": pd.array([2, 1, 3], dtype=any_int_or_nullable_int_dtype)}
678+
)
679+
tm.assert_frame_equal(df, expected)

0 commit comments

Comments
 (0)