Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
ae51cff
ENH: 2D support for MaskedArray
jbrockmendel Jan 6, 2021
f608792
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Jan 6, 2021
125606b
remove Any part of _mask annotation
jbrockmendel Jan 6, 2021
dd5dbbe
xfail for ArrowStringArray
jbrockmendel Jan 6, 2021
577826c
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Feb 3, 2021
17f63d4
absolute import
jbrockmendel Feb 3, 2021
33b2d78
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Feb 4, 2021
a2bd7b1
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Feb 5, 2021
3f14fa3
TST: reductions with axis
jbrockmendel Feb 5, 2021
6600588
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Feb 6, 2021
560279c
Merge branch 'master' into enh-masked-2d
jbrockmendel Feb 7, 2021
553038c
np_version_under1p17 compat
jbrockmendel Feb 7, 2021
b2a26bf
Merge branch 'master' into enh-masked-2d
jbrockmendel Feb 12, 2021
8a40d59
Merge branch 'master' into enh-masked-2d
jbrockmendel Feb 12, 2021
44999d1
xfail syntax
jbrockmendel Feb 12, 2021
7a6c226
typo fixup
jbrockmendel Feb 12, 2021
638bd9c
Merge branch 'master' into enh-masked-2d
jbrockmendel Feb 13, 2021
aca12e6
Merge branch 'master' into enh-masked-2d
jbrockmendel Feb 14, 2021
f27f8c0
Merge branch 'master' into enh-masked-2d
jbrockmendel Feb 15, 2021
3810660
Merge branch 'master' into enh-masked-2d
jbrockmendel Feb 19, 2021
f0957b3
Merge branch 'master' into enh-masked-2d
jbrockmendel Feb 22, 2021
f538868
Merge branch 'master' into enh-masked-2d
jbrockmendel Feb 23, 2021
6664d0d
isort fixup
jbrockmendel Feb 23, 2021
2792724
Merge branch 'master' into enh-masked-2d
jbrockmendel Feb 26, 2021
061a53c
Merge branch 'master' into enh-masked-2d
jbrockmendel Mar 2, 2021
6032ed1
Merge branch 'master' into enh-masked-2d
jbrockmendel Mar 3, 2021
543258d
Merge branch 'master' into enh-masked-2d
jbrockmendel Mar 6, 2021
e96ec33
Merge branch 'master' into enh-masked-2d
jbrockmendel Mar 9, 2021
6ca7f01
Merge branch 'master' into enh-masked-2d
jbrockmendel Mar 10, 2021
6f26c4b
Fix pad/backfill 2d
jbrockmendel Mar 12, 2021
3dedb8f
Merge branch 'master' into enh-masked-2d
jbrockmendel Mar 12, 2021
34fda97
typo fixup
jbrockmendel Mar 12, 2021
2a108ba
Merge branch 'master' into enh-masked-2d
jbrockmendel Mar 15, 2021
2c99e59
Merge branch 'master' into enh-masked-2d
jbrockmendel Mar 16, 2021
ee9c3a0
Merge branch 'master' into enh-masked-2d
jbrockmendel Mar 31, 2021
efd0071
Merge branch 'master' into enh-masked-2d
jbrockmendel Mar 31, 2021
3839b98
Merge branch 'master' into enh-masked-2d
jbrockmendel Apr 8, 2021
0956993
Merge branch 'master' into enh-masked-2d
jbrockmendel Apr 14, 2021
4b75101
comment
jbrockmendel Apr 14, 2021
639fc23
Merge branch 'master' into enh-masked-2d
jbrockmendel May 4, 2021
2989efc
Merge branch 'master' into enh-masked-2d
jbrockmendel May 9, 2021
81cd3e4
Merge branch 'master' into enh-masked-2d
jbrockmendel May 17, 2021
8f315bc
Merge branch 'master' into enh-masked-2d
jbrockmendel Aug 3, 2021
21cf578
fix broken tests
jbrockmendel Aug 4, 2021
6f215d6
Merge branch 'master' into enh-masked-2d
jbrockmendel Sep 28, 2021
e68c797
Merge branch 'master' into enh-masked-2d
jbrockmendel Sep 29, 2021
17dd19a
Merge branch 'master' into enh-masked-2d
jbrockmendel Oct 3, 2021
93d65eb
Merge branch 'master' into enh-masked-2d
jbrockmendel Oct 6, 2021
3bfe60c
comment
jbrockmendel Oct 6, 2021
5c28d69
Merge branch 'master' into enh-masked-2d
jbrockmendel Oct 8, 2021
92d710b
Merge branch 'master' of https://github.com/pandas-dev/pandas into en…
jbrockmendel Oct 10, 2021
5b014c1
troubleshoot windows build
jbrockmendel Oct 11, 2021
db76ca0
Merge branch 'master' into enh-masked-2d
jbrockmendel Oct 11, 2021
8148fcd
Merge branch 'master' into enh-masked-2d
jbrockmendel Oct 13, 2021
15a533f
troubleshoot 32bit builds
jbrockmendel Oct 13, 2021
7a7601e
troubleshoot 32bit builds
jbrockmendel Oct 14, 2021
7c6baaf
troubleshoot 32 bit builds
jbrockmendel Oct 14, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix pad/backfill 2d
  • Loading branch information
jbrockmendel committed Mar 12, 2021
commit 6f26c4b78acaaab1ace094346edcc7e612d883cf
8 changes: 5 additions & 3 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ def pad_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None):
def pad_2d_inplace(algos_t[:, :] values, uint8_t[:, :] mask, limit=None):
cdef:
Py_ssize_t i, j, N, K
algos_t val
Expand All @@ -648,10 +648,11 @@ def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None):
val = values[j, 0]
for i in range(N):
if mask[j, i]:
if fill_count >= lim:
if fill_count >= lim or i == 0:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

continue
fill_count += 1
values[j, i] = val
mask[j, i] = False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mask should be previous mask... see pad_inplace #39953

import numpy as np
import pandas as pd

# Reproducer: pad-fill a 2D masked integer array whose leading rows are
# entirely missing, and compare against filling in 1D before reshaping.
dtype = pd.Int64Dtype()
data_missing = pd.array([pd.NA, 1], dtype=dtype)

# Repeat each element four times, then view the eight values as a 4x2 array.
repeated = data_missing.repeat(4)
arr = repeated.reshape(4, 2)

# Pad-fill the 2D array directly.
result = arr.fillna(method="pad")
print(result)

# Reference: pad-fill the 1D array first, then expand to the same 4x2 shape.
filled_1d = data_missing.fillna(method="pad")
expected = filled_1d.repeat(4).reshape(4, 2)
print(expected)
<IntegerArray>
[
[<NA>, <NA>],
[1, 1],
[1, 1],
[1, 1]
]
Shape: (4, 2), dtype: Int64
<IntegerArray>
[
[<NA>, <NA>],
[<NA>, <NA>],
[1, 1],
[1, 1]
]
Shape: (4, 2), dtype: Int64

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so fixing this on my numba branch results in...

@numba.njit
def _pad_2d_inplace(values, mask, limit=None):
    """
    Forward-fill masked entries of a 2D array in place.

    Each ``values[j, :]`` is scanned from index 0 upward.  A masked entry
    receives the value *and the mask flag* of the most recent unmasked
    entry in the same ``j`` slice, seeded from position 0 of that slice.
    Entries with no unmasked predecessor therefore stay masked.

    Parameters
    ----------
    values : 2D array
        Data to fill; modified in place.
    mask : 2D array
        Truthy where ``values`` is missing; modified in place.
    limit : int, optional
        Maximum number of consecutive masked entries to fill after an
        unmasked one.  ``None`` means no limit.

    Returns
    -------
    None
    """
    # Nothing to do when the second dimension is empty.
    if not values.shape[1]:
        return

    n_outer, n_inner = values.shape

    if limit is None:
        # Common case, kept as its own loop so no fill counter is needed.
        for outer in range(n_outer):
            last_val = values[outer, 0]
            last_mask = mask[outer, 0]
            for inner in range(n_inner):
                if not mask[outer, inner]:
                    # Remember the latest valid entry.
                    last_val = values[outer, inner]
                    last_mask = mask[outer, inner]
                else:
                    values[outer, inner] = last_val
                    mask[outer, inner] = last_mask
    else:
        for outer in range(n_outer):
            n_filled = 0
            last_val = values[outer, 0]
            last_mask = mask[outer, 0]
            for inner in range(n_inner):
                if not mask[outer, inner]:
                    # A valid entry resets the consecutive-fill counter.
                    n_filled = 0
                    last_val = values[outer, inner]
                    last_mask = mask[outer, inner]
                else:
                    if n_filled >= limit:
                        # Limit reached for this run; leave the entry as is.
                        continue
                    n_filled += 1
                    values[outer, inner] = last_val
                    mask[outer, inner] = last_mask

There is some duplication here, but it gives a perf improvement for the common case of no limit. The duplication can probably be mitigated by reshaping a 1d array and removing the 1d version, pad_inplace.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might look into a variant using two loops: the first to find the first non-missing value, and the second to fill without tracking the previous mask. Then we could just do mask[j, i] = False.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One more thing while in the neighborhood, I think for i in range(N) should probably be for i in range(1, N)?

else:
fill_count = 0
val = values[j, i]
Expand Down Expand Up @@ -776,7 +777,7 @@ def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_2d_inplace(algos_t[:, :] values,
const uint8_t[:, :] mask,
uint8_t[:, :] mask,
limit=None):
cdef:
Py_ssize_t i, j, N, K
Expand All @@ -800,6 +801,7 @@ def backfill_2d_inplace(algos_t[:, :] values,
continue
fill_count += 1
values[j, i] = val
mask[j, i] = False
else:
fill_count = 0
val = values[j, i]
Expand Down
16 changes: 8 additions & 8 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@
extract_array,
sanitize_array,
)
from pandas.core.missing import interpolate_2d
from pandas.core.ops.common import unpack_zerodim_and_defer
from pandas.core.sorting import nargsort
from pandas.core.strings.object_array import ObjectStringArrayMixin
Expand Down Expand Up @@ -1776,13 +1775,9 @@ def fillna(self, value=None, method=None, limit=None):

if method is not None:
# pad / bfill

# TODO: dispatch when self.categories is EA-dtype
values = np.asarray(self).reshape(-1, len(self))
values = interpolate_2d(values, method, 0, None).astype(
self.categories.dtype
)[0]
codes = _get_codes_for_values(values, self.categories)
return NDArrayBackedExtensionArray.fillna(
self, None, method=method, limit=limit
)

else:
# We copy even if there is nothing to fill
Expand Down Expand Up @@ -2604,6 +2599,11 @@ def _get_codes_for_values(values, categories: Index) -> np.ndarray:
"""
dtype_equal = is_dtype_equal(values.dtype, categories.dtype)

if values.ndim > 1:
flat = values.ravel()
codes = _get_codes_for_values(flat, categories)
return codes.reshape(values.shape)

if is_extension_array_dtype(categories.dtype) and is_object_dtype(values):
# Support inferring the correct extension dtype from an array of
# scalar objects. e.g.
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,13 @@ def fillna(

if mask.any():
if method is not None:
func = missing.get_fill_func(method)
func = missing.get_fill_func(method, ndim=self.ndim)
new_values, new_mask = func(
self._data.copy(),
self._data.copy().T,
limit=limit,
mask=mask.copy(),
mask=mask.copy().T,
)
return type(self)(new_values, new_mask.view(np.bool_))
return type(self)(new_values.T, new_mask.view(np.bool_).T)
else:
# fill with value
new_values = self.copy()
Expand Down