Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Review (jreback)
  • Loading branch information
h-vetinari committed Oct 11, 2018
commit 285a1f7b8c382e9d36f1971e4d48efe887da815b
38 changes: 21 additions & 17 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pandas.core.dtypes.generic import ABCSeries, ABCIndex
from pandas.core.dtypes.missing import isna
from pandas.core.dtypes.common import (
ensure_object,
is_bool_dtype,
is_categorical_dtype,
is_object_dtype,
Expand Down Expand Up @@ -36,13 +37,13 @@
_shared_docs = dict()


def interleave_sep(all_cols, sep):
'''
def interleave_sep(list_of_columns, sep):
"""
Auxiliary function for :meth:`str.cat`

Parameters
----------
all_cols : list of numpy arrays
list_of_columns : list of numpy arrays
List of arrays to be concatenated with sep
sep : string
The separator string for concatenating the columns
Expand All @@ -51,12 +52,12 @@ def interleave_sep(all_cols, sep):
-------
list
The list of arrays interleaved with sep; to be fed to np.sum
'''
"""
if sep == '':
# no need to add empty strings
return all_cols
result = [sep] * (2 * len(all_cols) - 1)
result[::2] = all_cols
return list_of_columns
result = [sep] * (2 * len(list_of_columns) - 1)
result[::2] = list_of_columns
return result
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would simply do np.sum(result) here, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, that's reasonable. Refactored the function as necessary



Expand Down Expand Up @@ -2207,12 +2208,12 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):

# concatenate Series/Index with itself if no "others"
if others is None:
data = data.astype(object).values
data = ensure_object(data)
mask = isna(data)
if mask.any():
if na_rep is None:
return sep.join(data[~mask])
return sep.join(np.where(mask, na_rep, data))
if na_rep is None and mask.any():
data = data[~mask]
elif na_rep is not None and mask.any():
data = np.where(mask, na_rep, data)
return sep.join(data)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we do a single sep.join, and just have the branches mask the data as needed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


try:
Expand Down Expand Up @@ -2251,11 +2252,13 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
data, others = data.align(others, join=join)
others = [others[x] for x in others] # again list of Series

all_cols = [x.astype(object).values for x in [data] + others]
all_cols = [ensure_object(x) for x in [data] + others]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assuming the index is aligned here can we alternately just concat the columns together and call ensure_object on the entire frame instead of using a list comprehension?

masks = np.array([isna(x) for x in all_cols])
union_mask = np.logical_or.reduce(masks, axis=0)

if na_rep is None and union_mask.any():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment on these cases

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added comments

# no na_rep means NaNs for all rows where any column has a NaN
# only necessary if there are actually any NaNs
result = np.empty(len(data), dtype=object)
np.putmask(result, union_mask, np.nan)

Expand All @@ -2264,11 +2267,12 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):

result[not_masked] = np.sum(all_cols, axis=0)
elif na_rep is not None and union_mask.any():
# fill NaNs
all_cols = [np.where(masks[i], na_rep, all_cols[i])
for i in range(len(all_cols))]
# fill NaNs with na_rep in case there are actually any NaNs
all_cols = [np.where(mask, na_rep, col)
for mask, col in zip(masks, all_cols)]
result = np.sum(interleave_sep(all_cols, sep), axis=0)
else: # no NaNs
else:
# no NaNs - can just concatenate
result = np.sum(interleave_sep(all_cols, sep), axis=0)

if isinstance(self._orig, Index):
Expand Down