Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Refactor interleave_sep to use np.sum
  • Loading branch information
h-vetinari committed Oct 11, 2018
commit 28e78594e91640c1ea8928e1a6ef083cc225f228
26 changes: 13 additions & 13 deletions pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,28 +37,29 @@
_shared_docs = dict()


def interleave_sep(list_of_columns, sep):
def cat_core(list_of_columns, sep):
"""
Auxiliary function for :meth:`str.cat`

Parameters
----------
list_of_columns : list of numpy arrays
List of arrays to be concatenated with sep
List of arrays to be concatenated with sep;
these arrays may not contain NaNs!
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason for this documented limitation? I believe that on master NaN is valid input and returns NaN when passed in.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@WillAyd
The new helper function `cat_core` has nothing to do with the existing `str_cat` (which is essentially fully-fledged, up to index handling). It's an internal function to avoid lots of copied code and, as it just wraps `np.sum`, it is not NaN-safe for string values.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@WillAyd
Does that answer your question? What would you like to see for this docstring?

sep : string
The separator string for concatenating the columns

Returns
-------
list
The list of arrays interleaved with sep; to be fed to np.sum
nd.array
The concatenation of list_of_columns with sep
"""
if sep == '':
# no need to add empty strings
return list_of_columns
result = [sep] * (2 * len(list_of_columns) - 1)
result[::2] = list_of_columns
return result
return np.sum(list_of_columns, axis=0)
list_with_sep = [sep] * (2 * len(list_of_columns) - 1)
list_with_sep[::2] = list_of_columns
return np.sum(list_with_sep, axis=0)


def _na_map(f, arr, na_result=np.nan, dtype=object):
Expand Down Expand Up @@ -2263,17 +2264,16 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
np.putmask(result, union_mask, np.nan)

not_masked = ~union_mask
all_cols = interleave_sep([x[not_masked] for x in all_cols], sep)

result[not_masked] = np.sum(all_cols, axis=0)
result[not_masked] = cat_core([x[not_masked] for x in all_cols],
sep)
elif na_rep is not None and union_mask.any():
# fill NaNs with na_rep in case there are actually any NaNs
all_cols = [np.where(mask, na_rep, col)
for mask, col in zip(masks, all_cols)]
result = np.sum(interleave_sep(all_cols, sep), axis=0)
result = cat_core(all_cols, sep)
else:
# no NaNs - can just concatenate
result = np.sum(interleave_sep(all_cols, sep), axis=0)
result = cat_core(all_cols, sep)

if isinstance(self._orig, Index):
result = Index(result, name=self._orig.name)
Expand Down