-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
CLN/ERR: str.cat internals #22725
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CLN/ERR: str.cat internals #22725
Changes from 1 commit
5f8890c
285a1f7
28e7859
807f18e
ed27c66
0d3c6d2
36c6240
a97fe67
e58ec9d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ | |
| from pandas.core.dtypes.generic import ABCSeries, ABCIndex | ||
| from pandas.core.dtypes.missing import isna | ||
| from pandas.core.dtypes.common import ( | ||
| ensure_object, | ||
| is_bool_dtype, | ||
| is_categorical_dtype, | ||
| is_object_dtype, | ||
|
|
@@ -36,13 +37,13 @@ | |
| _shared_docs = dict() | ||
|
|
||
|
|
||
| def interleave_sep(all_cols, sep): | ||
| ''' | ||
| def interleave_sep(list_of_columns, sep): | ||
| """ | ||
| Auxiliary function for :meth:`str.cat` | ||
|
|
||
| Parameters | ||
| ---------- | ||
| all_cols : list of numpy arrays | ||
| list_of_columns : list of numpy arrays | ||
| List of arrays to be concatenated with sep | ||
| sep : string | ||
| The separator string for concatenating the columns | ||
|
|
@@ -51,12 +52,12 @@ def interleave_sep(all_cols, sep): | |
| ------- | ||
| list | ||
| The list of arrays interleaved with sep; to be fed to np.sum | ||
| ''' | ||
| """ | ||
| if sep == '': | ||
| # no need to add empty strings | ||
| return all_cols | ||
| result = [sep] * (2 * len(all_cols) - 1) | ||
| result[::2] = all_cols | ||
| return list_of_columns | ||
| result = [sep] * (2 * len(list_of_columns) - 1) | ||
| result[::2] = list_of_columns | ||
| return result | ||
|
|
||
|
|
||
|
|
@@ -2207,12 +2208,12 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): | |
|
|
||
| # concatenate Series/Index with itself if no "others" | ||
| if others is None: | ||
| data = data.astype(object).values | ||
| data = ensure_object(data) | ||
| mask = isna(data) | ||
| if mask.any(): | ||
| if na_rep is None: | ||
| return sep.join(data[~mask]) | ||
| return sep.join(np.where(mask, na_rep, data)) | ||
| if na_rep is None and mask.any(): | ||
| data = data[~mask] | ||
| elif na_rep is not None and mask.any(): | ||
| data = np.where(mask, na_rep, data) | ||
| return sep.join(data) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we do a single `sep.join`, and just have the branches mask the data as needed? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
|
||
| try: | ||
|
|
@@ -2251,11 +2252,13 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): | |
| data, others = data.align(others, join=join) | ||
| others = [others[x] for x in others] # again list of Series | ||
|
|
||
| all_cols = [x.astype(object).values for x in [data] + others] | ||
| all_cols = [ensure_object(x) for x in [data] + others] | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Assuming the index is aligned here, can we alternatively just concat the columns together and call |
||
| masks = np.array([isna(x) for x in all_cols]) | ||
| union_mask = np.logical_or.reduce(masks, axis=0) | ||
|
|
||
| if na_rep is None and union_mask.any(): | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Comment on these cases. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added comments. |
||
| # no na_rep means NaNs for all rows where any column has a NaN | ||
| # only necessary if there are actually any NaNs | ||
| result = np.empty(len(data), dtype=object) | ||
| np.putmask(result, union_mask, np.nan) | ||
|
|
||
|
|
@@ -2264,11 +2267,12 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): | |
|
|
||
| result[not_masked] = np.sum(all_cols, axis=0) | ||
| elif na_rep is not None and union_mask.any(): | ||
| # fill NaNs | ||
| all_cols = [np.where(masks[i], na_rep, all_cols[i]) | ||
| for i in range(len(all_cols))] | ||
| # fill NaNs with na_rep in case there are actually any NaNs | ||
| all_cols = [np.where(mask, na_rep, col) | ||
| for mask, col in zip(masks, all_cols)] | ||
| result = np.sum(interleave_sep(all_cols, sep), axis=0) | ||
| else: # no NaNs | ||
| else: | ||
| # no NaNs - can just concatenate | ||
| result = np.sum(interleave_sep(all_cols, sep), axis=0) | ||
|
|
||
| if isinstance(self._orig, Index): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would simply do `np.sum(result)` here, no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, that's reasonable. Refactored the function as necessary