-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
Better error for str.cat with listlike of wrong dtype. #26607
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
cd9aa24
fee9612
fd710de
e7f0d7e
bfca6d1
02f6429
cb73704
3fb1411
9752aa7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -575,7 +575,7 @@ Strings | |
| ^^^^^^^ | ||
|
|
||
| - Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`) | ||
| - | ||
| - Improved error message when passing a ``Series`` of the wrong dtype to :meth:`Series.str.cat` (:issue:`22722`) | ||
|
||
| - | ||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2280,6 +2280,23 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): | |
| 'must all be of the same length as the ' | ||
| 'calling Series/Index.') | ||
|
|
||
| # data has already been checked by _validate to be of correct dtype, | ||
| # but others could still contain Series with dtypes (e.g. integer) that | ||
| # will necessarily fail in concatenation. To avoid deep and confusing | ||
| # tracebacks, we raise here for anything that's not object or all-NA float. | ||
| def _legal_dtype(series): | ||
|
||
| # unify dtype handling between categorical/non-categorical | ||
| dtype = (series.dtype if not is_categorical_dtype(series) | ||
| else series.cat.categories.dtype) | ||
| legal = dtype == 'O' or (dtype == 'float' and series.isna().all()) | ||
| return legal | ||
| err_wrong_dtype = ('Can only concatenate list-likes containing only ' | ||
| 'strings (or missing values).') | ||
| if any(not _legal_dtype(x) for x in others): | ||
| raise TypeError(err_wrong_dtype + ' Received list-like of dtype: ' | ||
| '{}'.format([x.dtype for x in others | ||
| if not _legal_dtype(x)][0])) | ||
|
|
||
| if join is None and warn: | ||
| warnings.warn("A future version of pandas will perform index " | ||
| "alignment when `others` is a Series/Index/" | ||
|
|
@@ -2307,23 +2324,28 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): | |
| na_masks = np.array([isna(x) for x in all_cols]) | ||
| union_mask = np.logical_or.reduce(na_masks, axis=0) | ||
|
|
||
| if na_rep is None and union_mask.any(): | ||
| # no na_rep means NaNs for all rows where any column has a NaN | ||
| # only necessary if there are actually any NaNs | ||
| result = np.empty(len(data), dtype=object) | ||
| np.putmask(result, union_mask, np.nan) | ||
|
|
||
| not_masked = ~union_mask | ||
| result[not_masked] = cat_core([x[not_masked] for x in all_cols], | ||
| sep) | ||
| elif na_rep is not None and union_mask.any(): | ||
| # fill NaNs with na_rep in case there are actually any NaNs | ||
| all_cols = [np.where(nm, na_rep, col) | ||
| for nm, col in zip(na_masks, all_cols)] | ||
| result = cat_core(all_cols, sep) | ||
| else: | ||
| # no NaNs - can just concatenate | ||
| result = cat_core(all_cols, sep) | ||
| # if there are any non-string, non-null values hidden within an object | ||
| # dtype, cat_core will fail; catch the error and re-raise with a better message | ||
| try: | ||
| if na_rep is None and union_mask.any(): | ||
| # no na_rep means NaNs for all rows where any column has a NaN | ||
| # only necessary if there are actually any NaNs | ||
| result = np.empty(len(data), dtype=object) | ||
| np.putmask(result, union_mask, np.nan) | ||
|
|
||
| not_masked = ~union_mask | ||
| result[not_masked] = cat_core([x[not_masked] | ||
| for x in all_cols], sep) | ||
|
||
| elif na_rep is not None and union_mask.any(): | ||
| # fill NaNs with na_rep in case there are actually any NaNs | ||
| all_cols = [np.where(nm, na_rep, col) | ||
| for nm, col in zip(na_masks, all_cols)] | ||
| result = cat_core(all_cols, sep) | ||
| else: | ||
| # no NaNs - can just concatenate | ||
| result = cat_core(all_cols, sep) | ||
| except TypeError: | ||
| raise TypeError(err_wrong_dtype) | ||
|
|
||
| if isinstance(self._orig, Index): | ||
| # add dtype for case that result is all-NA | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.