Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
BUG: ensure we use group sizes, not group counts, in transform (GH9697)
  • Loading branch information
dsm054 committed Mar 22, 2015
commit 8bae0d42db5e4fbd93407a016b35267f830a4ace
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.16.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@ Performance Improvements

Bug Fixes
~~~~~~~~~

- Bug in ``transform`` causing a length mismatch when null entries were present and a fast aggregator was used (:issue:`9697`)
2 changes: 1 addition & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2453,7 +2453,7 @@ def _transform_fast(self, func):
if isinstance(func, compat.string_types):
func = getattr(self,func)
values = func().values
counts = self.count().values
counts = self.size().values
values = np.repeat(values, com._ensure_platform_int(counts))

return self._set_result_index_ordered(Series(values))
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,19 @@ def test_transform_function_aliases(self):
expected = self.df.groupby('A')['C'].transform(np.mean)
assert_series_equal(result, expected)

def test_transform_length(self):
# GH 9697
# Regression test: the transformed result must have one entry per input
# row (four here) even though one group contains a null value.
# col1 is the grouping key; the second group's col2 values are 3 and NaN.
df = pd.DataFrame({'col1':[1,1,2,2], 'col2':[1,2,3,np.nan]})
# Group 1 sums to 1 + 2 = 3; group 2 is expected to sum to 3 as well,
# i.e. the NaN is expected to be ignored. The value is repeated for all rows.
expected = pd.Series([3.0]*4)
def nsum(x):
# NaN-ignoring sum, passed to transform as a user-defined callable.
return np.nansum(x)
# The same reduction is exercised four ways: with ``sum`` (presumably the
# builtin -- it is not redefined in this hunk) and with ``nsum``, each one
# applied to the whole grouped frame (then selecting col2) and to the
# col2 column selected from the groupby.
results = [df.groupby('col1').transform(sum)['col2'],
df.groupby('col1')['col2'].transform(sum),
df.groupby('col1').transform(nsum)['col2'],
df.groupby('col1')['col2'].transform(nsum)]
# Every variant must match the same four-row expected series.
for result in results:
assert_series_equal(result, expected)

def test_with_na(self):
index = Index(np.arange(10))

Expand Down