@@ -94,5 +94,221 @@ cdef inline float64_t kth_smallest_c(float64_t* a,
9494 return a[k]
9595
9696
@cython.boundscheck(False)
@cython.wraparound(False)
def group_median_float64(ndarray[float64_t, ndim=2] out,
                         ndarray[int64_t] counts,
                         ndarray[float64_t, ndim=2] values,
                         ndarray[int64_t] labels,
                         Py_ssize_t min_count=-1):
    """
    Write the per-group median of each column of `values` into `out`.

    Only aggregates on axis=0.

    Parameters
    ----------
    out : 2-D float64 array
        Result buffer, written as ``out[group, column]``.
    counts : 1-D int64 array
        Overwritten with the per-group sizes; its length defines the
        number of groups.
    values : 2-D float64 array
        Data to aggregate, one row per observation.
    labels : 1-D int64 array
        Group label of every row of `values`.
    min_count : Py_ssize_t, default -1
        Must be -1; any other value trips the assertion below.
    """
    cdef:
        Py_ssize_t col, grp, nrows, ncols, ngroups, group_len
        ndarray[int64_t] group_sizes
        ndarray grouped
        float64_t* cursor

    assert min_count == -1, " 'min_count' only used in add and prod"

    nrows = values.shape[0]
    ncols = values.shape[1]
    ngroups = counts.shape[0]

    # Slot 0 of `group_sizes` is treated as the NA group; the remaining
    # slots are the real groups and are reported back through `counts`.
    indexer, group_sizes = groupsort_indexer(labels, ngroups)
    counts[:] = group_sizes[1:]

    # C-contiguous (ncols, nrows) scratch buffer: each row holds one column
    # of `values`, reordered by `indexer` (presumably so that every group
    # occupies a contiguous run -- confirm against groupsort_indexer).
    grouped = np.empty((ncols, nrows), dtype=np.float64)
    take_2d_axis1_float64_float64(values.T, indexer, out=grouped)
    cursor = <float64_t*> grouped.data

    with nogil:
        for col in range(ncols):
            # exclude NA group
            cursor += group_sizes[0]
            for grp in range(ngroups):
                group_len = group_sizes[grp + 1]
                out[grp, col] = median_linear(cursor, group_len)
                # advance past the run that was just summarised
                cursor += group_len
135+
136+
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumprod_float64(float64_t[:, :] out,
                          float64_t[:, :] values,
                          int64_t[:] labels,
                          bint is_datetimelike):
    """
    Cumulative product of `values` within each group.

    Only transforms on axis=0.

    Parameters
    ----------
    out : float64_t[:, :]
        Result buffer, written in place as ``out[row, column]``.
    values : float64_t[:, :]
        Data to transform, one row per observation.
    labels : int64_t[:]
        Group label of every row; rows with a negative label are skipped.
    is_datetimelike : bint
        Not used by this function.

    Notes
    -----
    NaN entries neither update the running product nor write to `out`, so
    those positions keep whatever the caller placed there.
    """
    cdef:
        Py_ssize_t i, j, N, K
        float64_t val
        float64_t[:, :] accum
        int64_t lab

    N, K = (<object> values).shape
    # Running product per (label, column), seeded with the multiplicative
    # identity. It is shaped like `values` and indexed by label, so labels
    # are assumed to be smaller than the number of rows.
    accum = np.ones_like(values)

    with nogil:
        for i in range(N):
            lab = labels[i]

            if lab < 0:
                continue
            for j in range(K):
                val = values[i, j]
                # val == val is False only for NaN
                if val == val:
                    accum[lab, j] *= val
                    out[i, j] = accum[lab, j]
166+
167+
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumsum(numeric[:, :] out,
                 numeric[:, :] values,
                 int64_t[:] labels,
                 bint is_datetimelike):
    """
    Cumulative sum of `values` within each group.

    Only transforms on axis=0.

    Parameters
    ----------
    out : numeric[:, :]
        Result buffer, written in place as ``out[row, column]``.
    values : numeric[:, :]
        Data to transform, one row per observation.
    labels : int64_t[:]
        Group label of every row; rows with a negative label are skipped.
    is_datetimelike : bint
        Not used by this function.

    Notes
    -----
    For the float32/float64 specialisations, NaN entries neither update the
    running sum nor write to `out`, so those positions keep whatever the
    caller placed there. Every other specialisation accumulates each value
    unconditionally.
    """
    cdef:
        Py_ssize_t i, j, N, K
        numeric val
        numeric[:, :] accum
        int64_t lab

    N, K = (<object> values).shape
    # Running sum per (label, column), seeded with the additive identity.
    # It is shaped like `values` and indexed by label, so labels are
    # assumed to be smaller than the number of rows.
    accum = np.zeros_like(values)

    with nogil:
        for i in range(N):
            lab = labels[i]

            if lab < 0:
                continue
            for j in range(K):
                val = values[i, j]

                # Resolved per fused-type specialisation: only floating
                # point values can be NaN (val != val).
                if numeric == float32_t or numeric == float64_t:
                    if val != val:
                        continue

                accum[lab, j] += val
                out[i, j] = accum[lab, j]
202+
203+
@cython.boundscheck(False)
@cython.wraparound(False)
def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
                        int ngroups, int periods):
    """
    Build an indexer that shifts rows by `periods` within each group.

    Parameters
    ----------
    out : 1-D int64 array
        Written in place. ``out[i]`` receives the position of the row that
        lies `periods` rows away from row ``i`` inside the same group, or
        -1 when the group has no such row.
    labels : 1-D int64 array
        Group label of every row. Rows with a negative label are treated
        as null keys and always receive -1.
    ngroups : int
        Number of groups; labels are assumed to be smaller than this.
    periods : int
        Shift distance. Positive values look backwards (earlier rows),
        negative values look forwards, and 0 yields the identity indexer.
    """
    cdef:
        # `offset` is Py_ssize_t because it can hold N - 1
        Py_ssize_t N, i, ii, offset
        int sign
        int64_t lab, idxer, idxer_slot
        int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64)
        int64_t[:, :] label_indexer

    N, = (<object> labels).shape

    if periods == 0:
        # Nothing to shift: every row maps to itself.
        with nogil:
            for i in range(N):
                out[i] = i
        return

    if periods < 0:
        # Shifting backwards is a forward shift over the reversed rows.
        periods = -periods
        offset = N - 1
        sign = -1
    else:
        offset = 0
        sign = 1

    # array of each previous indexer seen: a ring buffer holding the last
    # `periods` row positions encountered for every label
    label_indexer = np.zeros((ngroups, periods), dtype=np.int64)
    with nogil:
        for i in range(N):
            # reverse iterator if shifting backwards
            ii = offset + sign * i
            lab = labels[ii]

            # Skip null keys. Any negative label is rejected, because with
            # bounds checks and wraparound disabled it would index outside
            # `label_seen` / `label_indexer`.
            if lab < 0:
                out[ii] = -1
                continue

            label_seen[lab] += 1

            # Read the slot before overwriting it: it still holds the
            # position recorded `periods` rows earlier in this group.
            idxer_slot = label_seen[lab] % periods
            idxer = label_indexer[lab, idxer_slot]

            if label_seen[lab] > periods:
                out[ii] = idxer
            else:
                # fewer than `periods` earlier rows exist in this group
                out[ii] = -1

            label_indexer[lab, idxer_slot] = ii
254+
255+
@cython.wraparound(False)
@cython.boundscheck(False)
def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
                         ndarray[uint8_t] mask, object direction,
                         int64_t limit):
    """Indexes how to fill values forwards or backwards within a group

    Parameters
    ----------
    out : array of int64_t values which this method will write its results to
        Missing values will be written to with a value of -1
    labels : array containing unique label for each group, with its ordering
        matching up to the corresponding record in `out` and `mask`
    mask : array of uint8_t values where a 1 indicates a missing value
    direction : {'ffill', 'bfill'}
        Direction for fill to be applied (forwards or backwards, respectively)
    limit : Consecutive values to fill before stopping, or -1 for no limit

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object
    """
    cdef:
        Py_ssize_t i, N
        ndarray[int64_t] sorted_labels
        int64_t idx, curr_fill_idx=-1, filled_vals=0

    N = len(out)

    # Make sure all arrays are the same size
    assert N == len(labels) == len(mask)

    # The sort must be stable: rows of one group have to be visited in
    # their original order, otherwise a value could be filled from the
    # wrong row. Reversing a stable ordering walks each group backwards.
    sorted_labels = np.argsort(labels, kind='mergesort').astype(
        np.int64, copy=False)
    if direction == 'bfill':
        sorted_labels = sorted_labels[::-1]

    with nogil:
        for i in range(N):
            idx = sorted_labels[i]
            if mask[idx] == 1:  # is missing
                # Stop filling once we've hit the limit
                if filled_vals >= limit and limit != -1:
                    curr_fill_idx = -1
                filled_vals += 1
            else:  # reset items when not missing
                filled_vals = 0
                curr_fill_idx = idx

            out[idx] = curr_fill_idx

            # If we move to the next group, reset
            # the fill_idx and counter
            if i == N - 1 or labels[idx] != labels[sorted_labels[i + 1]]:
                curr_fill_idx = -1
                filled_vals = 0
311+
312+
97313# generated from template
98314include " groupby_helper.pxi"
0 commit comments