Skip to content

Commit c360f11

Browse files
committed
rebase upstream
2 parents 24195fe + 1d04dc9 commit c360f11

File tree

3 files changed

+106
-60
lines changed

3 files changed

+106
-60
lines changed

python/pyspark/sql/functions.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,21 @@ def _():
122122
'bitwiseNOT': 'Computes bitwise not.',
123123
}
124124

125+
_functions_1_6 = {
126+
# aggregate functions (standard deviation, variance, skewness, kurtosis)
127+
"stddev": "Aggregate function: returns the unbiased sample standard deviation of" +
128+
" the expression in a group.",
129+
"stddev_samp": "Aggregate function: returns the unbiased sample standard deviation of" +
130+
" the expression in a group.",
131+
"stddev_pop": "Aggregate function: returns population standard deviation of" +
132+
" the expression in a group.",
133+
"variance": "Aggregate function: returns the population variance of the values in a group.",
134+
"var_samp": "Aggregate function: returns the unbiased variance of the values in a group.",
135+
"var_pop": "Aggregate function: returns the population variance of the values in a group.",
136+
"skewness": "Aggregate function: returns the skewness of the values in a group.",
137+
"kurtosis": "Aggregate function: returns the kurtosis of the values in a group."
138+
}
139+
125140
# math functions that take two arguments as input
126141
_binary_mathfunctions = {
127142
'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' +
@@ -172,6 +187,8 @@ def _():
172187
globals()[_name] = since(1.4)(_create_binary_mathfunction(_name, _doc))
173188
for _name, _doc in _window_functions.items():
174189
globals()[_name] = since(1.4)(_create_window_function(_name, _doc))
190+
for _name, _doc in _functions_1_6.items():
191+
globals()[_name] = since(1.6)(_create_function(_name, _doc))
175192
del _name, _doc
176193

177194

python/pyspark/sql/group.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,94 @@ def sum(self, *cols):
167167
[Row(sum(age)=7, sum(height)=165)]
168168
"""
169169

170+
@df_varargs_api
171+
@since(1.6)
172+
def stddev(self, *cols):
173+
"""Compute the sample standard deviation for each numeric column for each group.
174+
175+
:param cols: list of column names (string). Non-numeric columns are ignored.
176+
177+
>>> df3.groupBy().stddev('age', 'height').collect()
178+
[Row(STDDEV(age)=2.12..., STDDEV(height)=3.53...)]
179+
"""
180+
181+
@df_varargs_api
182+
@since(1.6)
183+
def stddev_samp(self, *cols):
184+
"""Compute the sample standard deviation for each numeric column for each group.
185+
186+
:param cols: list of column names (string). Non-numeric columns are ignored.
187+
188+
>>> df3.groupBy().stddev_samp('age', 'height').collect()
189+
[Row(STDDEV_SAMP(age)=2.12..., STDDEV_SAMP(height)=3.53...)]
190+
"""
191+
192+
@df_varargs_api
193+
@since(1.6)
194+
def stddev_pop(self, *cols):
195+
"""Compute the population standard deviation for each numeric column for each group.
196+
197+
:param cols: list of column names (string). Non-numeric columns are ignored.
198+
199+
>>> df3.groupBy().stddev_pop('age', 'height').collect()
200+
[Row(STDDEV_POP(age)=1.5, STDDEV_POP(height)=2.5)]
201+
"""
202+
203+
@df_varargs_api
204+
@since(1.6)
205+
def variance(self, *cols):
206+
"""Compute the population variance for each numeric column for each group.
207+
208+
:param cols: list of column names (string). Non-numeric columns are ignored.
209+
210+
>>> df3.groupBy().variance('age', 'height').collect()
211+
[Row(VARIANCE(age)=2.25, VARIANCE(height)=6.25)]
212+
"""
213+
214+
@df_varargs_api
215+
@since(1.6)
216+
def var_pop(self, *cols):
217+
"""Compute the population variance for each numeric column for each group.
218+
219+
:param cols: list of column names (string). Non-numeric columns are ignored.
220+
221+
>>> df3.groupBy().var_pop('age', 'height').collect()
222+
[Row(VAR_POP(age)=2.25, VAR_POP(height)=6.25)]
223+
"""
224+
225+
@df_varargs_api
226+
@since(1.6)
227+
def var_samp(self, *cols):
228+
"""Compute the sample variance for each numeric column for each group.
229+
230+
:param cols: list of column names (string). Non-numeric columns are ignored.
231+
232+
>>> df3.groupBy().var_samp('age', 'height').collect()
233+
[Row(VAR_SAMP(age)=4.5, VAR_SAMP(height)=12.5)]
234+
"""
235+
236+
@df_varargs_api
237+
@since(1.6)
238+
def skewness(self, *cols):
239+
"""Compute the skewness for each numeric column for each group.
240+
241+
:param cols: list of column names (string). Non-numeric columns are ignored.
242+
243+
>>> df3.groupBy().skewness('age', 'height').collect()
244+
[Row(SKEWNESS(age)=0.0, SKEWNESS(height)=0.0)]
245+
"""
246+
247+
@df_varargs_api
248+
@since(1.6)
249+
def kurtosis(self, *cols):
250+
"""Compute the kurtosis for each numeric column for each group.
251+
252+
:param cols: list of column names (string). Non-numeric columns are ignored.
253+
254+
>>> df3.groupBy().kurtosis('age', 'height').collect()
255+
[Row(KURTOSIS(age)=-2.0, KURTOSIS(height)=-2.0)]
256+
"""
257+
170258

171259
def _test():
172260
import doctest

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 1 addition & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -254,14 +254,6 @@ object functions {
254254
*/
255255
def kurtosis(e: Column): Column = Kurtosis(e.expr)
256256

257-
/**
258-
* Aggregate function: returns the kurtosis of the values in a group.
259-
*
260-
* @group agg_funcs
261-
* @since 1.6.0
262-
*/
263-
def kurtosis(columnName: String): Column = kurtosis(Column(columnName))
264-
265257
/**
266258
* Aggregate function: returns the last value in a group.
267259
*
@@ -354,32 +346,14 @@ object functions {
354346
def stddev(e: Column): Column = Stddev(e.expr)
355347

356348
/**
357-
* Aggregate function: returns the population standard deviation of
358-
* the expression in a group.
359-
*
360-
* @group agg_funcs
361-
* @since 1.6.0
362-
*/
363-
def stddev(columnName: String): Column = stddev(Column(columnName))
364-
365-
/**
366-
* Aggregate function: returns the unbiased sample standard deviation of
349+
* Aggregate function: returns the sample standard deviation of
367350
* the expression in a group.
368351
*
369352
* @group agg_funcs
370353
* @since 1.6.0
371354
*/
372355
def stddev_samp(e: Column): Column = StddevSamp(e.expr)
373356

374-
/**
375-
* Aggregate function: returns the unbiased sample standard deviation of
376-
* the expression in a group.
377-
*
378-
* @group agg_funcs
379-
* @since 1.6.0
380-
*/
381-
def stddev_samp(columnName: String): Column = stddev_samp(Column(columnName))
382-
383357
/**
384358
* Aggregate function: returns the population standard deviation of
385359
* the expression in a group.
@@ -389,15 +363,6 @@ object functions {
389363
*/
390364
def stddev_pop(e: Column): Column = StddevPop(e.expr)
391365

392-
/**
393-
* Aggregate function: returns the population standard deviation of
394-
* the expression in a group.
395-
*
396-
* @group agg_funcs
397-
* @since 1.6.0
398-
*/
399-
def stddev_pop(columnName: String): Column = stddev_pop(Column(columnName))
400-
401366
/**
402367
* Aggregate function: returns the sum of all values in the expression.
403368
*
@@ -438,14 +403,6 @@ object functions {
438403
*/
439404
def variance(e: Column): Column = Variance(e.expr)
440405

441-
/**
442-
* Aggregate function: returns the population variance of the values in a group.
443-
*
444-
* @group agg_funcs
445-
* @since 1.6.0
446-
*/
447-
def variance(columnName: String): Column = variance(Column(columnName))
448-
449406
/**
450407
* Aggregate function: returns the unbiased variance of the values in a group.
451408
*
@@ -454,14 +411,6 @@ object functions {
454411
*/
455412
def var_samp(e: Column): Column = VarianceSamp(e.expr)
456413

457-
/**
458-
* Aggregate function: returns the unbiased variance of the values in a group.
459-
*
460-
* @group agg_funcs
461-
* @since 1.6.0
462-
*/
463-
def var_samp(columnName: String): Column = var_samp(Column(columnName))
464-
465414
/**
466415
* Aggregate function: returns the population variance of the values in a group.
467416
*
@@ -470,14 +419,6 @@ object functions {
470419
*/
471420
def var_pop(e: Column): Column = VariancePop(e.expr)
472421

473-
/**
474-
* Aggregate function: returns the population variance of the values in a group.
475-
*
476-
* @group agg_funcs
477-
* @since 1.6.0
478-
*/
479-
def var_pop(columnName: String): Column = var_pop(Column(columnName))
480-
481422
//////////////////////////////////////////////////////////////////////////////////////////////
482423
// Window functions
483424
//////////////////////////////////////////////////////////////////////////////////////////////

0 commit comments

Comments
 (0)