-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-3891][SQL] Add array support to percentile, percentile_approx and constant inspectors support #2802
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-3891][SQL] Add array support to percentile, percentile_approx and constant inspectors support #2802
Changes from 1 commit
7f94aff
cb7c61e
47f6365
f37fd69
4d39105
c46db0f
a18f917
a0182e5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Fixed HiveUdaf wrap object issue.
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -198,12 +198,13 @@ private[hive] case class HiveGenericUdaf( | |
|
|
||
| @transient | ||
| protected lazy val objectInspector = { | ||
| resolver.getEvaluator(children.map(_.dataType.toTypeInfo).toArray) | ||
| val parameterInfo = new SimpleGenericUDAFParameterInfo(inspectors.toArray,false,false) | ||
| resolver.getEvaluator(parameterInfo) | ||
| .init(GenericUDAFEvaluator.Mode.COMPLETE, inspectors.toArray) | ||
| } | ||
|
|
||
| @transient | ||
| protected lazy val inspectors = children.map(_.dataType).map(toInspector) | ||
| protected lazy val inspectors = children.map(toInspector) | ||
|
|
||
| def dataType: DataType = inspectorToDataType(objectInspector) | ||
|
|
||
|
|
@@ -233,7 +234,7 @@ private[hive] case class HiveUdaf( | |
| } | ||
|
|
||
| @transient | ||
| protected lazy val inspectors = children.map(_.dataType).map(toInspector) | ||
| protected lazy val inspectors = children.map(ex => toInspector(ex.dataType)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| def dataType: DataType = inspectorToDataType(objectInspector) | ||
|
|
||
|
|
@@ -266,7 +267,7 @@ private[hive] case class HiveGenericUdtf( | |
| protected lazy val function: GenericUDTF = createFunction() | ||
|
|
||
| @transient | ||
| protected lazy val inputInspectors = children.map(_.dataType).map(toInspector) | ||
| protected lazy val inputInspectors = children.map( ex => toInspector(ex.dataType)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: remove the space before `ex`.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| @transient | ||
| protected lazy val outputInspector = function.initialize(inputInspectors.toArray) | ||
|
|
@@ -341,9 +342,15 @@ private[hive] case class HiveUdafFunction( | |
| createFunction[AbstractGenericUDAFResolver]() | ||
| } | ||
|
|
||
| private val inspectors = exprs.map(_.dataType).map(toInspector).toArray | ||
|
|
||
| private val function = resolver.getEvaluator(exprs.map(_.dataType.toTypeInfo).toArray) | ||
|
|
||
| private val inspectors = | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we have to distinguish the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The flag was previously added so as not to disturb the UDAFBridge path. Fixed and tested the same. |
||
| if(isUDAFBridgeRequired) exprs.map(ex => toInspector(ex.dataType)).toArray | ||
| else exprs.map(toInspector).toArray | ||
|
|
||
| private val function = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems that `GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo)` is deprecated. We'd better keep the previous implementation.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Actually, it is the other way round. `GenericUDAFResolver` and its method `GenericUDAFEvaluator getEvaluator(TypeInfo[] info)` are deprecated and replaced by `GenericUDAFResolver2`. `AbstractGenericUDAFResolver` exists for migration. |
||
| val parameterInfo = new SimpleGenericUDAFParameterInfo(inspectors,false,false) | ||
| resolver.getEvaluator(parameterInfo) | ||
| } | ||
|
|
||
| private val returnInspector = function.init(GenericUDAFEvaluator.Mode.COMPLETE, inspectors) | ||
|
|
||
|
|
@@ -356,8 +363,11 @@ private[hive] case class HiveUdafFunction( | |
| @transient | ||
| val inputProjection = new InterpretedProjection(exprs) | ||
|
|
||
| @transient | ||
| protected lazy val cached = new Array[AnyRef](exprs.length) | ||
|
|
||
| def update(input: Row): Unit = { | ||
| val inputs = inputProjection(input).asInstanceOf[Seq[AnyRef]].toArray | ||
| function.iterate(buffer, inputs) | ||
| function.iterate(buffer, wrap(inputs,inspectors,cached)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Spaces after `,`. |
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -87,8 +87,19 @@ class HiveUdfSuite extends QueryTest { | |
| test("SPARK-2693 udaf aggregates test") { | ||
| checkAnswer(sql("SELECT percentile(key,1) FROM src LIMIT 1"), | ||
| sql("SELECT max(key) FROM src").collect().toSeq) | ||
|
|
||
| checkAnswer(sql("SELECT percentile(key,array(1,1)) FROM src LIMIT 1"), | ||
| sql("SELECT array(max(key),max(key)) FROM src").collect().toSeq) | ||
| } | ||
|
|
||
| test("Generic UDAF aggregates") { | ||
| checkAnswer(sql("SELECT ceiling(percentile_approx(key,0.99999)) FROM src LIMIT 1"), | ||
| sql("SELECT max(key) FROM src LIMIT 1").collect().toSeq) | ||
|
|
||
| checkAnswer(sql("SELECT percentile_approx(100.0, array(0.9,0.9)) FROM src LIMIT 1"), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Space after `,`. |
||
| sql("SELECT array(100,100) FROM src LIMIT 1").collect().toSeq) | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A |
||
|
|
||
| test("UDFIntegerToString") { | ||
| val testData = TestHive.sparkContext.parallelize( | ||
| IntegerCaseClass(1) :: IntegerCaseClass(2) :: Nil) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We'd better keep the previous implementation, see https://github.com/apache/hive/blob/b8250ac2f30539f6b23ce80a20a9e338d3d31458/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDAFResolver.java
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, it is the other way round. `GenericUDAFResolver` and its method `GenericUDAFEvaluator getEvaluator(TypeInfo[] info)` are deprecated and replaced by `GenericUDAFResolver2`. `AbstractGenericUDAFResolver` exists for migration.
UDAF functions like `percentile_approx` no longer support the deprecated interface, so I have changed this to use `AbstractGenericUDAFResolver.getEvaluator(GenericUDAFParameterInfo)`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, got it. thanks.
Nit: spaces after `,`.