-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-25817][SQL] Dataset encoder should support combination of map and product type #22812
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -119,10 +119,9 @@ object ExpressionEncoder { | |
| } | ||
|
|
||
| val childrenDeserializers = encoders.zipWithIndex.map { case (enc, index) => | ||
| val getColumnsByOrdinals = enc.objDeserializer.collect { case c: GetColumnByOrdinal => c } | ||
| .distinct | ||
| assert(getColumnsByOrdinals.size == 1, "object deserializer should have only one " + | ||
| s"`GetColumnByOrdinal`, but there are ${getColumnsByOrdinals.size}") | ||
| val getColExprs = enc.objDeserializer.collect { case c: GetColumnByOrdinal => c }.distinct | ||
|
||
| assert(getColExprs.size == 1, "object deserializer should have only one " + | ||
| s"`GetColumnByOrdinal`, but there are ${getColExprs.size}") | ||
|
|
||
| val input = GetStructField(GetColumnByOrdinal(0, schema), index) | ||
| val newDeserializer = enc.objDeserializer.transformUp { | ||
|
|
@@ -216,7 +215,6 @@ case class ExpressionEncoder[T]( | |
| } | ||
| nullSafeSerializer match { | ||
| case If(_: IsNull, _, s: CreateNamedStruct) => s | ||
| case s: CreateNamedStruct => s | ||
|
||
| case _ => | ||
| throw new RuntimeException(s"class $clsName has unexpected serializer: $objSerializer") | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,14 +30,13 @@ import org.apache.spark.serializer._ | |
| import org.apache.spark.sql.Row | ||
| import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, ScalaReflection} | ||
| import org.apache.spark.sql.catalyst.ScalaReflection.universe.TermName | ||
| import org.apache.spark.sql.catalyst.analysis.TypeCheckResult | ||
| import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedException} | ||
| import org.apache.spark.sql.catalyst.encoders.RowEncoder | ||
| import org.apache.spark.sql.catalyst.expressions._ | ||
| import org.apache.spark.sql.catalyst.expressions.codegen._ | ||
| import org.apache.spark.sql.catalyst.expressions.codegen.Block._ | ||
| import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, GenericArrayData, MapData} | ||
| import org.apache.spark.sql.types._ | ||
| import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} | ||
| import org.apache.spark.util.Utils | ||
|
|
||
| /** | ||
|
|
@@ -963,25 +962,32 @@ case class MapObjects private( | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Similar to [[UnresolvedMapObjects]], this is a placeholder of [[CatalystToExternalMap]]. | ||
| * | ||
| * @param child An expression that when evaluated returns a map object. | ||
| * @param keyFunction The function applied on the key collection elements. | ||
| * @param valueFunction The function applied on the value collection elements. | ||
| * @param collClass The type of the resulting collection. | ||
| */ | ||
| case class UnresolvedCatalystToExternalMap( | ||
| child: Expression, | ||
| @transient keyFunction: Expression => Expression, | ||
| @transient valueFunction: Expression => Expression, | ||
| collClass: Class[_]) extends UnaryExpression with Unevaluable { | ||
|
|
||
| override lazy val resolved = false | ||
|
|
||
| override def dataType: DataType = ObjectType(collClass) | ||
| } | ||
|
|
||
| object CatalystToExternalMap { | ||
| private val curId = new java.util.concurrent.atomic.AtomicInteger() | ||
|
|
||
| /** | ||
| * Construct an instance of CatalystToExternalMap case class. | ||
| * | ||
| * @param keyFunction The function applied on the key collection elements. | ||
| * @param valueFunction The function applied on the value collection elements. | ||
| * @param inputData An expression that when evaluated returns a map object. | ||
| * @param collClass The type of the resulting collection. | ||
| */ | ||
| def apply( | ||
| keyFunction: Expression => Expression, | ||
| valueFunction: Expression => Expression, | ||
| inputData: Expression, | ||
| collClass: Class[_]): CatalystToExternalMap = { | ||
| def apply(u: UnresolvedCatalystToExternalMap): CatalystToExternalMap = { | ||
| val id = curId.getAndIncrement() | ||
| val keyLoopValue = s"CatalystToExternalMap_keyLoopValue$id" | ||
| val mapType = inputData.dataType.asInstanceOf[MapType] | ||
| val mapType = u.child.dataType.asInstanceOf[MapType] | ||
| val keyLoopVar = LambdaVariable(keyLoopValue, "", mapType.keyType, nullable = false) | ||
| val valueLoopValue = s"CatalystToExternalMap_valueLoopValue$id" | ||
| val valueLoopIsNull = if (mapType.valueContainsNull) { | ||
|
|
@@ -991,9 +997,9 @@ object CatalystToExternalMap { | |
| } | ||
| val valueLoopVar = LambdaVariable(valueLoopValue, valueLoopIsNull, mapType.valueType) | ||
| CatalystToExternalMap( | ||
| keyLoopValue, keyFunction(keyLoopVar), | ||
| valueLoopValue, valueLoopIsNull, valueFunction(valueLoopVar), | ||
| inputData, collClass) | ||
| keyLoopValue, u.keyFunction(keyLoopVar), | ||
| valueLoopValue, valueLoopIsNull, u.valueFunction(valueLoopVar), | ||
| u.child, u.collClass) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -1090,15 +1096,9 @@ case class CatalystToExternalMap private( | |
| val tupleLoopValue = ctx.freshName("tupleLoopValue") | ||
| val builderValue = ctx.freshName("builderValue") | ||
|
|
||
| val getLength = s"${genInputData.value}.numElements()" | ||
|
||
|
|
||
| val keyArray = ctx.freshName("keyArray") | ||
| val valueArray = ctx.freshName("valueArray") | ||
| val getKeyArray = | ||
| s"${classOf[ArrayData].getName} $keyArray = ${genInputData.value}.keyArray();" | ||
| val getKeyLoopVar = CodeGenerator.getValue(keyArray, inputDataType(mapType.keyType), loopIndex) | ||
| val getValueArray = | ||
| s"${classOf[ArrayData].getName} $valueArray = ${genInputData.value}.valueArray();" | ||
| val getValueLoopVar = CodeGenerator.getValue( | ||
| valueArray, inputDataType(mapType.valueType), loopIndex) | ||
|
|
||
|
|
@@ -1147,10 +1147,10 @@ case class CatalystToExternalMap private( | |
| ${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; | ||
|
|
||
| if (!${genInputData.isNull}) { | ||
| int $dataLength = $getLength; | ||
| int $dataLength = ${genInputData.value}.numElements(); | ||
| $constructBuilder | ||
| $getKeyArray | ||
| $getValueArray | ||
| ArrayData $keyArray = ${genInputData.value}.keyArray(); | ||
| ArrayData $valueArray = ${genInputData.value}.valueArray(); | ||
|
|
||
| int $loopIndex = 0; | ||
| while ($loopIndex < $dataLength) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When `u.child` is resolved, is there still `UnresolvedExtractValue`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yea I think so. The `UnresolvedExtractValue` might appear in `CatalystToExternalMap.keyLambdaFunction` and `valueLambdaFunction`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`ResolveReferences` might also process that, but it is also good to have them here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TBH I don't quite remember why I did this for `MapObjects`, so I just follow it here. Maybe we can remove it in a followup PR.