-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-32585][SQL] Support scala enumeration in ScalaReflection #29403
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.objects._ | |
| import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} | ||
| import org.apache.spark.sql.types._ | ||
| import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} | ||
| import org.apache.spark.util.Utils | ||
|
|
||
|
|
||
| /** | ||
|
|
@@ -377,6 +378,29 @@ object ScalaReflection extends ScalaReflection { | |
| expressions.Literal.create(null, ObjectType(cls)), | ||
| newInstance | ||
| ) | ||
|
|
||
| case t if isSubtype(t, localTypeOf[Enumeration#Value]) => | ||
| // package example | ||
| // object Foo extends Enumeration { | ||
| // type Foo = Value | ||
| // val E1, E2 = Value | ||
| // } | ||
| // the fullName of tpe is example.Foo.Foo, but we need example.Foo so that | ||
| // we can call example.Foo.withName to deserialize string to enumeration. | ||
| val className = t.asInstanceOf[TypeRef].pre.typeSymbol.asClass.fullName | ||
| // this check is for spark-shell which give a default package name like '$line1.$read$$iw' | ||
| if (className.startsWith("$")) { | ||
| throw new UnsupportedOperationException( | ||
| s"Enumeration class required package name, but found $className") | ||
| } | ||
|
|
||
| val clazz = Utils.classForName(className) | ||
|
||
| StaticInvoke( | ||
| clazz, | ||
| ObjectType(getClassFromType(t)), | ||
| "withName", | ||
| createDeserializerForString(path, false) :: Nil, | ||
| returnNullable = false) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should also be
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, if I'm correct, it would be good to include a test case for this case. What happens if you actually do operations on the null value stored as an enum? Is the nullability of the resulting schema correct. Do operations like
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, you are correct.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We will get an exception, and the behavior is same as Java enum what we supported. |
||
| } | ||
| } | ||
|
|
||
|
|
@@ -562,6 +586,14 @@ object ScalaReflection extends ScalaReflection { | |
| } | ||
| createSerializerForObject(inputObject, fields) | ||
|
|
||
| case t if isSubtype(t, localTypeOf[Enumeration#Value]) => | ||
| createSerializerForString( | ||
| Invoke( | ||
| inputObject, | ||
| "toString", | ||
| ObjectType(classOf[java.lang.String]), | ||
| returnNullable = false)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as withName, toString also return a non-null value.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I see. I agree. |
||
|
|
||
| case _ => | ||
| throw new UnsupportedOperationException( | ||
| s"No Encoder found for $tpe\n" + walkedTypePath) | ||
|
|
@@ -739,6 +771,8 @@ object ScalaReflection extends ScalaReflection { | |
| val Schema(dataType, nullable) = schemaFor(fieldType) | ||
| StructField(fieldName, dataType, nullable) | ||
| }), nullable = true) | ||
| case t if isSubtype(t, localTypeOf[Enumeration#Value]) => | ||
| Schema(StringType, nullable = false) | ||
|
||
| case other => | ||
| throw new UnsupportedOperationException(s"Schema for type $other is not supported") | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,7 @@ import java.sql.{Date, Timestamp} | |
| import scala.reflect.runtime.universe.TypeTag | ||
|
|
||
| import org.apache.spark.SparkFunSuite | ||
| import org.apache.spark.sql.catalyst.FooEnum.FooEnum | ||
| import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue | ||
| import org.apache.spark.sql.catalyst.expressions.{CreateNamedStruct, Expression, If, SpecificInternalRow, UpCast} | ||
| import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, NewInstance} | ||
|
|
@@ -90,6 +91,13 @@ case class FooWithAnnotation(f1: String @FooAnnotation, f2: Option[String] @FooA | |
|
|
||
| case class SpecialCharAsFieldData(`field.1`: String, `field 2`: String) | ||
|
|
||
| object FooEnum extends Enumeration { | ||
| type FooEnum = Value | ||
| val E1, E2 = Value | ||
| } | ||
|
|
||
| case class FooClassWithEnum(i: Int, e: FooEnum) | ||
|
|
||
| object TestingUDT { | ||
| @SQLUserDefinedType(udt = classOf[NestedStructUDT]) | ||
| class NestedStruct(val a: Integer, val b: Long, val c: Double) | ||
|
|
@@ -437,4 +445,12 @@ class ScalaReflectionSuite extends SparkFunSuite { | |
| StructField("f2", StringType)))) | ||
| assert(deserializerFor[FooWithAnnotation].dataType == ObjectType(classOf[FooWithAnnotation])) | ||
| } | ||
|
|
||
| test("SPARK-32585: Support scala enumeration in ScalaReflection") { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would consider putting a test case in
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, not realized this test. |
||
| assert(schemaFor[FooClassWithEnum].dataType == StringType) | ||
| assert(serializerFor[FooClassWithEnum].dataType == StructType(Seq( | ||
| StructField("i", IntegerType, false), | ||
| StructField("e", StringType, false)))) | ||
| assert(deserializerFor[FooClassWithEnum].dataType == ObjectType(classOf[FooClassWithEnum])) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,7 +25,7 @@ import org.scalatest.exceptions.TestFailedException | |
| import org.scalatest.prop.TableDrivenPropertyChecks._ | ||
|
|
||
| import org.apache.spark.{SparkException, TaskContext} | ||
| import org.apache.spark.sql.catalyst.ScroogeLikeExample | ||
| import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum, ScroogeLikeExample} | ||
| import org.apache.spark.sql.catalyst.encoders.{OuterScopes, RowEncoder} | ||
| import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi} | ||
| import org.apache.spark.sql.catalyst.util.sideBySide | ||
|
|
@@ -1926,6 +1926,13 @@ class DatasetSuite extends QueryTest | |
| } | ||
| } | ||
| } | ||
|
|
||
| test("SPARK-32585: Support scala enumeration in ScalaReflection") { | ||
| checkDataset( | ||
| Seq(FooClassWithEnum(1, FooEnum.E1), FooClassWithEnum(2, FooEnum.E2)).toDS(), | ||
| Seq(FooClassWithEnum(1, FooEnum.E1), FooClassWithEnum(2, FooEnum.E2)): _* | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Related to the above comment, I would add a test case where the enum value is
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added the null test. |
||
| ) | ||
| } | ||
| } | ||
|
|
||
| object AssertExecutionId { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't actually understand this limitation?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's actually a jline issue that default package is something like
$line32.$read.$iw.$iw.$iw.$iw.$iw. We can't find Enum class using reflect. I choose to forbid it, but please tell me if there exists a better way to go.