Skip to content
Closed
Prev Previous commit
Next Next commit
review comments
  • Loading branch information
codeatri committed Aug 15, 2018
commit e5d9b051b027cf86fbcd82701f54e50f1aeac7f6
Original file line number Diff line number Diff line change
Expand Up @@ -523,14 +523,14 @@ case class TransformKeys(
MapType(function.dataType, map.valueType, map.valueContainsNull)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can use valueType and valueContainsNull from the following val?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about this?

}

@transient val MapType(keyType, valueType, valueContainsNull) = argument.dataType
@transient lazy val MapType(keyType, valueType, valueContainsNull) = argument.dataType

// Binds the lambda's parameters to the map's key/value types. The key is bound
// as non-nullable (false) — null map keys are rejected at eval time with
// "Cannot use null as map key!" — while the value's nullability follows the
// map's valueContainsNull.
override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): TransformKeys = {
  copy(function = f(function, (keyType, false) :: (valueType, valueContainsNull) :: Nil))
}

@transient lazy val (keyVar, valueVar) = {
val LambdaFunction(
@transient lazy val LambdaFunction(
_, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
(keyVar, valueVar)
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: how about:

@transient lazy val LambdaFunction(_,
  (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I meant we don't need to surround it with:

@transient lazy val (keyVar, valueVar) = {
  ...
  (keyVar, valueVar)
}

just

@transient lazy val LambdaFunction(_,
  (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function

should work.

Expand All @@ -544,7 +544,7 @@ case class TransformKeys(
keyVar.value.set(map.keyArray().get(i, keyVar.dataType))
valueVar.value.set(map.valueArray().get(i, valueVar.dataType))
val result = f.eval(inputRow)
if (result == null) {
if (result == null) {
throw new RuntimeException("Cannot use null as map key!")
}
resultKeys.update(i, result)
Expand All @@ -554,7 +554,7 @@ case class TransformKeys(
}

override def prettyName: String = "transform_keys"
}
}

/**
* Merges two given maps into a single map by applying function to the pair of values with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
MapType(StringType, StringType, valueContainsNull = true))
val as2 = Literal.create(null,
MapType(StringType, StringType, valueContainsNull = false))
val asn = Literal.create(Map.empty[StringType, StringType],
val as3 = Literal.create(Map.empty[StringType, StringType],
MapType(StringType, StringType, valueContainsNull = true))

val concatValue: (Expression, Expression) => Expression = (k, v) => Concat(Seq(k, v))
Expand All @@ -340,16 +340,16 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
checkEvaluation(
transformKeys(transformKeys(as0, concatValue), concatValue),
Map("axyxy" -> "xy", "bbyzyz" -> "yz", "ccczxzx" -> "zx"))
checkEvaluation(transformKeys(asn, concatValue), Map.empty[String, String])
checkEvaluation(transformKeys(as3, concatValue), Map.empty[String, String])
checkEvaluation(
transformKeys(transformKeys(asn, concatValue), convertKeyToKeyLength),
transformKeys(transformKeys(as3, concatValue), convertKeyToKeyLength),
Map.empty[Int, String])
checkEvaluation(transformKeys(as0, convertKeyToKeyLength),
Map(2 -> "xy", 3 -> "yz", 4 -> "zx"))
checkEvaluation(transformKeys(as1, convertKeyToKeyLength),
Map(2 -> "xy", 3 -> "yz", 4 -> null))
checkEvaluation(transformKeys(as2, convertKeyToKeyLength), null)
checkEvaluation(transformKeys(asn, convertKeyToKeyLength), Map.empty[Int, String])
checkEvaluation(transformKeys(as3, convertKeyToKeyLength), Map.empty[Int, String])

val ax0 = Literal.create(
Map(1 -> "x", 2 -> "y", 3 -> "z"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ select exists(ys, y -> y > 30) as v from nested;
select exists(cast(null as array<int>), y -> y > 30) as v;

create or replace temporary view nested as values
(1, map(1,1,2,2,3,3)),
(2, map(4,4,5,5,6,6))
(1, map(1, 1, 2, 2, 3, 3)),
(2, map(4, 4, 5, 5, 6, 6))
as t(x, ys);

-- Identity Transform Keys in a map
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,8 @@ NULL

-- !query 17
create or replace temporary view nested as values
(1, map(1,1,2,2,3,3)),
(2, map(4,4,5,5,6,6))
(1, map(1, 1, 2, 2, 3, 3)),
(2, map(4, 4, 5, 5, 6, 6))
as t(x, ys)
-- !query 17 schema
struct<>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2302,13 +2302,13 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
}

test("transform keys function - test various primitive data types combinations") {
test("transform keys function - primitive data types") {
val dfExample1 = Seq(
Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
).toDF("i")

val dfExample2 = Seq(
Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
Map[Int, Double](1 -> 1.0, 2 -> 1.40, 3 -> 1.70)
).toDF("j")

val dfExample3 = Seq(
Expand Down Expand Up @@ -2357,34 +2357,37 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
}

test("transform keys function - Invalid lambda functions and exceptions") {

val dfExample1 = Seq(
Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
Map[String, String]("a" -> null)
).toDF("i")

val dfExample2 = Seq(
Map[String, String]("a" -> "b")
Seq(1, 2, 3, 4)
).toDF("j")

val dfExample3 = Seq(
Map[String, String]("a" -> null)
).toDF("x")

def testInvalidLambdaFunctions(): Unit = {
val ex1 = intercept[AnalysisException] {
dfExample1.selectExpr("transform_keys(i, k -> k )")
dfExample1.selectExpr("transform_keys(i, k -> k)")
}
assert(ex1.getMessage.contains("The number of lambda function arguments '1' does not match"))

val ex2 = intercept[AnalysisException] {
dfExample2.selectExpr("transform_keys(j, (k, v, x) -> k + 1)")
dfExample1.selectExpr("transform_keys(i, (k, v, x) -> k + 1)")
}
assert(ex2.getMessage.contains(
"The number of lambda function arguments '3' does not match"))
"The number of lambda function arguments '3' does not match"))

val ex3 = intercept[RuntimeException] {
dfExample3.selectExpr("transform_keys(x, (k, v) -> v)").show()
dfExample1.selectExpr("transform_keys(i, (k, v) -> v)").show()
}
assert(ex3.getMessage.contains("Cannot use null as map key!"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like we can do those tests only with dfExample3?


val ex4 = intercept[AnalysisException] {
dfExample2.selectExpr("transform_keys(j, (k, v) -> k + 1)")
}
assert(ex4.getMessage.contains(
"data type mismatch: argument 1 requires map type"))
}

testInvalidLambdaFunctions()
Expand Down