Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
[SPARK-16278][SPARK-16279][SQL] Implement map_keys/map_values SQL fun…
…ctions
  • Loading branch information
dongjoon-hyun committed Jul 2, 2016
commit 8a639bcc0ea70fd232a718e8ef164be20821b800
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ object FunctionRegistry {
expression[IsNotNull]("isnotnull"),
expression[Least]("least"),
expression[CreateMap]("map"),
expression[MapKeys]("map_keys"),
expression[MapValues]("map_values"),
expression[CreateNamedStruct]("named_struct"),
expression[NaNvl]("nanvl"),
expression[NullIf]("nullif"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,56 @@ case class Size(child: Expression) extends UnaryExpression with ExpectsInputType
}
}

/**
* Returns an unordered array containing the keys of the map.
*/
@ExpressionDescription(
usage = "_FUNC_(map) - Returns an unordered array containing the keys of the map.")
case class MapKeys(child: Expression)
extends UnaryExpression with ExpectsInputTypes {

// The single child must be map-typed.
override def inputTypes: Seq[AbstractDataType] = Seq(MapType)

/** The result is an array whose element type is the key type of the child's map type. */
override def dataType: DataType = {
  val childMapType = child.dataType.asInstanceOf[MapType]
  ArrayType(childMapType.keyType)
}

// `foldable` is not overridden here: UnaryExpression already defines it as `child.foldable`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is the default of UnaryExpression

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep.


/**
 * Evaluates the expression for a non-null input.
 *
 * @param map the evaluated child, cast to `MapData`
 * @return a copy of the map's key array
 */
override def nullSafeEval(map: Any): Any = {
  val mapData = map.asInstanceOf[MapData]
  // Hand back a copy rather than the array instance held by the map itself.
  mapData.keyArray().copy()
}

/**
 * Code-generation counterpart of `nullSafeEval`: emits one statement that assigns a copy of
 * the map's key array to `ev.value`.
 */
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
  // Builds the assignment from the term that nullSafeCodeGen passes in for the child value.
  val assignKeyArray: String => String = { mapTerm =>
    s"${ev.value} = ($mapTerm).keyArray().copy();"
  }
  nullSafeCodeGen(ctx, ev, assignKeyArray)
}

// Same name under which this expression is registered in FunctionRegistry.
override def prettyName: String = "map_keys"
}

/**
* Returns an unordered array containing the values of the map.
*/
@ExpressionDescription(
usage = "_FUNC_(map) - Returns an unordered array containing the values of the map.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

example would be great.

case class MapValues(child: Expression)
extends UnaryExpression with ExpectsInputTypes {

// The single child must be map-typed.
override def inputTypes: Seq[AbstractDataType] = Seq(MapType)

/** The result is an array whose element type is the value type of the child's map type. */
override def dataType: DataType = {
  val childMapType = child.dataType.asInstanceOf[MapType]
  ArrayType(childMapType.valueType)
}

// `foldable` is not overridden here: UnaryExpression already defines it as `child.foldable`.

override def nullSafeEval(map: Any): Any = {
map.asInstanceOf[MapData].valueArray().copy()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if we don't call copy here? It looks reasonable to copy, I'm just curious :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually didn't find a corresponding case. Is it safe?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A similar one is GetStructField, it can get an array column from a row without copying it. Maybe it's safe to not copy here too. cc @davies

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for confirming. That's nice.

}

/**
 * Code-generation counterpart of `nullSafeEval`: emits one statement that assigns a copy of
 * the map's value array to `ev.value`.
 */
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
  // Builds the assignment from the term that nullSafeCodeGen passes in for the child value.
  val assignValueArray: String => String = { mapTerm =>
    s"${ev.value} = ($mapTerm).valueArray().copy();"
  }
  nullSafeCodeGen(ctx, ev, assignValueArray)
}

// Same name under which this expression is registered in FunctionRegistry.
override def prettyName: String = "map_values"
}

/**
* Sorts the input array in ascending / descending order according to the natural ordering of
* the array elements and returns it.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ class CollectionFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(Literal.create(null, ArrayType(StringType)), null)
}

test("MapKeys/MapValues") {
val m0 = Literal.create(Map("a" -> "1", "b" -> "2"), MapType(StringType, StringType))
val m1 = Literal.create(Map[String, String](), MapType(StringType, StringType))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also check null case please

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure.


// A null map literal: both expressions are expected to evaluate to null for it.
val mn = Literal.create(null, MapType(StringType, StringType))

// Populated map.
checkEvaluation(MapKeys(m0), Seq("a", "b"))
checkEvaluation(MapValues(m0), Seq("1", "2"))
// Empty map.
checkEvaluation(MapKeys(m1), Seq())
checkEvaluation(MapValues(m1), Seq())
// Null map.
checkEvaluation(MapKeys(mn), null)
checkEvaluation(MapValues(mn), null)
}

test("Sort Array") {
val a0 = Literal.create(Seq(2, 1, 3), ArrayType(IntegerType))
val a1 = Literal.create(Seq[Integer](), ArrayType(IntegerType))
Expand Down
16 changes: 16 additions & 0 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2760,6 +2760,22 @@ object functions {
*/
def size(e: Column): Column = withExpr { Size(e.expr) }

/**
 * Returns an unordered array containing the keys of the given map column.
 *
 * @param e a map-typed column
 * @group collection_funcs
 * @since 2.1.0
 */
def map_keys(e: Column): Column = withExpr(MapKeys(e.expr))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's not register these functions here for now, we will think about it later.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I think that policy is about xpath-related stuff. Isn't it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's about all hive-fallback functions

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. Thank you for confirming!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was confused due to posexplode before. Now, it's clear. I'll remove soon.


/**
 * Returns an unordered array containing the values of the given map column.
 *
 * @param e a map-typed column
 * @group collection_funcs
 * @since 2.1.0
 */
def map_values(e: Column): Column = withExpr { MapValues(e.expr) }

/**
* Sorts the input array for the given column in ascending order,
* according to the natural ordering of the array elements.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,30 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
)
}

test("map_keys/map_values function") {
  // Three rows: a two-entry map, an empty map and a three-entry map.
  val input = Seq(
    (Map[Int, Int](1 -> 100, 2 -> 200), "x"),
    (Map[Int, Int](), "y"),
    (Map[Int, Int](1 -> 100, 2 -> 200, 3 -> 300), "z")
  ).toDF("a", "b")

  val expectedKeys = Seq(Row(Seq(1, 2)), Row(Seq.empty), Row(Seq(1, 2, 3)))
  val expectedValues = Seq(Row(Seq(100, 200)), Row(Seq.empty), Row(Seq(100, 200, 300)))

  // Each function is exercised through both the Column API and a SQL expression string.
  checkAnswer(input.select(map_keys($"a")), expectedKeys)
  checkAnswer(input.selectExpr("map_keys(a)"), expectedKeys)
  checkAnswer(input.select(map_values($"a")), expectedValues)
  checkAnswer(input.selectExpr("map_values(a)"), expectedValues)
}

test("array contains function") {
val df = Seq(
(Seq[Int](1, 2), "x"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,6 @@ private[sql] class HiveSessionCatalog(
// str_to_map, windowingtablefunction.
private val hiveFunctions = Seq(
"hash", "java_method", "histogram_numeric",
"map_keys", "map_values",
"parse_url", "percentile", "percentile_approx", "reflect", "sentences", "stack", "str_to_map",
"xpath", "xpath_double", "xpath_float", "xpath_int", "xpath_long",
"xpath_number", "xpath_short", "xpath_string",
Expand Down