Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
change the param name to expectedNonNullableColumns
  • Loading branch information
gatorsmile committed Oct 20, 2016
commit 52cb8fb33e79b527008b68dccbaeb5bcc82f5feb
24 changes: 12 additions & 12 deletions sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1619,7 +1619,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
private def verifyNullabilityInFilterExec(
df: DataFrame,
expr: String,
expectedNullableColumns: Seq[String]): Unit = {
expectedNonNullableColumns: Seq[String]): Unit = {
val dfWithFilter = df.where(s"isnotnull($expr)").selectExpr(expr)
// In the logical plan, all the output columns of input dataframe are nullable
dfWithFilter.queryExecution.optimizedPlan.collect {
Expand All @@ -1632,7 +1632,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
// otherwise, no change should be made.
case e: FilterExec =>
assert(e.output.forall { o =>
if (expectedNullableColumns.contains(o.name)) !o.nullable else o.nullable
if (expectedNonNullableColumns.contains(o.name)) !o.nullable else o.nullable
})
}
}
Expand All @@ -1644,14 +1644,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
new java.lang.Integer(2) -> new java.lang.Integer(4))).toDF()

verifyNullabilityInFilterExec(df,
expr = "Rand()", expectedNullableColumns = Seq.empty[String])
expr = "Rand()", expectedNonNullableColumns = Seq.empty[String])
verifyNullabilityInFilterExec(df,
expr = "coalesce(_1, _2)", expectedNullableColumns = Seq.empty[String])
expr = "coalesce(_1, _2)", expectedNonNullableColumns = Seq.empty[String])
verifyNullabilityInFilterExec(df,
expr = "coalesce(_1, 0) + Rand()", expectedNullableColumns = Seq.empty[String])
expr = "coalesce(_1, 0) + Rand()", expectedNonNullableColumns = Seq.empty[String])
verifyNullabilityInFilterExec(df,
expr = "cast(coalesce(cast(coalesce(_1, _2) as double), 0.0) as int)",
expectedNullableColumns = Seq.empty[String])
expectedNonNullableColumns = Seq.empty[String])
}

test("SPARK-17957: set nullability to false in FilterExec output") {
Expand All @@ -1661,17 +1661,17 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
new java.lang.Integer(2) -> new java.lang.Integer(4))).toDF()

verifyNullabilityInFilterExec(df,
expr = "_1 + _2 * 3", expectedNullableColumns = Seq("_1", "_2"))
expr = "_1 + _2 * 3", expectedNonNullableColumns = Seq("_1", "_2"))
verifyNullabilityInFilterExec(df,
expr = "_1 + _2", expectedNullableColumns = Seq("_1", "_2"))
expr = "_1 + _2", expectedNonNullableColumns = Seq("_1", "_2"))
verifyNullabilityInFilterExec(df,
expr = "_1", expectedNullableColumns = Seq("_1"))
expr = "_1", expectedNonNullableColumns = Seq("_1"))
verifyNullabilityInFilterExec(df,
expr = "_2 + Rand()", expectedNullableColumns = Seq("_2"))
expr = "_2 + Rand()", expectedNonNullableColumns = Seq("_2"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should not work in current approach. It works now because we infer redundant IsNotNull constraints. E.g., if Filter has a constraint IsNotNull(_2 + Rand()), we will infer another IsNotNull(_2) from it. Your approach is working on IsNotNull(_2) to decide _2 is non-nullable column, not IsNotNull(_2 + Rand()).

I submitted another PR #15653 for redundant IsNotNull constraints. But I am not sure if we want to fix it since it doesn't affect correctness. I left that to @cloud-fan or @hvanhovell to decide it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I already explained why my current solution works in my previous statement. Personally, I like simple code, which is easy to understand and maintain, especially when it can cover all the cases. Result correctness and code maintainability are always more important.

If constructIsNotNullConstraints is changed by somebody else (i.e., it does not provide the expected IsNotNull constraints), the test cases added by this PR will fail. Then, we can modify the codes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, so I said I would leave that to @cloud-fan or others to decide...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with the simplicity argument but can you please add a comment here explaining why this particular case is working due to the null inference rule?

verifyNullabilityInFilterExec(df,
expr = "_2 * 3 + coalesce(_1, 0)", expectedNullableColumns = Seq("_2"))
expr = "_2 * 3 + coalesce(_1, 0)", expectedNonNullableColumns = Seq("_2"))
verifyNullabilityInFilterExec(df,
expr = "cast((_1 + _2) as boolean)", expectedNullableColumns = Seq("_1", "_2"))
expr = "cast((_1 + _2) as boolean)", expectedNonNullableColumns = Seq("_1", "_2"))
}

test("SPARK-17957: outer join + na.fill") {
Expand Down