Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Due to numerical errors, put udaf_corr in the HiveCompatibilitySuite blacklist and add these tests to AggregationQuerySuite.
  • Loading branch information
viirya committed Oct 30, 2015
commit 2de76b444456bc7e751fa9ccb85a6e8f0662ff76
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {

// classpath problems
"compute_stats.*",
"udf_bitmap_.*"
"udf_bitmap_.*",

// The difference between the double numbers generated by Hive and Spark
// can be ignored (e.g., 0.6633880657639323 and 0.6633880657639322)
"udaf_corr"
)

/**
Expand Down Expand Up @@ -858,7 +862,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"type_cast_1",
"type_widening",
"udaf_collect_set",
"udaf_corr",
"udaf_covar_pop",
"udaf_covar_samp",
"udaf_histogram_numeric",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -589,18 +589,66 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te
val corr6 = df4.groupBy().agg(corr("a", "c")).collect()(0).getDouble(0)
assert(math.abs(corr6 + 1.0) < 1e-12)

val df5 = Seq[(Integer, Integer)](
(1, null),
(null, -60)).toDF("a", "b")
// These checks replace the udaf_corr test in HiveCompatibilitySuite, which has been
// blacklisted due to numerical errors. The same queries and their expected results are:
// SELECT corr(b, c) FROM covar_tab WHERE a < 1; => NULL
// SELECT corr(b, c) FROM covar_tab WHERE a < 3; => NULL
// SELECT corr(b, c) FROM covar_tab WHERE a = 3; => NULL
// SELECT a, corr(b, c) FROM covar_tab GROUP BY a ORDER BY a; =>
// 1 NULL
// 2 NULL
// 3 NULL
// 4 NULL
// 5 NULL
// 6 NULL
// SELECT corr(b, c) FROM covar_tab; => 0.6633880657639323

val covar_tab = Seq[(Integer, Integer, Integer)](
(1, null, 15),
(2, 3, null),
(3, 7, 12),
(4, 4, 14),
(5, 8, 17),
(6, 2, 11)).toDF("a", "b", "c")

covar_tab.registerTempTable("covar_tab")

val corr7 = df5.groupBy().agg(corr("a", "b")).collect()(0)
assert(corr7 == Row(null))
checkAnswer(
sqlContext.sql(
"""
|SELECT corr(b, c) FROM covar_tab WHERE a < 1
""".stripMargin),
Row(null) :: Nil)

checkAnswer(
sqlContext.sql(
"""
|SELECT corr(b, c) FROM covar_tab WHERE a < 3
""".stripMargin),
Row(null) :: Nil)

val df6 = Seq[(Integer, Integer)](
(7, 12)).toDF("a", "b")
checkAnswer(
sqlContext.sql(
"""
|SELECT corr(b, c) FROM covar_tab WHERE a = 3
""".stripMargin),
Row(null) :: Nil)

val corr8 = df6.groupBy().agg(corr("a", "b")).collect()(0)
assert(corr8 == Row(null))
checkAnswer(
sqlContext.sql(
"""
|SELECT a, corr(b, c) FROM covar_tab GROUP BY a ORDER BY a
""".stripMargin),
Row(1, null) ::
Row(2, null) ::
Row(3, null) ::
Row(4, null) ::
Row(5, null) ::
Row(6, null) :: Nil)

val corr7 = sqlContext.sql("SELECT corr(b, c) FROM covar_tab").collect()(0).getDouble(0)
assert(math.abs(corr7 - 0.6633880657639323) < 1e-12)

withSQLConf(SQLConf.USE_SQL_AGGREGATE2.key -> "false") {
val errorMessage = intercept[SparkException] {
Expand Down