Skip to content

Commit 25826c7

Browse files
araysrowen
authored andcommitted
[SPARK-21330][SQL] Bad partitioning does not allow to read a JDBC table with extreme values on the partition column
## What changes were proposed in this pull request? An overflow of the difference of bounds on the partitioning column leads to no data being read. This patch checks for this overflow. ## How was this patch tested? New unit test. Author: Andrew Ray <[email protected]> Closes #18800 from aray/SPARK-21330.
1 parent e3967dc commit 25826c7

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ private[sql] object JDBCRelation extends Logging {
6464
s"bound. Lower bound: $lowerBound; Upper bound: $upperBound")
6565

6666
val numPartitions =
67-
if ((upperBound - lowerBound) >= partitioning.numPartitions) {
67+
if ((upperBound - lowerBound) >= partitioning.numPartitions || /* check for overflow */
68+
(upperBound - lowerBound) < 0) {
6869
partitioning.numPartitions
6970
} else {
7071
logWarning("The number of partitions is reduced because the specified number of " +

sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,15 @@ class JDBCSuite extends SparkFunSuite
9696
| partitionColumn 'THEID', lowerBound '1', upperBound '4', numPartitions '3')
9797
""".stripMargin.replaceAll("\n", " "))
9898

99+
sql(
100+
s"""
101+
|CREATE OR REPLACE TEMPORARY VIEW partsoverflow
102+
|USING org.apache.spark.sql.jdbc
103+
|OPTIONS (url '$url', dbtable 'TEST.PEOPLE', user 'testUser', password 'testPass',
104+
| partitionColumn 'THEID', lowerBound '-9223372036854775808',
105+
| upperBound '9223372036854775807', numPartitions '3')
106+
""".stripMargin.replaceAll("\n", " "))
107+
99108
conn.prepareStatement("create table test.inttypes (a INT, b BOOLEAN, c TINYINT, "
100109
+ "d SMALLINT, e BIGINT)").executeUpdate()
101110
conn.prepareStatement("insert into test.inttypes values (1, false, 3, 4, 1234567890123)"
@@ -376,6 +385,12 @@ class JDBCSuite extends SparkFunSuite
376385
assert(ids(2) === 3)
377386
}
378387

388+
test("overflow of partition bound difference does not give negative stride") {
389+
val df = sql("SELECT * FROM partsoverflow")
390+
checkNumPartitions(df, expectedNumPartitions = 3)
391+
assert(df.collect().length == 3)
392+
}
393+
379394
test("Register JDBC query with renamed fields") {
380395
// Regression test for bug SPARK-7345
381396
sql(

0 commit comments

Comments
 (0)