@@ -71,10 +71,17 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLCo
""".stripMargin.replaceAll("\n", " ")).executeUpdate()
conn.commit()

conn.prepareStatement("CREATE TABLE ts_with_timezone (id NUMBER(10), t TIMESTAMP WITH TIME ZONE)")
.executeUpdate()
conn.prepareStatement("INSERT INTO ts_with_timezone VALUES (1, to_timestamp_tz('1999-12-01 11:00:00 UTC','YYYY-MM-DD HH:MI:SS TZR'))")
.executeUpdate()
conn.prepareStatement(
"CREATE TABLE ts_with_timezone (id NUMBER(10), t TIMESTAMP WITH TIME ZONE)").executeUpdate()
conn.prepareStatement(
"INSERT INTO ts_with_timezone VALUES " +
"(1, to_timestamp_tz('1999-12-01 11:00:00 UTC','YYYY-MM-DD HH:MI:SS TZR'))").executeUpdate()
conn.commit()

conn.prepareStatement(
"CREATE TABLE custom_column_types (id NUMBER, n1 number(1), n2 number(1))").executeUpdate()
conn.prepareStatement(
"INSERT INTO custom_column_types values(12312321321321312312312312123, 1, 0)").executeUpdate()
conn.commit()

sql(
@@ -268,4 +275,44 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLCo
     assert(row.getDate(0).equals(dateVal))
     assert(row.getTimestamp(1).equals(timestampVal))
   }
+
+  test("SPARK-20427/SPARK-20921: read table use custom schema") {
+
+    // default will throw IllegalArgumentException
+    val e = intercept[org.apache.spark.SparkException] {
+      spark.read.jdbc(jdbcUrl, "custom_column_types", new Properties()).collect()

[Member] Nit: Change the table names in all the test cases.

+    }
+    assert(e.getMessage.contains(
+      "requirement failed: Decimal precision 39 exceeds max precision 38"))
+
+    // custom schema can read data
+    val schema = StructType(Seq(
+      StructField("ID", DecimalType(DecimalType.MAX_PRECISION, 0)),
+      StructField("N1", IntegerType, true),
+      StructField("N2", BooleanType, true)))
+
+    val dfRead = spark.read.schema(schema).jdbc(jdbcUrl, "custom_column_types", new Properties())

[Member] ?

+    val rows = dfRead.collect()
+
+    // verify the data type inserted
+    val types = rows(0).toSeq.map(x => x.getClass.toString)
+    assert(types(0).equals("class java.math.BigDecimal"))
+    assert(types(1).equals("class java.lang.Integer"))
+    assert(types(2).equals("class java.lang.Boolean"))
+
+    // verify the value inserted
+    val values = rows(0)
+    assert(values.getDecimal(0).equals(new java.math.BigDecimal("12312321321321312312312312123")))
+    assert(values.getInt(1).equals(1))
+    assert(values.getBoolean(2).equals(false))
+
+    // throw exception if custom schema field names does not match table column names
+    val wrongSchema = StructType(Seq(
+      StructField("ID", DecimalType(DecimalType.MAX_PRECISION, 0)),
+      StructField("N2", BooleanType, true)))
+
+    intercept[IllegalArgumentException] {
+      spark.read.schema(wrongSchema).jdbc(jdbcUrl, "CUSTOM_COLUMN_TYPES", new Properties()).count()
+    }.getMessage.contains("Field ID,N2 does not match ID,N1,N2.")
+  }
 }
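
For context outside the suite, here is a minimal sketch of how the feature under test is driven from the public API. The JDBC URL and SparkSession setup are placeholders rather than values from this test; the point is only that a reader-level schema() call now overrides the JDBC type mapping instead of being rejected:

    import java.util.Properties

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.types._

    val spark = SparkSession.builder().master("local[*]").appName("customSchemaSketch").getOrCreate()

    // Declaring ID as DECIMAL(38, 0) avoids the default inference, which fails on the
    // unbounded Oracle NUMBER column with "Decimal precision 39 exceeds max precision 38".
    val customSchema = StructType(Seq(
      StructField("ID", DecimalType(DecimalType.MAX_PRECISION, 0)),
      StructField("N1", IntegerType),
      StructField("N2", BooleanType)))

    val df = spark.read
      .schema(customSchema)
      .jdbc("jdbc:oracle:thin:@//dbhost:1521/xe", "custom_column_types", new Properties())

    df.printSchema()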
@@ -197,11 +197,13 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    * @since 1.4.0
    */
   def jdbc(url: String, table: String, properties: Properties): DataFrame = {
-    assertNoSpecifiedSchema("jdbc")
     // properties should override settings in extraOptions.
     this.extraOptions ++= properties.asScala
     // explicit url and dbtable should override all
     this.extraOptions += (JDBCOptions.JDBC_URL -> url, JDBCOptions.JDBC_TABLE_NAME -> table)
+    if (userSpecifiedSchema.isDefined) {

[Member] Please also update the other API at line 273.

+      this.extraOptions += (JDBCOptions.JDBC_CUSTOM_SCHEMA -> userSpecifiedSchema.get.json)
+    }
     format("jdbc").load()
   }
 
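
What this change forwards is just the JSON form of the user-specified StructType, stored under the customSchema option key before format("jdbc").load() runs. A small standalone sketch of that serialization (the schema itself is illustrative):

    import org.apache.spark.sql.types._

    val userSpecifiedSchema = StructType(Seq(
      StructField("ID", DecimalType(38, 0)),
      StructField("N1", IntegerType),
      StructField("N2", BooleanType)))

    // This JSON string is the value placed into extraOptions under
    // JDBCOptions.JDBC_CUSTOM_SCHEMA ("customSchema").
    println(userSpecifiedSchema.json)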
@@ -21,6 +21,7 @@ import java.sql.{Connection, DriverManager}
 import java.util.{Locale, Properties}
 
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
+import org.apache.spark.sql.types.StructType
 
 /**
  * Options for the JDBC data source.
@@ -120,6 +121,11 @@ class JDBCOptions(
   // TODO: to reuse the existing partition parameters for those partition specific options
   val createTableOptions = parameters.getOrElse(JDBC_CREATE_TABLE_OPTIONS, "")
   val createTableColumnTypes = parameters.get(JDBC_CREATE_TABLE_COLUMN_TYPES)
+  val customSchema = parameters.get(JDBC_CUSTOM_SCHEMA) match {
+    case Some(value) => Some(StructType.fromString(value))
+    case _ => None
+  }
+
   val batchSize = {
     val size = parameters.getOrElse(JDBC_BATCH_INSERT_SIZE, "1000").toInt
     require(size >= 1,
@@ -156,6 +162,7 @@ object JDBCOptions {
   val JDBC_TRUNCATE = newOption("truncate")
   val JDBC_CREATE_TABLE_OPTIONS = newOption("createTableOptions")
   val JDBC_CREATE_TABLE_COLUMN_TYPES = newOption("createTableColumnTypes")
+  val JDBC_CUSTOM_SCHEMA = newOption("customSchema")
   val JDBC_BATCH_INSERT_SIZE = newOption("batchsize")
   val JDBC_TXN_ISOLATION_LEVEL = newOption("isolationLevel")
 }
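
The pattern match on the option above is equivalent to mapping over the Option. A minimal standalone sketch of the parsing direction, using the public DataType.fromJson in place of the sql-internal StructType.fromString helper the diff calls:

    import org.apache.spark.sql.types.{DataType, StructType}

    // Stand-in for the options map a JDBC source receives; the key mirrors JDBC_CUSTOM_SCHEMA.
    val parameters: Map[String, String] = Map(
      "customSchema" -> new StructType().add("ID", "decimal(38,0)").add("N1", "int").json)

    // Same effect as: parameters.get(key) match { case Some(v) => Some(parse(v)); case _ => None }
    val customSchema: Option[StructType] =
      parameters.get("customSchema").map(s => DataType.fromJson(s).asInstanceOf[StructType])

    customSchema.foreach(s => println(s.simpleString))  // struct<ID:decimal(38,0),N1:int>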
@@ -80,7 +80,7 @@ object JDBCRDD extends Logging {
    * @return A Catalyst schema corresponding to columns in the given order.
    */
   private def pruneSchema(schema: StructType, columns: Array[String]): StructType = {
-    val fieldMap = Map(schema.fields.map(x => x.metadata.getString("name") -> x): _*)
+    val fieldMap = Map(schema.fields.map(x => x.name -> x): _*)

[Member] This is not a related change. Could you revert it back?


[Member Author] CatalystSqlParser.parseTableSchema(columnTypes) constructs a StructType without metadata; the error message is:

key not found: name
java.util.NoSuchElementException: key not found: name
	at scala.collection.MapLike$class.default(MapLike.scala:228)
	at scala.collection.AbstractMap.default(Map.scala:59)
	at scala.collection.MapLike$class.apply(MapLike.scala:141)
	at scala.collection.AbstractMap.apply(Map.scala:59)
	at org.apache.spark.sql.types.Metadata.get(Metadata.scala:111)
	at org.apache.spark.sql.types.Metadata.getString(Metadata.scala:60)
	at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$$anonfun$1.apply(JDBCRDD.scala:83)
	at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$$anonfun$1.apply(JDBCRDD.scala:83)


[Member] Sorry, I did not get your point. Could you show me an example? Is it a behavior-breaking change?


[Member Author]

scala> org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parseTableSchema("id int, name string").fields.map(x => x.metadata.getString("name") -> x)
java.util.NoSuchElementException: key not found: name
  at scala.collection.MapLike$class.default(MapLike.scala:228)
  at scala.collection.AbstractMap.default(Map.scala:59)
  at scala.collection.MapLike$class.apply(MapLike.scala:141)
  at scala.collection.AbstractMap.apply(Map.scala:59)
  at org.apache.spark.sql.types.Metadata.get(Metadata.scala:111)
  at org.apache.spark.sql.types.Metadata.getString(Metadata.scala:60)
  at $anonfun$1.apply(<console>:24)
  at $anonfun$1.apply(<console>:24)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
  at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
  at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
  ... 48 elided

[Member Author] It seems safe to remove this line.

     new StructType(columns.map(name => fieldMap(name)))
   }
 
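
For completeness, the fixed lookup keyed by StructField.name also works for parser-built schemas, which is exactly the case the stack trace above shows failing for the metadata-based lookup. A REPL-style sketch:

    import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
    import org.apache.spark.sql.types.{StructField, StructType}

    val schema: StructType = CatalystSqlParser.parseTableSchema("id int, name string")

    // Keyed by field name, so no "name" metadata entry is required on the fields.
    val fieldMap: Map[String, StructField] = schema.fields.map(x => x.name -> x).toMap

    val pruned = new StructType(Array("name").map(fieldMap))
    println(pruned.simpleString)  // struct<name:string>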
@@ -111,7 +111,22 @@ private[sql] case class JDBCRelation(

   override val needConversion: Boolean = false
 
-  override val schema: StructType = JDBCRDD.resolveTable(jdbcOptions)
+  override val schema: StructType = {
+    val schema = JDBCRDD.resolveTable(jdbcOptions)
+    val customSchema = jdbcOptions.customSchema
+    if (customSchema.isDefined) {
+      val schemaFieldNames = schema.fieldNames.mkString(",")
+      val customSchemaFieldNames = customSchema.get.fieldNames.mkString(",")
+      if(schemaFieldNames.equals(customSchemaFieldNames)) {

[Member] We also need to consider case sensitivity.

+        customSchema.get
+      } else {
+        throw new IllegalArgumentException(
+          s"Field ${customSchemaFieldNames} does not match ${schemaFieldNames}.")
+      }
+    } else {
+      schema
+    }

[Member]

    val tableSchema = JDBCRDD.resolveTable(jdbcOptions)
    jdbcOptions.customSchema match {
      case Some(customSchema) => JdbcUtils.parseUserSpecifiedColumnTypes(
        tableSchema, customSchema, sparkSession.sessionState.conf.resolver)
      case None => tableSchema
    }

+  }
 
   // Check if JDBCRDD.compileFilter can accept input filters
   override def unhandledFilters(filters: Array[Filter]): Array[Filter] = {
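
A hedged sketch of the resolver-based check the review comments point toward: compare field names through a resolver function (what sparkSession.sessionState.conf.resolver supplies, honoring spark.sql.caseSensitive) instead of a case-sensitive string equals. The helper name is illustrative, not an existing JdbcUtils method:

    import org.apache.spark.sql.types.StructType

    type Resolver = (String, String) => Boolean

    // Keep the user-specified types, but require the field names to line up with the
    // resolved table schema under the given resolver.
    def applyCustomSchema(
        tableSchema: StructType,
        customSchema: StructType,
        resolver: Resolver): StructType = {
      val namesMatch = tableSchema.length == customSchema.length &&
        tableSchema.fields.zip(customSchema.fields).forall {
          case (tableField, customField) => resolver(tableField.name, customField.name)
        }
      if (namesMatch) {
        customSchema
      } else {
        throw new IllegalArgumentException(
          s"Field ${customSchema.fieldNames.mkString(",")} does not match " +
            s"${tableSchema.fieldNames.mkString(",")}.")
      }
    }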
15 changes: 0 additions & 15 deletions sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -916,21 +916,6 @@ class JDBCSuite extends SparkFunSuite
     assert(new JDBCOptions(CaseInsensitiveMap(parameters)).asConnectionProperties.isEmpty)
   }
 
-  test("SPARK-16848: jdbc API throws an exception for user specified schema") {

[Member] Also add test cases in JDBCSuite.scala?


-    val schema = StructType(Seq(
-      StructField("name", StringType, false), StructField("theid", IntegerType, false)))
-    val parts = Array[String]("THEID < 2", "THEID >= 2")
-    val e1 = intercept[AnalysisException] {
-      spark.read.schema(schema).jdbc(urlWithUserAndPass, "TEST.PEOPLE", parts, new Properties())
-    }.getMessage
-    assert(e1.contains("User specified schema not supported with `jdbc`"))
-
-    val e2 = intercept[AnalysisException] {
-      spark.read.schema(schema).jdbc(urlWithUserAndPass, "TEST.PEOPLE", new Properties())
-    }.getMessage
-    assert(e2.contains("User specified schema not supported with `jdbc`"))
-  }
-
   test("SPARK-15648: teradataDialect StringType data mapping") {
     val teradataDialect = JdbcDialects.get("jdbc:teradata://127.0.0.1/db")
     assert(teradataDialect.getJDBCType(StringType).
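
In the spirit of the "Also add test cases in JDBCSuite.scala?" comment, a hedged sketch of what such a test could look like against the H2 TEST.PEOPLE fixture this suite already creates; the test name, upper-case field names, and row count are assumptions about that fixture, not part of this diff:

    test("SPARK-20427: read JDBC table with user-specified schema") {
      val customSchema = StructType(Seq(
        StructField("NAME", StringType, true),
        StructField("THEID", IntegerType, true)))
      val df = spark.read.schema(customSchema)
        .jdbc(urlWithUserAndPass, "TEST.PEOPLE", new Properties())
      assert(df.schema === customSchema)
      // Force the read through the custom schema; TEST.PEOPLE is assumed to hold three rows.
      assert(df.collect().length === 3)
    }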