@@ -17,10 +17,12 @@

package org.apache.spark.sql.jdbc

import java.sql.Connection
import java.sql.{Connection, Date, Timestamp}
import java.util.Properties

import org.apache.spark.sql.Row
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._
import org.apache.spark.tags.DockerTest

/**
@@ -77,4 +79,74 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSQLContext
// verify that the inserted value is correct
assert(rows(0).getString(0).equals("foo"))
}

test("SPARK-16625: General data types to be mapped to Oracle") {
val props = new Properties()
props.put("oracle.jdbc.mapDateToTimestamp", "false")

val schema = StructType(Seq(
StructField("boolean_type", BooleanType, true),
StructField("integer_type", IntegerType, true),
StructField("long_type", LongType, true),
StructField("float_Type", FloatType, true),
StructField("double_type", DoubleType, true),
StructField("byte_type", ByteType, true),
StructField("short_type", ShortType, true),
StructField("string_type", StringType, true),
StructField("binary_type", BinaryType, true),
StructField("date_type", DateType, true),
StructField("timestamp_type", TimestampType, true)
))

val tableName = "test_oracle_general_types"
val booleanVal = true
val integerVal = 1
val longVal = 2L
val floatVal = 3.0f
val doubleVal = 4.0
val byteVal = 2.toByte
val shortVal = 5.toShort
val stringVal = "string"
val binaryVal = Array[Byte](6, 7, 8)
val dateVal = Date.valueOf("2016-07-26")
val timestampVal = Timestamp.valueOf("2016-07-26 11:49:45")

val data = spark.sparkContext.parallelize(Seq(
Row(
booleanVal, integerVal, longVal, floatVal, doubleVal, byteVal, shortVal, stringVal,
binaryVal, dateVal, timestampVal
)))

val dfWrite = spark.createDataFrame(data, schema)
dfWrite.write.jdbc(jdbcUrl, tableName, props)

val dfRead = spark.read.jdbc(jdbcUrl, tableName, props)
val rows = dfRead.collect()
// verify the Java types that the values are read back as
val types = rows(0).toSeq.map(x => x.getClass.toString)
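// Note: some types do not round-trip exactly because of the Oracle column types chosen by
// OracleDialect in this change: DoubleType is written as NUMBER(19, 4) and read back as Float,
// while ByteType and ShortType are written as NUMBER(3) / NUMBER(5) and read back as Integer.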
assert(types(0).equals("class java.lang.Boolean"))
assert(types(1).equals("class java.lang.Integer"))
assert(types(2).equals("class java.lang.Long"))
assert(types(3).equals("class java.lang.Float"))
assert(types(4).equals("class java.lang.Float"))
assert(types(5).equals("class java.lang.Integer"))
assert(types(6).equals("class java.lang.Integer"))
assert(types(7).equals("class java.lang.String"))
assert(types(8).equals("class [B"))
assert(types(9).equals("class java.sql.Date"))
assert(types(10).equals("class java.sql.Timestamp"))
// verify that the inserted values are read back correctly
val values = rows(0)
assert(values.getBoolean(0).equals(booleanVal))
assert(values.getInt(1).equals(integerVal))
assert(values.getLong(2).equals(longVal))
assert(values.getFloat(3).equals(floatVal))
assert(values.getFloat(4).equals(doubleVal.toFloat))
assert(values.getInt(5).equals(byteVal.toInt))
assert(values.getInt(6).equals(shortVal.toInt))
assert(values.getString(7).equals(stringVal))
assert(values.getAs[Array[Byte]](8).mkString.equals("678"))
assert(values.getDate(9).equals(dateVal))
assert(values.getTimestamp(10).equals(timestampVal))
}
}
@@ -26,30 +26,38 @@ private case object OracleDialect extends JdbcDialect {

override def canHandle(url: String): Boolean = url.startsWith("jdbc:oracle")

The getCatalystType implementation is rewritten from an if/else chain into a match on sqlType. The existing handling of NUMBER columns with no precision/scale and of FLOAT columns (reported by the driver as NUMERIC with scale -127) is kept, and new cases are added for the NUMBER(1), NUMBER(3)/NUMBER(5)/NUMBER(10) and NUMBER(19) column types produced by getJDBCType below:

  override def getCatalystType(sqlType: Int, typeName: String, size: Int, md: MetadataBuilder):
      Option[DataType] = sqlType match {
    // Handle NUMBER fields that have no precision/scale in a special way
    // because JDBC ResultSetMetaData converts this to 0 precision and -127 scale.
    // For more details, please see
    // https://github.com/apache/spark/pull/8780#issuecomment-145598968
    // and
    // https://github.com/apache/spark/pull/8780#issuecomment-144541760
    // This is sub-optimal as we have to pick a precision/scale in advance whereas the data
    // in Oracle is allowed to have different precision/scale for each value.
    case Types.NUMERIC if size == 0 => Option(DecimalType(DecimalType.MAX_PRECISION, 10))
    // Handle FLOAT fields in a special way because JDBC ResultSetMetaData converts
    // this to NUMERIC with -127 scale.
    // Not sure if there is a more robust way to identify the field as a float (or other
    // numeric types that do not specify a scale).
    case Types.NUMERIC if md.build().getLong("scale") == -127 =>
      Option(DecimalType(DecimalType.MAX_PRECISION, 10))
    case Types.NUMERIC if size == 1 => Option(BooleanType)
    case Types.NUMERIC if size == 3 || size == 5 || size == 10 => Option(IntegerType)
    case Types.NUMERIC if size == 19 && md.build().getLong("scale") == 0L => Option(LongType)
    case Types.NUMERIC if size == 19 && md.build().getLong("scale") == 4L => Option(FloatType)
    case _ => None
  }

Review comment (Member), attached to the case Types.NUMERIC if md.build().getLong("scale") == -127 line:

@wangyum I can merge this, since it looks reasonable, even though I'm not an Oracle expert. Can we streamline this a bit by only computing md.build().getLong("scale") once? The repeated matching of Types.NUMERIC could be done once in an if statement and be clearer.
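As an illustration only (not part of this pull request), here is a minimal sketch of the refactor the reviewer suggests: compute md.build().getLong("scale") once and test Types.NUMERIC a single time. It assumes the same imports as OracleDialect.scala (java.sql.Types and org.apache.spark.sql.types._) and is intended to keep the behavior of the match above:

  override def getCatalystType(sqlType: Int, typeName: String, size: Int, md: MetadataBuilder):
      Option[DataType] = {
    if (sqlType != Types.NUMERIC) {
      None
    } else if (size == 0) {
      // NUMBER with no declared precision/scale.
      Option(DecimalType(DecimalType.MAX_PRECISION, 10))
    } else {
      // Build the metadata and read the scale a single time.
      val scale = md.build().getLong("scale")
      if (scale == -127) {
        // FLOAT is reported by the driver as NUMERIC with scale -127.
        Option(DecimalType(DecimalType.MAX_PRECISION, 10))
      } else {
        size match {
          case 1 => Option(BooleanType)
          case 3 | 5 | 10 => Option(IntegerType)
          case 19 if scale == 0 => Option(LongType)
          case 19 if scale == 4 => Option(FloatType)
          case _ => None
        }
      }
    }
  }

Whether this reads better than the guarded match is a style call; the two forms are intended to behave the same.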

  override def getJDBCType(dt: DataType): Option[JdbcType] = dt match {
    // For more details, please see
    // https://docs.oracle.com/cd/E19501-01/819-3659/gcmaz/
    case BooleanType => Some(JdbcType("NUMBER(1)", java.sql.Types.BOOLEAN))
    case IntegerType => Some(JdbcType("NUMBER(10)", java.sql.Types.INTEGER))
    case LongType => Some(JdbcType("NUMBER(19)", java.sql.Types.BIGINT))
    case FloatType => Some(JdbcType("NUMBER(19, 4)", java.sql.Types.FLOAT))
    case DoubleType => Some(JdbcType("NUMBER(19, 4)", java.sql.Types.DOUBLE))
    case ByteType => Some(JdbcType("NUMBER(3)", java.sql.Types.SMALLINT))
    case ShortType => Some(JdbcType("NUMBER(5)", java.sql.Types.SMALLINT))
    case StringType => Some(JdbcType("VARCHAR2(255)", java.sql.Types.VARCHAR))
    case _ => None
  }
21 changes: 21 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -739,6 +739,27 @@ class JDBCSuite extends SparkFunSuite
map(_.databaseTypeDefinition).get == "VARCHAR2(255)")
}

test("SPARK-16625: General data types to be mapped to Oracle") {

def getJdbcType(dialect: JdbcDialect, dt: DataType): String = {
dialect.getJDBCType(dt).orElse(JdbcUtils.getCommonJDBCType(dt)).
map(_.databaseTypeDefinition).get
}

val oracleDialect = JdbcDialects.get("jdbc:oracle://127.0.0.1/db")
assert(getJdbcType(oracleDialect, BooleanType) == "NUMBER(1)")
assert(getJdbcType(oracleDialect, IntegerType) == "NUMBER(10)")
assert(getJdbcType(oracleDialect, LongType) == "NUMBER(19)")
assert(getJdbcType(oracleDialect, FloatType) == "NUMBER(19, 4)")
assert(getJdbcType(oracleDialect, DoubleType) == "NUMBER(19, 4)")
assert(getJdbcType(oracleDialect, ByteType) == "NUMBER(3)")
assert(getJdbcType(oracleDialect, ShortType) == "NUMBER(5)")
assert(getJdbcType(oracleDialect, StringType) == "VARCHAR2(255)")
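// BinaryType, DateType and TimestampType have no mapping in OracleDialect.getJDBCType; the
// orElse above falls back to JdbcUtils.getCommonJDBCType, which yields BLOB, DATE and TIMESTAMP.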
assert(getJdbcType(oracleDialect, BinaryType) == "BLOB")
assert(getJdbcType(oracleDialect, DateType) == "DATE")
assert(getJdbcType(oracleDialect, TimestampType) == "TIMESTAMP")
}

private def assertEmptyQuery(sqlString: String): Unit = {
assert(sql(sqlString).collect().isEmpty)
}