36 commits
685fd07  use UTF8String instead of String for StringType (Mar 31, 2015)
21f67c6  cleanup (Mar 31, 2015)
4699c3a  use Array[Byte] in UTF8String (Mar 31, 2015)
d32abd1  fix utf8 for python api (Mar 31, 2015)
a85fb27  refactor (Mar 31, 2015)
6b499ac  fix style (Apr 1, 2015)
5f9e120  fix sql tests (Apr 1, 2015)
38c303e  fix python sql tests (Apr 1, 2015)
c7dd4d2  fix some catalyst tests (Apr 1, 2015)
bb52e44  fix scala style (Apr 1, 2015)
8b45864  fix codegen with UTF8String (Apr 1, 2015)
23a766c  refactor (Apr 1, 2015)
9dc32d1  fix some hive tests (Apr 2, 2015)
73e4363  Merge branch 'master' of github.com:apache/spark into string (Apr 2, 2015)
956b0a4  fix hive tests (Apr 2, 2015)
9f4c194  convert data type for data source (Apr 2, 2015)
537631c  some comment about Date (Apr 2, 2015)
28d6f32  refactor (Apr 2, 2015)
28f3d81  Merge branch 'master' of github.com:apache/spark into string (Apr 3, 2015)
e5fa5b8  remove clone in UTF8String (Apr 3, 2015)
8d17f21  fix hive compatibility tests (Apr 3, 2015)
fd11364  optimize UTF8String (Apr 3, 2015)
ac18ae6  address comment (Apr 3, 2015)
2089d24  add hashcode check back (Apr 3, 2015)
13d9d42  Merge branch 'master' of github.com:apache/spark into string (Apr 3, 2015)
867bf50  fix String filter push down (Apr 4, 2015)
1314a37  address comments from Yin (Apr 8, 2015)
5116b43  rollback unrelated changes (Apr 8, 2015)
08d897b  Merge branch 'master' of github.com:apache/spark into string (Apr 9, 2015)
b04a19c  add comment for getString/setString (Apr 10, 2015)
744788f  Merge branch 'master' of github.com:apache/spark into string (Apr 13, 2015)
341ec2c  turn off scala style check in UTF8StringSuite (Apr 13, 2015)
59025c8  address comments from @marmbrus (Apr 15, 2015)
6d776a9  Merge branch 'master' of github.com:apache/spark into string (Apr 15, 2015)
2772f0d  fix new test failure (Apr 15, 2015)
3b7bfa8  fix schema of AddJar (Apr 15, 2015)
fix some hive tests
Davies Liu committed Apr 2, 2015
commit 9dc32d1b08cd6c76a7a096beba67bcbca89b0634
HiveInspectors.scala

@@ -34,7 +34,7 @@ import scala.collection.JavaConversions._
  *  1. The Underlying data type in catalyst and in Hive
  *     In catalyst:
  *       Primitive =>
- *         java.lang.String
+ *         UTF8String
  *         int / scala.Int
  *         boolean / scala.Boolean
  *         float / scala.Float
@@ -239,7 +239,8 @@ private[hive] trait HiveInspectors {
    */
   def unwrap(data: Any, oi: ObjectInspector): Any = oi match {
     case coi: ConstantObjectInspector if coi.getWritableConstantValue == null => null
-    case poi: WritableConstantStringObjectInspector => poi.getWritableConstantValue.toString
+    case poi: WritableConstantStringObjectInspector =>
+      UTF8String(poi.getWritableConstantValue.toString)
     case poi: WritableConstantHiveVarcharObjectInspector =>
       poi.getWritableConstantValue.getHiveVarchar.getValue
     case poi: WritableConstantHiveDecimalObjectInspector =>
@@ -287,7 +288,7 @@ private[hive] trait HiveInspectors {
       hvoi.getPrimitiveWritableObject(data).getHiveVarchar.getValue
     case hvoi: HiveVarcharObjectInspector => hvoi.getPrimitiveJavaObject(data).getValue
     case x: StringObjectInspector if x.preferWritable() =>
-      x.getPrimitiveWritableObject(data).toString
+      UTF8String(x.getPrimitiveWritableObject(data).toString)
     case x: IntObjectInspector if x.preferWritable() => x.get(data)
     case x: BooleanObjectInspector if x.preferWritable() => x.get(data)
     case x: FloatObjectInspector if x.preferWritable() => x.get(data)
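For orientation, here is a minimal sketch of what the `UTF8String(...)` factory calls in the two unwrap hunks above are doing. `MiniUTF8String` is an illustrative stand-in, not the PR's class; the real `org.apache.spark.sql.types.UTF8String` in this change is likewise backed by an `Array[Byte]` of UTF-8 bytes (see commit 4699c3a) but adds comparison, slicing, and codegen support.

```scala
// Illustrative stand-in for org.apache.spark.sql.types.UTF8String:
// the string is held as UTF-8 bytes and compared byte-wise, so equals
// and hashCode agree with the byte representation.
class MiniUTF8String(val bytes: Array[Byte]) {
  override def toString: String = new String(bytes, "UTF-8")

  override def equals(other: Any): Boolean = other match {
    case that: MiniUTF8String => java.util.Arrays.equals(bytes, that.bytes)
    case _ => false
  }

  override def hashCode: Int = java.util.Arrays.hashCode(bytes)
}

object MiniUTF8String {
  // Mirrors the UTF8String(javaString) factory used in unwrap above.
  def apply(s: String): MiniUTF8String = new MiniUTF8String(s.getBytes("UTF-8"))
}
```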
@@ -340,7 +341,9 @@ private[hive] trait HiveInspectors {
    */
   protected def wrapperFor(oi: ObjectInspector): Any => Any = oi match {
     case _: JavaHiveVarcharObjectInspector =>
-      (o: Any) => new HiveVarchar(o.asInstanceOf[String], o.asInstanceOf[String].size)
+      (o: Any) =>
+        val s = o.asInstanceOf[UTF8String].toString
+        new HiveVarchar(s, s.size)
 
     case _: JavaHiveDecimalObjectInspector =>
       (o: Any) => HiveShim.createDecimal(o.asInstanceOf[Decimal].toJavaBigDecimal)
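The hunk above also removes a latent cast bug: once values arrive as UTF8String, `o.asInstanceOf[String]` fails at runtime. A tiny standalone demonstration, with `UTF8StringLike` as a hypothetical stand-in for Catalyst's class:

```scala
object VarcharCastDemo extends App {
  // Hypothetical stand-in for org.apache.spark.sql.types.UTF8String.
  class UTF8StringLike(bytes: Array[Byte]) {
    override def toString: String = new String(bytes, "UTF-8")
  }

  val o: Any = new UTF8StringLike("varchar value".getBytes("UTF-8"))

  // o.asInstanceOf[String].size would throw ClassCastException here,
  // because the runtime class is UTF8StringLike, not java.lang.String.
  // Converting once and reusing the result is both safe and cheaper:
  val s = o.asInstanceOf[UTF8StringLike].toString
  println(s.size) // 13, computed from the converted String
}
```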
@@ -409,7 +410,7 @@ private[hive] trait HiveInspectors {
     case x: PrimitiveObjectInspector => x match {
       // TODO we don't support the HiveVarcharObjectInspector yet.
       case _: StringObjectInspector if x.preferWritable() => HiveShim.getStringWritable(a)
-      case _: StringObjectInspector => a.asInstanceOf[java.lang.String]
+      case _: StringObjectInspector => a.asInstanceOf[UTF8String].toString()
       case _: IntObjectInspector if x.preferWritable() => HiveShim.getIntWritable(a)
       case _: IntObjectInspector => a.asInstanceOf[java.lang.Integer]
       case _: BooleanObjectInspector if x.preferWritable() => HiveShim.getBooleanWritable(a)
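The wrap side mirrors unwrap: inspectors that prefer writables get a Hadoop writable via HiveShim, the rest get a plain Java object. A hedged sketch of that dispatch, with toy types standing in for Hive's ObjectInspector hierarchy and for org.apache.hadoop.io.Text:

```scala
// Toy stand-ins for Hive's inspector hierarchy and Text writable.
sealed trait ToyInspector { def preferWritable: Boolean }
case class ToyStringInspector(preferWritable: Boolean) extends ToyInspector
case class ToyText(value: String)

// Catalyst value in, Hive-consumable value out: a writable when the
// inspector prefers one, otherwise a plain java.lang.String.
def wrapString(a: Any, oi: ToyInspector): Any =
  if (oi.preferWritable) ToyText(a.toString) else a.toString

// e.g. wrapString("s", ToyStringInspector(true)) == ToyText("s")
```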
Shim13.scala

@@ -17,36 +17,33 @@
 
 package org.apache.spark.sql.hive
 
-import java.util
-import java.util.{ArrayList => JArrayList}
-import java.util.Properties
 import java.rmi.server.UID
+import java.util.{Properties, ArrayList => JArrayList}
 
 import scala.collection.JavaConversions._
 import scala.language.implicitConversions
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
-import org.apache.hadoop.io.{NullWritable, Writable}
-import org.apache.hadoop.mapred.InputFormat
 import org.apache.hadoop.hive.common.StatsSetupConst
-import org.apache.hadoop.hive.common.`type`.{HiveDecimal}
+import org.apache.hadoop.hive.common.`type`.HiveDecimal
 import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.ql.Context
-import org.apache.hadoop.hive.ql.metadata.{Table, Hive, Partition}
+import org.apache.hadoop.hive.ql.metadata.{Hive, Partition, Table}
 import org.apache.hadoop.hive.ql.plan.{CreateTableDesc, FileSinkDesc, TableDesc}
 import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory
 import org.apache.hadoop.hive.serde.serdeConstants
-import org.apache.hadoop.hive.serde2.typeinfo.{TypeInfo, DecimalTypeInfo, TypeInfoFactory}
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.{HiveDecimalObjectInspector, PrimitiveObjectInspectorFactory}
-import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspectorConverters, PrimitiveObjectInspector, ObjectInspector}
-import org.apache.hadoop.hive.serde2.{Deserializer, ColumnProjectionUtils}
-import org.apache.hadoop.hive.serde2.{io => hiveIo}
 import org.apache.hadoop.hive.serde2.avro.AvroGenericRecordWritable
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.{HiveDecimalObjectInspector, PrimitiveObjectInspectorFactory}
+import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ObjectInspectorConverters, PrimitiveObjectInspector}
+import org.apache.hadoop.hive.serde2.typeinfo.{DecimalTypeInfo, TypeInfo, TypeInfoFactory}
+import org.apache.hadoop.hive.serde2.{ColumnProjectionUtils, Deserializer, io => hiveIo}
+import org.apache.hadoop.io.{NullWritable, Writable}
+import org.apache.hadoop.mapred.InputFormat
 import org.apache.hadoop.{io => hadoopIo}
 
 import org.apache.spark.Logging
-import org.apache.spark.sql.types.{Decimal, DecimalType}
+import org.apache.spark.sql.types.{Decimal, DecimalType, UTF8String}
 
 
 /**
@@ -63,11 +60,12 @@ private[hive] case class HiveFunctionWrapper(var functionClassName: String)
   // for Serialization
   def this() = this(null)
 
-  import java.io.{OutputStream, InputStream}
-  import com.esotericsoftware.kryo.Kryo
-  import org.apache.spark.util.Utils._
-  import org.apache.hadoop.hive.ql.exec.Utilities
-  import org.apache.hadoop.hive.ql.exec.UDF
+  import java.io.{InputStream, OutputStream}
+
+  import com.esotericsoftware.kryo.Kryo
+  import org.apache.hadoop.hive.ql.exec.{UDF, Utilities}
+
+  import org.apache.spark.util.Utils._
 
   @transient
   private val methodDeSerialize = {
@@ -224,7 +222,7 @@ private[hive] object HiveShim {
       TypeInfoFactory.voidTypeInfo, null)
 
   def getStringWritable(value: Any): hadoopIo.Text =
-    if (value == null) null else new hadoopIo.Text(value.asInstanceOf[String])
+    if (value == null) null else new hadoopIo.Text(value.asInstanceOf[UTF8String].toString)
Review comment on getStringWritable:

Contributor: Since you are changing Shim13, do we need to update Shim12 as well?

Contributor Author: Good catch, done
 
   def getIntWritable(value: Any): hadoopIo.IntWritable =
     if (value == null) null else new hadoopIo.IntWritable(value.asInstanceOf[Int])
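Finally, a self-contained sketch of the null-safe conversion pattern that getStringWritable and getIntWritable share; ToyText is again a hypothetical stand-in for org.apache.hadoop.io.Text:

```scala
// Hypothetical stand-in for org.apache.hadoop.io.Text.
case class ToyText(value: String)

// SQL NULL reaches the shim as a null reference, so the null check must
// run before any cast or toString call; otherwise the call would NPE.
def getStringWritable(value: Any): ToyText =
  if (value == null) null else ToyText(value.toString)

// getStringWritable("a") == ToyText("a"); getStringWritable(null) == null
```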