Commits
25 commits
03c3bd9
Refactors Parquet read path to implement backwards-compatibility rules
liancheng Jul 5, 2015
0525346
Removes old Parquet record converters
liancheng Jul 5, 2015
a74fb2c
More comments
liancheng Jul 5, 2015
bcac49f
Removes the 16-byte restriction of decimals
liancheng Jul 5, 2015
6437d4b
Assembles requested schema from Parquet file schema
liancheng Jul 5, 2015
1781dff
Adds test case for SPARK-8811
liancheng Jul 5, 2015
7fb21f1
Reverts an unnecessary debugging change
liancheng Jul 5, 2015
38fe1e7
Adds explicit return type
liancheng Jul 6, 2015
802cbd7
Fixes bugs related to schema merging and empty requested columns
liancheng Jul 6, 2015
884d3e6
Fixes styling issue and reverts unnecessary changes
liancheng Jul 6, 2015
0cc1b37
Fixes MiMa checks
liancheng Jul 6, 2015
a099d3e
More comments
liancheng Jul 6, 2015
06cfe9d
Adds comments about TimestampType handling
liancheng Jul 6, 2015
13b9121
Adds ParquetAvroCompatibilitySuite
liancheng Jul 7, 2015
440f7b3
Adds generated files to .rat-excludes
liancheng Jul 7, 2015
1d390aa
Adds parquet-thrift compatibility test
liancheng Jul 7, 2015
f2208cd
Adds README.md for Thrift/Avro code generation
liancheng Jul 7, 2015
a8f13bb
Using Parquet writer API to do compatibility tests
liancheng Jul 7, 2015
3d7ab36
Fixes .rat-excludes
liancheng Jul 7, 2015
7946ee1
Fixes Scala styling issues
liancheng Jul 7, 2015
926af87
Simplifies Parquet compatibility test suites
liancheng Jul 8, 2015
598c3e8
Adds extra Maven repo for hadoop-lzo, which is a transitive dependenc…
liancheng Jul 8, 2015
b8c1295
Excludes the whole parquet package from MiMa
liancheng Jul 8, 2015
c6fbc06
Removes WIP file committed by mistake
liancheng Jul 8, 2015
360fe18
Adds ParquetHiveCompatibilitySuite
liancheng Jul 8, 2015
Using Parquet writer API to do compatibility tests
liancheng committed Jul 8, 2015
commit a8f13bba3d8c6a73658e54cfd3dc95c6bb159c49
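In short, this commit drops the MapReduce-based write path (configuring a Job and calling saveAsNewAPIHadoopFile with ParquetOutputFormat) and produces the test fixture files directly with the upstream writer classes, AvroParquetWriter and ThriftParquetWriter, so the files are written by parquet-avro / parquet-thrift themselves rather than by Spark. A minimal sketch of the resulting pattern, mirroring the Avro-side constructor used in the diff below; the suite helpers parquetStore and makeParquetAvroCompat and the record count are taken from the test, everything else is illustrative:

import org.apache.hadoop.fs.Path
import org.apache.parquet.avro.AvroParquetWriter

import org.apache.spark.sql.parquet.test.avro.ParquetAvroCompat

// Write fixture records straight through parquet-avro so the read-side test
// exercises files produced by the upstream library, not by Spark's own writer.
val writer =
  new AvroParquetWriter[ParquetAvroCompat](
    new Path(parquetStore.getCanonicalPath),  // temp directory managed by the suite
    ParquetAvroCompat.getClassSchema)         // Avro schema of the generated test class

(0 until 10).foreach(i => writer.write(makeParquetAvroCompat(i)))
writer.close()  // flushes row groups and writes the Parquet footer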
ParquetAvroCompatibilitySuite.scala
@@ -22,11 +22,9 @@ import java.util.{List => JList, Map => JMap}
 
 import scala.collection.JavaConversions._
 
-import org.apache.hadoop.mapreduce.Job
-import org.apache.parquet.avro.{AvroParquetOutputFormat, AvroWriteSupport}
-import org.apache.parquet.hadoop.ParquetOutputFormat
+import org.apache.hadoop.fs.Path
+import org.apache.parquet.avro.AvroParquetWriter
 
-import org.apache.spark.rdd.RDD._
 import org.apache.spark.sql.parquet.test.avro.{Nested, ParquetAvroCompat}
 import org.apache.spark.sql.test.TestSQLContext
 import org.apache.spark.sql.{Row, SQLContext}
@@ -88,21 +86,13 @@ class ParquetAvroCompatibilitySuite extends ParquetCompatibilityTest {
   override protected def beforeAll(): Unit = {
     super.beforeAll()
 
-    val job = new Job()
-    ParquetOutputFormat.setWriteSupportClass(job, classOf[AvroWriteSupport])
-    AvroParquetOutputFormat.setSchema(job, ParquetAvroCompat.getClassSchema)
-
-    sqlContext
-      .sparkContext
-      .parallelize(0 until 10)
-      .map(i => (null, makeParquetAvroCompat(i)))
-      .coalesce(1)
-      .saveAsNewAPIHadoopFile(
-        parquetStore.getCanonicalPath,
-        classOf[Void],
-        classOf[ParquetAvroCompat],
-        classOf[ParquetOutputFormat[ParquetAvroCompat]],
-        job.getConfiguration)
+    val writer =
+      new AvroParquetWriter[ParquetAvroCompat](
+        new Path(parquetStore.getCanonicalPath),
+        ParquetAvroCompat.getClassSchema)
+
+    (0 until 10).foreach(i => writer.write(makeParquetAvroCompat(i)))
+    writer.close()
   }
 
   test("Read Parquet file generated by parquet-avro") {
ParquetThriftCompatibilitySuite.scala
@@ -22,9 +22,9 @@ import java.util.{List => JList, Map => JMap}
 
 import scala.collection.JavaConversions._
 
-import org.apache.hadoop.mapreduce.Job
-import org.apache.parquet.hadoop.ParquetOutputFormat
-import org.apache.parquet.hadoop.thrift.ParquetThriftOutputFormat
+import org.apache.hadoop.fs.Path
+import org.apache.parquet.hadoop.metadata.CompressionCodecName
+import org.apache.parquet.thrift.ThriftParquetWriter
 
 import org.apache.spark.sql.parquet.test.thrift.{Nested, ParquetThriftCompat, Suit}
 import org.apache.spark.sql.test.TestSQLContext
@@ -84,21 +84,14 @@ class ParquetThriftCompatibilitySuite extends ParquetCompatibilityTest {
   override protected def beforeAll(): Unit = {
     super.beforeAll()
 
-    val job = new Job()
-    ParquetThriftOutputFormat.setThriftClass(job, classOf[ParquetThriftCompat])
-    ParquetOutputFormat.setWriteSupportClass(job, classOf[ParquetThriftCompat])
-
-    sqlContext
-      .sparkContext
-      .parallelize(0 until 10)
-      .map(i => (null, makeParquetThriftCompat(i)))
-      .coalesce(1)
-      .saveAsNewAPIHadoopFile(
-        parquetStore.getCanonicalPath,
-        classOf[Void],
+    val writer =
+      new ThriftParquetWriter[ParquetThriftCompat](
+        new Path(parquetStore.getCanonicalPath),
         classOf[ParquetThriftCompat],
-        classOf[ParquetThriftOutputFormat[ParquetThriftCompat]],
-        job.getConfiguration)
+        CompressionCodecName.SNAPPY)
+
+    (0 until 10).foreach(i => writer.write(makeParquetThriftCompat(i)))
+    writer.close()
   }
 
   test("Read Parquet file generated by parquet-thrift") {
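The bodies of the two read-back tests are collapsed in this view. For orientation only, a hypothetical sketch of what such a check can look like inside the suite; sqlContext and parquetStore come from the test harness, while the assertion itself is assumed rather than taken from the collapsed code:

// Hypothetical read-back check; the real test bodies are collapsed above.
val df = sqlContext.read.parquet(parquetStore.getCanonicalPath)
assert(df.count() === 10)  // one row per record written in beforeAll()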