[SPARK-8580] [SPARK-10136] [SQL] Fixes Parquet support for Avro array of primitive array #8341
Closed
liancheng wants to merge 1 commit into apache:master from liancheng:spark-10136/parquet-avro-nested-primitive-array
Fixes SPARK-10136 and adds more tests
commit 34547d693001ce3bea3758c831f07e8ec33def81
142 changes: 142 additions & 0 deletions
...n-java/org/apache/spark/sql/execution/datasources/parquet/test/avro/AvroArrayOfArray.java
@@ -0,0 +1,142 @@
/**
 * Autogenerated by Avro
 *
 * DO NOT EDIT DIRECTLY
 */
package org.apache.spark.sql.execution.datasources.parquet.test.avro;
@SuppressWarnings("all")
@org.apache.avro.specific.AvroGenerated
public class AvroArrayOfArray extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"AvroArrayOfArray\",\"namespace\":\"org.apache.spark.sql.execution.datasources.parquet.test.avro\",\"fields\":[{\"name\":\"int_arrays_column\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"array\",\"items\":\"int\"}}}]}");
  public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
  @Deprecated public java.util.List<java.util.List<java.lang.Integer>> int_arrays_column;

  /**
   * Default constructor. Note that this does not initialize fields
   * to their default values from the schema. If that is desired then
   * one should use <code>newBuilder()</code>.
   */
  public AvroArrayOfArray() {}

  /**
   * All-args constructor.
   */
  public AvroArrayOfArray(java.util.List<java.util.List<java.lang.Integer>> int_arrays_column) {
    this.int_arrays_column = int_arrays_column;
  }

  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
  // Used by DatumWriter. Applications should not call.
  public java.lang.Object get(int field$) {
    switch (field$) {
      case 0: return int_arrays_column;
      default: throw new org.apache.avro.AvroRuntimeException("Bad index");
    }
  }
  // Used by DatumReader. Applications should not call.
  @SuppressWarnings(value="unchecked")
  public void put(int field$, java.lang.Object value$) {
    switch (field$) {
      case 0: int_arrays_column = (java.util.List<java.util.List<java.lang.Integer>>)value$; break;
      default: throw new org.apache.avro.AvroRuntimeException("Bad index");
    }
  }

  /**
   * Gets the value of the 'int_arrays_column' field.
   */
  public java.util.List<java.util.List<java.lang.Integer>> getIntArraysColumn() {
    return int_arrays_column;
  }

  /**
   * Sets the value of the 'int_arrays_column' field.
   * @param value the value to set.
   */
  public void setIntArraysColumn(java.util.List<java.util.List<java.lang.Integer>> value) {
    this.int_arrays_column = value;
  }

  /** Creates a new AvroArrayOfArray RecordBuilder */
  public static org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray.Builder newBuilder() {
    return new org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray.Builder();
  }

  /** Creates a new AvroArrayOfArray RecordBuilder by copying an existing Builder */
  public static org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray.Builder newBuilder(org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray.Builder other) {
    return new org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray.Builder(other);
  }

  /** Creates a new AvroArrayOfArray RecordBuilder by copying an existing AvroArrayOfArray instance */
  public static org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray.Builder newBuilder(org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray other) {
    return new org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray.Builder(other);
  }

  /**
   * RecordBuilder for AvroArrayOfArray instances.
   */
  public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<AvroArrayOfArray>
    implements org.apache.avro.data.RecordBuilder<AvroArrayOfArray> {

    private java.util.List<java.util.List<java.lang.Integer>> int_arrays_column;

    /** Creates a new Builder */
    private Builder() {
      super(org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray.SCHEMA$);
    }

    /** Creates a Builder by copying an existing Builder */
    private Builder(org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray.Builder other) {
      super(other);
      if (isValidValue(fields()[0], other.int_arrays_column)) {
        this.int_arrays_column = data().deepCopy(fields()[0].schema(), other.int_arrays_column);
        fieldSetFlags()[0] = true;
      }
    }

    /** Creates a Builder by copying an existing AvroArrayOfArray instance */
    private Builder(org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray other) {
      super(org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray.SCHEMA$);
      if (isValidValue(fields()[0], other.int_arrays_column)) {
        this.int_arrays_column = data().deepCopy(fields()[0].schema(), other.int_arrays_column);
        fieldSetFlags()[0] = true;
      }
    }

    /** Gets the value of the 'int_arrays_column' field */
    public java.util.List<java.util.List<java.lang.Integer>> getIntArraysColumn() {
      return int_arrays_column;
    }

    /** Sets the value of the 'int_arrays_column' field */
    public org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray.Builder setIntArraysColumn(java.util.List<java.util.List<java.lang.Integer>> value) {
      validate(fields()[0], value);
      this.int_arrays_column = value;
      fieldSetFlags()[0] = true;
      return this;
    }

    /** Checks whether the 'int_arrays_column' field has been set */
    public boolean hasIntArraysColumn() {
      return fieldSetFlags()[0];
    }

    /** Clears the value of the 'int_arrays_column' field */
    public org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray.Builder clearIntArraysColumn() {
      int_arrays_column = null;
      fieldSetFlags()[0] = false;
      return this;
    }

    @Override
    public AvroArrayOfArray build() {
      try {
        AvroArrayOfArray record = new AvroArrayOfArray();
        record.int_arrays_column = fieldSetFlags()[0] ? this.int_arrays_column : (java.util.List<java.util.List<java.lang.Integer>>) defaultValue(fields()[0]);
        return record;
      } catch (Exception e) {
        throw new org.apache.avro.AvroRuntimeException(e);
      }
    }
  }
}
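As a quick illustration of the builder API in the generated class above, here is a minimal usage sketch. The wrapper class name and the sample values are illustrative, and it assumes the generated sources are on the test classpath.

import java.util.Arrays;
import java.util.List;
import org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray;

public class AvroArrayOfArrayUsageSketch {
  public static void main(String[] args) {
    // Build a record whose single field is an array of int arrays,
    // mirroring the nested primitive-array shape exercised by SPARK-10136.
    List<List<Integer>> nested = Arrays.asList(
      Arrays.asList(1, 2, 3),
      Arrays.asList(4, 5, 6));

    AvroArrayOfArray record = AvroArrayOfArray.newBuilder()
      .setIntArraysColumn(nested)
      .build();

    // Prints [[1, 2, 3], [4, 5, 6]]
    System.out.println(record.getIntArraysColumn());
  }
}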
We can merge this so it's included in 1.5, but why are we checking in auto-generated code? Can't we build it on demand?
When I added the first set of Parquet Avro/Thrift/Hive compatibility suites, I considered building these files on demand, but that complicates the build process. In particular, it requires developers and Jenkins to install the Thrift binary for code generation (or is there a way to avoid this?). Beyond that, the Thrift version Parquet uses is super tricky to compile on Mac because some C++ header files are missing there (I always compile the parquet-thrift tests in an Ubuntu VM). That's why I decided to check in the generated Java files. This way, only developers who update these test suites need to install the extra dependencies.
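For context on what building on demand would involve for the Avro side: the escaped string passed to Schema.Parser in the SCHEMA$ field above corresponds to the following Avro schema, pretty-printed here (the .avsc file name is illustrative):

{
  "type": "record",
  "name": "AvroArrayOfArray",
  "namespace": "org.apache.spark.sql.execution.datasources.parquet.test.avro",
  "fields": [
    {
      "name": "int_arrays_column",
      "type": {"type": "array", "items": {"type": "array", "items": "int"}}
    }
  ]
}

Regenerating the class from such a schema file is done with Avro's code generator, for example the avro-tools "compile schema" subcommand or the avro-maven-plugin.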