diff --git a/README.md b/README.md
index 8514a701d4..93770c511a 100644
--- a/README.md
+++ b/README.md
@@ -35,19 +35,19 @@ Parquet-MR uses Maven to build and depends on the thrift compiler (protoc is now
To build and install the thrift compiler, run:
```
-wget -nv http://archive.apache.org/dist/thrift/0.16.0/thrift-0.16.0.tar.gz
-tar xzf thrift-0.16.0.tar.gz
-cd thrift-0.16.0
+wget -nv http://archive.apache.org/dist/thrift/0.19.0/thrift-0.19.0.tar.gz
+tar xzf thrift-0.19.0.tar.gz
+cd thrift-0.19.0
chmod +x ./configure
./configure --disable-libs
sudo make install
```
-If you're on OSX and use homebrew, you can instead install Thrift 0.16.0 with `brew` and ensure that it comes first in your `PATH`.
+If you're on macOS and use Homebrew, you can instead install Thrift 0.19.0 with `brew` and ensure that it comes first in your `PATH`.
```
brew install thrift
-export PATH="/usr/local/opt/thrift@0.16.0/bin:$PATH"
+export PATH="/usr/local/opt/thrift@0.19.0/bin:$PATH"
```
### Build Parquet with Maven
diff --git a/dev/ci-before_install.sh b/dev/ci-before_install.sh
index 82b7a06760..9d15311f52 100755
--- a/dev/ci-before_install.sh
+++ b/dev/ci-before_install.sh
@@ -20,7 +20,7 @@
# This script gets invoked by the CI system in a "before install" step
################################################################################
-export THRIFT_VERSION=0.16.0
+export THRIFT_VERSION=0.19.0
set -e
date
diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
index 8821df2b82..5bc2f89f47 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java
@@ -427,6 +427,9 @@ public THIS columnOrder(ColumnOrder columnOrder) {
@Override
protected PrimitiveType build(String name) {
+ if (length == 0 && logicalTypeAnnotation instanceof LogicalTypeAnnotation.UUIDLogicalTypeAnnotation) {
+ length = LogicalTypeAnnotation.UUIDLogicalTypeAnnotation.BYTES;
+ }
if (PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY == primitiveType) {
Preconditions.checkArgument(length > 0, "Invalid FIXED_LEN_BYTE_ARRAY length: %s", length);
}
diff --git a/parquet-format-structures/pom.xml b/parquet-format-structures/pom.xml
index 63112fd7db..c1b805e8dd 100644
--- a/parquet-format-structures/pom.xml
+++ b/parquet-format-structures/pom.xml
@@ -156,6 +156,11 @@
libthrift
${format.thrift.version}
+
+ javax.annotation
+ javax.annotation-api
+ ${javax.annotation.version}
+
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/InterningProtocol.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/InterningProtocol.java
index 8b85dbebcb..bf4a214369 100644
--- a/parquet-format-structures/src/main/java/org/apache/parquet/format/InterningProtocol.java
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/InterningProtocol.java
@@ -20,6 +20,7 @@
package org.apache.parquet.format;
import java.nio.ByteBuffer;
+import java.util.UUID;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TField;
import org.apache.thrift.protocol.TList;
@@ -42,187 +43,243 @@ public InterningProtocol(TProtocol delegate) {
this.delegate = delegate;
}
+ @Override
public TTransport getTransport() {
return delegate.getTransport();
}
+ @Override
public void writeMessageBegin(TMessage message) throws TException {
delegate.writeMessageBegin(message);
}
+ @Override
public void writeMessageEnd() throws TException {
delegate.writeMessageEnd();
}
+ @Override
public int hashCode() {
return delegate.hashCode();
}
+ @Override
public void writeStructBegin(TStruct struct) throws TException {
delegate.writeStructBegin(struct);
}
+ @Override
public void writeStructEnd() throws TException {
delegate.writeStructEnd();
}
+ @Override
public void writeFieldBegin(TField field) throws TException {
delegate.writeFieldBegin(field);
}
+ @Override
public void writeFieldEnd() throws TException {
delegate.writeFieldEnd();
}
+ @Override
public void writeFieldStop() throws TException {
delegate.writeFieldStop();
}
+ @Override
public void writeMapBegin(TMap map) throws TException {
delegate.writeMapBegin(map);
}
+ @Override
public void writeMapEnd() throws TException {
delegate.writeMapEnd();
}
+ @Override
public void writeListBegin(TList list) throws TException {
delegate.writeListBegin(list);
}
+ @Override
public void writeListEnd() throws TException {
delegate.writeListEnd();
}
+ @Override
public void writeSetBegin(TSet set) throws TException {
delegate.writeSetBegin(set);
}
+ @Override
public void writeSetEnd() throws TException {
delegate.writeSetEnd();
}
+ @Override
public void writeBool(boolean b) throws TException {
delegate.writeBool(b);
}
+ @Override
public void writeByte(byte b) throws TException {
delegate.writeByte(b);
}
+ @Override
public void writeI16(short i16) throws TException {
delegate.writeI16(i16);
}
+ @Override
public void writeI32(int i32) throws TException {
delegate.writeI32(i32);
}
+ @Override
public void writeI64(long i64) throws TException {
delegate.writeI64(i64);
}
+ /**
+  * Forwards the UUID to the wrapped protocol unchanged.
+  */
+ @Override
+ public void writeUuid(UUID uuid) throws TException {
+ delegate.writeUuid(uuid);
+ }
+
+ @Override
public void writeDouble(double dub) throws TException {
delegate.writeDouble(dub);
}
+ @Override
public void writeString(String str) throws TException {
delegate.writeString(str);
}
+ @Override
public void writeBinary(ByteBuffer buf) throws TException {
delegate.writeBinary(buf);
}
+ @Override
public TMessage readMessageBegin() throws TException {
return delegate.readMessageBegin();
}
+ @Override
public void readMessageEnd() throws TException {
delegate.readMessageEnd();
}
+ @Override
public TStruct readStructBegin() throws TException {
return delegate.readStructBegin();
}
+ @Override
public void readStructEnd() throws TException {
delegate.readStructEnd();
}
+ @Override
public TField readFieldBegin() throws TException {
return delegate.readFieldBegin();
}
+ @Override
public void readFieldEnd() throws TException {
delegate.readFieldEnd();
}
+ @Override
public TMap readMapBegin() throws TException {
return delegate.readMapBegin();
}
+ @Override
public void readMapEnd() throws TException {
delegate.readMapEnd();
}
+ @Override
public TList readListBegin() throws TException {
return delegate.readListBegin();
}
+ @Override
public void readListEnd() throws TException {
delegate.readListEnd();
}
+ @Override
public TSet readSetBegin() throws TException {
return delegate.readSetBegin();
}
+ @Override
public void readSetEnd() throws TException {
delegate.readSetEnd();
}
+ @Override
public boolean equals(Object obj) {
return delegate.equals(obj);
}
+ @Override
public boolean readBool() throws TException {
return delegate.readBool();
}
+ @Override
public byte readByte() throws TException {
return delegate.readByte();
}
+ @Override
public short readI16() throws TException {
return delegate.readI16();
}
+ @Override
public int readI32() throws TException {
return delegate.readI32();
}
+ @Override
public long readI64() throws TException {
return delegate.readI64();
}
+ /**
+  * Reads a UUID from the wrapped protocol and returns it as-is; unlike
+  * {@code readString()} in this class, the value is not interned.
+  */
+ @Override
+ public UUID readUuid() throws TException {
+ return delegate.readUuid();
+ }
+
+ @Override
public double readDouble() throws TException {
return delegate.readDouble();
}
+ @Override
public String readString() throws TException {
// this is where we intern the strings
return delegate.readString().intern();
}
+ @Override
public ByteBuffer readBinary() throws TException {
return delegate.readBinary();
}
+ @Override
public void reset() {
delegate.reset();
}
+ @Override
public String toString() {
return delegate.toString();
}
diff --git a/parquet-thrift/pom.xml b/parquet-thrift/pom.xml
index 31bcb3a3d2..964e4e4420 100644
--- a/parquet-thrift/pom.xml
+++ b/parquet-thrift/pom.xml
@@ -138,6 +138,11 @@
2.12.5
provided
+
+ javax.annotation
+ javax.annotation-api
+ ${javax.annotation.version}
+
org.antlr
antlr-runtime
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/BufferedProtocolReadToWrite.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/BufferedProtocolReadToWrite.java
index 3a2bc20499..7e8a1633f8 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/BufferedProtocolReadToWrite.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/BufferedProtocolReadToWrite.java
@@ -21,6 +21,7 @@
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
+import java.util.UUID;
import org.apache.parquet.ShouldNeverHappenException;
import org.apache.parquet.thrift.struct.ThriftField;
import org.apache.parquet.thrift.struct.ThriftType;
@@ -239,6 +240,10 @@ private boolean readOneValue(TProtocol in, byte type, List buffer, Thrif
final ByteBuffer bin = in.readBinary();
writeStringAction(buffer, bin);
break;
+ case TType.UUID:
+ final UUID uuid = in.readUuid();
+ writeUuidAction(buffer, uuid);
+ break;
case TType.VOID:
break;
default:
@@ -261,6 +266,20 @@ public String toDebugString() {
});
}
+ /**
+  * Appends to {@code buffer} an action that, when its {@code write(TProtocol)} is
+  * invoked, writes {@code uuid} to the given output protocol.
+  */
+ private void writeUuidAction(List buffer, final UUID uuid) {
+ buffer.add(new Action() {
+ @Override
+ public void write(TProtocol out) throws TException {
+ out.writeUuid(uuid);
+ }
+
+ @Override
+ public String toDebugString() {
+ // debug output is the UUID's standard string form
+ return uuid.toString();
+ }
+ });
+ }
+
private void writeLongAction(List buffer, final long l) {
buffer.add(new Action() {
@Override
@@ -617,6 +636,9 @@ public void writeI32(int i) throws TException {}
@Override
public void writeI64(long l) throws TException {}
+ // No-op: the value is discarded, like the neighbouring writeI32/writeI64/writeDouble in this class.
+ @Override
+ public void writeUuid(UUID uuid) throws TException {}
+
@Override
public void writeDouble(double v) throws TException {}
@@ -699,6 +721,11 @@ public long readI64() throws TException {
return 0;
}
+ // Placeholder result: returns null where the neighbouring primitive reads (readI64, readDouble) return 0.
+ // NOTE(review): callers must not dereference the result — confirm none do.
+ @Override
+ public UUID readUuid() throws TException {
+ return null;
+ }
+
@Override
public double readDouble() throws TException {
return 0;
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ParquetProtocol.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ParquetProtocol.java
index 9ad6bd559e..ab35497181 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ParquetProtocol.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ParquetProtocol.java
@@ -20,6 +20,7 @@
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
+import java.util.UUID;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TField;
import org.apache.thrift.protocol.TList;
@@ -156,6 +157,11 @@ public void writeI64(long i64) throws TException {
throw exception();
}
+ /**
+  * Default implementation: throws the exception produced by {@code exception()},
+  * like the neighbouring write methods of this class.
+  */
+ @Override
+ public void writeUuid(UUID uuid) throws TException {
+ throw exception();
+ }
+
@Override
public void writeDouble(double dub) throws TException {
throw exception();
@@ -259,6 +265,11 @@ public long readI64() throws TException {
throw exception();
}
+ /**
+  * Default implementation: throws the exception produced by {@code exception()},
+  * like the neighbouring read methods of this class. Subclasses that can supply
+  * a UUID override this method.
+  */
+ @Override
+ public UUID readUuid() throws TException {
+ throw exception();
+ }
+
@Override
public double readDouble() throws TException {
throw exception();
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ParquetWriteProtocol.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ParquetWriteProtocol.java
index c2fea0de82..0abeed9465 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ParquetWriteProtocol.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ParquetWriteProtocol.java
@@ -771,6 +771,7 @@ private TProtocol getProtocol(ThriftField field, ColumnIO columnIO, Events retur
case I32:
case I64:
case STRING:
+ case UUID:
p = new PrimitiveWriteProtocol((PrimitiveColumnIO) columnIO, returnClause);
break;
case STRUCT:
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java
index 9002966736..317ad80a24 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConvertVisitor.java
@@ -24,6 +24,7 @@
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
@@ -400,6 +401,11 @@ public ConvertedField visit(StringType stringType, State state) {
}
}
+ /**
+  * Converts a Thrift UUID field by delegating to {@code visitPrimitiveType} with
+  * FIXED_LEN_BYTE_ARRAY as the primitive and the UUID logical type annotation.
+  * NOTE(review): no explicit length is passed here — presumably the type builder
+  * supplies the UUID width for this annotation; confirm.
+  */
+ @Override
+ public ConvertedField visit(ThriftType.UUIDType uuidType, State state) {
+ return visitPrimitiveType(FIXED_LEN_BYTE_ARRAY, LogicalTypeAnnotation.uuidType(), state);
+ }
+
private static boolean isUnion(StructOrUnionType s) {
switch (s) {
case STRUCT:
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java
index 7c5a2e5b35..235948606e 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/ThriftSchemaConverter.java
@@ -225,14 +225,18 @@ private static ThriftField toThriftField(String name, Field field, ThriftField.R
final Field listElemField = field.getListElemField();
type = new ThriftType.ListType(toThriftField(listElemField.getName(), listElemField, requirement));
break;
+ case UUID:
case ENUM:
- Collection enumValues = field.getEnumValues();
- List values = new ArrayList();
- for (TEnum tEnum : enumValues) {
- values.add(new EnumValue(tEnum.getValue(), tEnum.toString()));
+ if (field.isEnum()) {
+ Collection enumValues = field.getEnumValues();
+ List values = new ArrayList<>();
+ for (TEnum tEnum : enumValues) {
+ values.add(new EnumValue(tEnum.getValue(), tEnum.toString()));
+ }
+ type = new EnumType(values);
+ } else {
+ type = new ThriftType.UUIDType();
}
- type = new EnumType(values);
- break;
}
return new ThriftField(name, field.getId(), requirement, type);
}
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/amend/DefaultEventsVisitor.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/amend/DefaultEventsVisitor.java
index 644a44d893..8b302db605 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/amend/DefaultEventsVisitor.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/projection/amend/DefaultEventsVisitor.java
@@ -21,6 +21,7 @@
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
+import java.util.UUID;
import org.apache.parquet.thrift.ParquetProtocol;
import org.apache.parquet.thrift.struct.ThriftField;
import org.apache.parquet.thrift.struct.ThriftType;
@@ -184,6 +185,19 @@ public Void visit(ThriftType.StringType stringType, Void v) {
return null;
}
+ /** All-zero UUID returned as the placeholder value for UUID fields. */
+ private static final UUID ZERO_UUID = new UUID(0L, 0L);
+
+ /**
+  * Registers a dummy protocol event whose {@code readUuid()} returns the all-zero UUID.
+  *
+  * @param uuidType the UUID type being visited (not inspected)
+  * @param state unused
+  * @return always {@code null}
+  */
+ @Override
+ public Void visit(ThriftType.UUIDType uuidType, Void state) {
+ dummyEvents.add(new ParquetProtocol("readUuid()") {
+ @Override
+ public UUID readUuid() throws TException {
+ return ZERO_UUID;
+ }
+ });
+ return null;
+ }
+
public List getEvents() {
return dummyEvents;
}
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityChecker.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityChecker.java
index 2a94b7e644..42f318aa1e 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityChecker.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/CompatibilityChecker.java
@@ -252,4 +252,9 @@ public Void visit(I64Type i64Type, State state) {
public Void visit(StringType stringType, State state) {
return null;
}
+
+ // Nothing is checked for UUID fields; returns null like the StringType visit directly above.
+ @Override
+ public Void visit(ThriftType.UUIDType uuidType, State state) {
+ return null;
+ }
}
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
index 04663a4850..3988c03f26 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftType.java
@@ -30,6 +30,7 @@
import static org.apache.parquet.thrift.struct.ThriftTypeID.SET;
import static org.apache.parquet.thrift.struct.ThriftTypeID.STRING;
import static org.apache.parquet.thrift.struct.ThriftTypeID.STRUCT;
+import static org.apache.parquet.thrift.struct.ThriftTypeID.UUID;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
@@ -60,7 +61,8 @@
@JsonSubTypes.Type(value = ThriftType.MapType.class, name = "MAP"),
@JsonSubTypes.Type(value = ThriftType.SetType.class, name = "SET"),
@JsonSubTypes.Type(value = ThriftType.StringType.class, name = "STRING"),
- @JsonSubTypes.Type(value = ThriftType.StructType.class, name = "STRUCT")
+ @JsonSubTypes.Type(value = ThriftType.StructType.class, name = "STRUCT"),
+ @JsonSubTypes.Type(value = ThriftType.UUIDType.class, name = "UUID"),
})
public abstract class ThriftType {
private LogicalTypeAnnotation logicalTypeAnnotation;
@@ -131,6 +133,8 @@ public interface StateVisitor {
R visit(I64Type i64Type, S state);
R visit(StringType stringType, S state);
+
+ R visit(UUIDType uuidType, S state);
}
/**
@@ -161,6 +165,8 @@ public interface TypeVisitor {
void visit(I64Type i64Type);
void visit(StringType stringType);
+
+ void visit(UUIDType uuidType);
}
/**
@@ -653,6 +659,25 @@ public void accept(TypeVisitor visitor) {
}
}
+ /**
+  * Thrift type descriptor for UUID fields; its type id is {@code ThriftTypeID.UUID}.
+  */
+ public static class UUIDType extends ThriftType {
+
+ @JsonCreator
+ public UUIDType() {
+ // UUID here is the statically imported ThriftTypeID.UUID, not java.util.UUID
+ super(UUID);
+ }
+
+ @Override
+ public R accept(StateVisitor visitor, S state) {
+ // Tag this type with the UUID logical type annotation before dispatching to the visitor.
+ // NOTE(review): the TypeVisitor overload below does not set the annotation — confirm the asymmetry is intended.
+ this.setLogicalTypeAnnotation(LogicalTypeAnnotation.uuidType());
+ return visitor.visit(this, state);
+ }
+
+ @Override
+ public void accept(TypeVisitor visitor) {
+ visitor.visit(this);
+ }
+ }
+
public static class StringType extends ThriftType {
private boolean binary = false;
diff --git a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftTypeID.java b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftTypeID.java
index d825ae9a99..7830f28afc 100644
--- a/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftTypeID.java
+++ b/parquet-thrift/src/main/java/org/apache/parquet/thrift/struct/ThriftTypeID.java
@@ -30,6 +30,7 @@
import org.apache.parquet.thrift.struct.ThriftType.SetType;
import org.apache.parquet.thrift.struct.ThriftType.StringType;
import org.apache.parquet.thrift.struct.ThriftType.StructType;
+import org.apache.parquet.thrift.struct.ThriftType.UUIDType;
import org.apache.thrift.protocol.TType;
/**
@@ -49,13 +50,22 @@ public enum ThriftTypeID {
MAP(TType.MAP, true, MapType.class),
SET(TType.SET, true, SetType.class),
LIST(TType.LIST, true, ListType.class),
- ENUM(TType.ENUM, TType.I32, EnumType.class);
+ ENUM(TType.ENUM, TType.I32, EnumType.class),
+ UUID(TType.UUID, UUIDType.class);
- private static ThriftTypeID[] types = new ThriftTypeID[17];
+ private static final ThriftTypeID[] types;
static {
+ types = new ThriftTypeID[18];
for (ThriftTypeID t : ThriftTypeID.values()) {
- types[t.thriftType] = t;
+ // The Thrift type id for Enum is not part of the Thrift spec; it exists only as a Java implementation detail:
+ // https://github.com/apache/thrift/blob/5cf71b2beec3c67a4c8452ddabbbc6ae43fff16f/lib/java/src/main/java/org/apache/thrift/protocol/TType.java#L39-L40
+ // An id of -1 cannot be used as an array index, so that entry is stored in the last slot instead.
+ if (t.thriftType == -1) {
+ types[17] = t;
+ } else {
+ types[t.thriftType] = t;
+ }
}
}
diff --git a/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java b/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java
index ce4e4b6c66..2ea614b75b 100644
--- a/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java
+++ b/parquet-thrift/src/test/java/org/apache/parquet/thrift/projection/TestFieldsPath.java
@@ -175,5 +175,10 @@ public List visit(I64Type i64Type, FieldsPath path) {
public List visit(StringType stringType, FieldsPath path) {
return visitPrimitive(path);
}
+
+ // UUID is handled like the other primitive types here: delegate to visitPrimitive(path).
+ @Override
+ public List visit(ThriftType.UUIDType uuidType, FieldsPath path) {
+ return visitPrimitive(path);
+ }
}
}
diff --git a/pom.xml b/pom.xml
index 88a02d621f..36233f1f38 100644
--- a/pom.xml
+++ b/pom.xml
@@ -70,6 +70,7 @@
2.16.0
2.16.0
0.18.2
+ 1.3.2
2.30.0
shaded.parquet
3.3.5
@@ -84,7 +85,7 @@
0.16.0
h2
0.10.0
- 0.16.0
+ 0.19.0
${thrift.version}
8.5.12
0.9.33
@@ -598,6 +599,8 @@
org.apache.parquet.arrow.schema.SchemaMapping
+ org.apache.parquet.thrift.struct.ThriftType
+
org.apache.parquet.io.api.Binary#get2BytesLittleEndian()
org.apache.parquet.schema.LogicalTypeAnnotation$Float16LogicalTypeAnnotation#accept(org.apache.parquet.schema.LogicalTypeAnnotation$LogicalTypeAnnotationVisitor)
@@ -623,6 +626,9 @@
true
true
+
+ javax.annotation:javax.annotation-api:jar:1.3.2
+