-
Notifications
You must be signed in to change notification settings - Fork 1.5k
PARQUET-1253: Support for new logical type representation #463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
244bb6a
0a66346
f8e2236
6b4ff74
f801605
db30adb
047feb9
d11a09c
eb432f7
3c426d9
77f1d52
6e1ea5d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -389,9 +389,8 @@ abstract public void addValueToPrimitiveConverter( | |
| * @param primitive STRING, INT64, ... | ||
| * @param name the name of the type | ||
| */ | ||
| public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, | ||
| String name) { | ||
| this(repetition, primitive, 0, name, null, null, null); | ||
| public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, String name) { | ||
| this(repetition, primitive, 0, name, (LogicalTypeAnnotation) null, null, null); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -401,15 +400,18 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, | |
| * @param name the name of the type | ||
| */ | ||
| public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, int length, String name) { | ||
| this(repetition, primitive, length, name, null, null, null); | ||
| this(repetition, primitive, length, name, (LogicalTypeAnnotation) null, null, null); | ||
| } | ||
|
|
||
| /** | ||
| * @param repetition OPTIONAL, REPEATED, REQUIRED | ||
| * @param primitive STRING, INT64, ... | ||
| * @param name the name of the type | ||
| * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...) | ||
| * | ||
| * @deprecated use {@link #PrimitiveType(Repetition, PrimitiveTypeName, String, LogicalTypeAnnotation)} instead | ||
|
||
| */ | ||
| @Deprecated | ||
| public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, | ||
| String name, OriginalType originalType) { | ||
| this(repetition, primitive, 0, name, originalType, null, null); | ||
|
|
@@ -436,13 +438,20 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, | |
| * @param originalType (optional) the original type (MAP, DECIMAL, UTF8, ...) | ||
| * @param decimalMeta (optional) metadata about the decimal type | ||
| * @param id the id of the field | ||
| * | ||
| * @deprecated use {@link #PrimitiveType(Repetition, PrimitiveTypeName, int, String, LogicalTypeAnnotation, ID)} instead | ||
|
||
| */ | ||
| @Deprecated | ||
| public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, | ||
| int length, String name, OriginalType originalType, | ||
| DecimalMetadata decimalMeta, ID id) { | ||
| this(repetition, primitive, length, name, originalType, decimalMeta, id, null); | ||
| } | ||
|
|
||
| /** | ||
| * @deprecated use {@link #PrimitiveType(Repetition, PrimitiveTypeName, int, String, LogicalTypeAnnotation, ID, ColumnOrder)} instead | ||
|
||
| */ | ||
| @Deprecated | ||
|
||
| PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, | ||
| int length, String name, OriginalType originalType, | ||
| DecimalMetadata decimalMeta, ID id, ColumnOrder columnOrder) { | ||
|
|
@@ -459,6 +468,37 @@ public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, | |
| this.columnOrder = requireValidColumnOrder(columnOrder); | ||
| } | ||
|
|
||
| public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, | ||
|
||
| String name, LogicalTypeAnnotation logicalTypeAnnotation) { | ||
| this(repetition, primitive, 0, name, logicalTypeAnnotation, null, null); | ||
| } | ||
|
|
||
| public PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, | ||
| int length, String name, LogicalTypeAnnotation logicalTypeAnnotation, ID id) { | ||
| this(repetition, primitive, length, name, logicalTypeAnnotation, id, null); | ||
| } | ||
|
|
||
| PrimitiveType(Repetition repetition, PrimitiveTypeName primitive, | ||
| int length, String name, LogicalTypeAnnotation logicalTypeAnnotation, | ||
| ID id, ColumnOrder columnOrder) { | ||
| super(name, repetition, logicalTypeAnnotation, id); | ||
| this.primitive = primitive; | ||
| this.length = length; | ||
| if (getOriginalType() == OriginalType.DECIMAL) { | ||
| LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimal = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation; | ||
| this.decimalMeta = new DecimalMetadata(decimal.getPrecision(), decimal.getScale()); | ||
| } else { | ||
| this.decimalMeta = null; | ||
| } | ||
|
|
||
| if (columnOrder == null) { | ||
| columnOrder = primitive == PrimitiveTypeName.INT96 || getOriginalType() == OriginalType.INTERVAL | ||
| ? ColumnOrder.undefined() | ||
| : ColumnOrder.typeDefined(); | ||
| } | ||
| this.columnOrder = requireValidColumnOrder(columnOrder); | ||
| } | ||
|
|
||
| private ColumnOrder requireValidColumnOrder(ColumnOrder columnOrder) { | ||
| if (primitive == PrimitiveTypeName.INT96) { | ||
| Preconditions.checkArgument(columnOrder.getColumnOrderName() == ColumnOrderName.UNDEFINED, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -128,7 +128,7 @@ public boolean isMoreRestrictiveThan(Repetition other) { | |
| */ | ||
| @Deprecated | ||
| public Type(String name, Repetition repetition) { | ||
| this(name, repetition, null, null); | ||
| this(name, repetition, (LogicalTypeAnnotation) null, null); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -146,11 +146,18 @@ public Type(String name, Repetition repetition, OriginalType originalType) { | |
| * @param repetition OPTIONAL, REPEATED, REQUIRED | ||
| * @param originalType (optional) the original type to help with cross schema conversion (LIST, MAP, ...) | ||
| * @param id (optional) the id of the fields. | ||
| * | ||
| * @deprecated use {@link #Type(String, Repetition, LogicalTypeAnnotation, ID)} instead | ||
| */ | ||
| @Deprecated | ||
|
||
| Type(String name, Repetition repetition, OriginalType originalType, ID id) { | ||
| this(name, repetition, originalType, null, id); | ||
| } | ||
|
|
||
| /** | ||
| * @deprecated use {@link #Type(String, Repetition, LogicalTypeAnnotation, ID)} instead | ||
| */ | ||
| @Deprecated | ||
|
||
| Type(String name, Repetition repetition, OriginalType originalType, DecimalMetadata decimalMetadata, ID id) { | ||
| super(); | ||
| this.name = checkNotNull(name, "name"); | ||
|
|
@@ -159,6 +166,18 @@ public Type(String name, Repetition repetition, OriginalType originalType) { | |
| this.id = id; | ||
| } | ||
|
|
||
| public Type(String name, Repetition repetition, LogicalTypeAnnotation logicalTypeAnnotation) { | ||
| this(name, repetition, logicalTypeAnnotation, null); | ||
| } | ||
|
|
||
| Type(String name, Repetition repetition, LogicalTypeAnnotation logicalTypeAnnotation, ID id) { | ||
| super(); | ||
| this.name = checkNotNull(name, "name"); | ||
| this.repetition = checkNotNull(repetition, "repetition"); | ||
| this.logicalTypeAnnotation = logicalTypeAnnotation; | ||
| this.id = id; | ||
| } | ||
|
|
||
| /** | ||
| * @param id | ||
| * @return the same type with the id field set | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |
| import java.util.List; | ||
|
|
||
| import org.apache.parquet.Preconditions; | ||
| import org.apache.parquet.format.DecimalType; | ||
| import org.apache.parquet.schema.ColumnOrder.ColumnOrderName; | ||
| import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; | ||
| import org.apache.parquet.schema.Type.ID; | ||
|
|
@@ -250,14 +251,32 @@ protected final THIS repetition(Type.Repetition repetition) { | |
| * | ||
| * @param type an {@code OriginalType} | ||
| * @return this builder for method chaining | ||
| * | ||
| * @deprecated use {@link #as(LogicalTypeAnnotation)} with the corresponding logical type instead | ||
| */ | ||
| @Deprecated | ||
| public THIS as(OriginalType type) { | ||
| this.logicalTypeAnnotation = LogicalTypeAnnotation.fromOriginalType(type, null); | ||
| return self(); | ||
| } | ||
|
|
||
| protected boolean newLogicalTypeSet; | ||
|
|
||
| /** | ||
| * Adds a type annotation ({@link LogicalTypeAnnotation}) to the type being built. | ||
| * <p> | ||
| * Type annotations are used to extend the types that parquet can store, by | ||
| * specifying how the primitive types should be interpreted. This keeps the | ||
| * set of primitive types to a minimum and reuses parquet's efficient | ||
| * encodings. For example, strings are stored as byte arrays (binary) with | ||
| * a UTF8 annotation. | ||
| * | ||
| * @param type a {@link LogicalTypeAnnotation} | ||
| * @return this builder for method chaining | ||
| */ | ||
| public THIS as(LogicalTypeAnnotation type) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This method breaks the fluent API of the builder, because you need to call an outside factory method of the final type to create a LogicalTypeAnnotation. If it is practically feasible, I would suggest refactoring the LogicalTypeAnnotation API so that it fits better into the fluent API of Types. |
||
| this.logicalTypeAnnotation = type; | ||
| this.newLogicalTypeSet = true; | ||
| return self(); | ||
| } | ||
|
|
||
|
|
@@ -351,6 +370,9 @@ public THIS length(int length) { | |
| return self(); | ||
| } | ||
|
|
||
| private boolean precisionAlreadySet; | ||
| private boolean scaleAlreadySet; | ||
|
|
||
| /** | ||
| * Adds the precision for a DECIMAL. | ||
| * <p> | ||
|
|
@@ -360,9 +382,13 @@ public THIS length(int length) { | |
| * | ||
| * @param precision an int precision value for the DECIMAL | ||
| * @return this builder for method chaining | ||
| * | ||
| * @deprecated use {@link #as(LogicalTypeAnnotation)} with the corresponding decimal type instead | ||
| */ | ||
| @Deprecated | ||
| public THIS precision(int precision) { | ||
| this.precision = precision; | ||
| precisionAlreadySet = true; | ||
| return self(); | ||
| } | ||
|
|
||
|
|
@@ -378,9 +404,13 @@ public THIS precision(int precision) { | |
| * | ||
| * @param scale an int scale value for the DECIMAL | ||
| * @return this builder for method chaining | ||
| * | ||
| * @deprecated use {@link #as(LogicalTypeAnnotation)} with the corresponding decimal type instead | ||
| */ | ||
| @Deprecated | ||
| public THIS scale(int scale) { | ||
| this.scale = scale; | ||
| scaleAlreadySet = true; | ||
| return self(); | ||
| } | ||
|
|
||
|
|
@@ -498,11 +528,24 @@ private static long maxPrecision(int numBytes) { | |
| protected DecimalMetadata decimalMetadata() { | ||
| DecimalMetadata meta = null; | ||
| if (OriginalType.DECIMAL == getOriginalType()) { | ||
| LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalType = (LogicalTypeAnnotation.DecimalLogicalTypeAnnotation) logicalTypeAnnotation; | ||
| if (newLogicalTypeSet) { | ||
| if (scaleAlreadySet) { | ||
| Preconditions.checkArgument(this.scale == decimalType.getScale(), | ||
| "Decimal scale should match with the scale of the logical type"); | ||
| } | ||
| if (precisionAlreadySet) { | ||
| Preconditions.checkArgument(this.precision == decimalType.getPrecision(), | ||
| "Decimal precision should match with the precision of the logical type"); | ||
| } | ||
| scale = decimalType.getScale(); | ||
| precision = decimalType.getPrecision(); | ||
| } | ||
| Preconditions.checkArgument(precision > 0, | ||
| "Invalid DECIMAL precision: " + precision); | ||
| Preconditions.checkArgument(scale >= 0, | ||
| "Invalid DECIMAL scale: " + scale); | ||
| Preconditions.checkArgument(scale <= precision, | ||
| Preconditions.checkArgument(this.scale >= 0, | ||
| "Invalid DECIMAL scale: " + this.scale); | ||
| Preconditions.checkArgument(this.scale <= precision, | ||
| "Invalid DECIMAL scale: cannot be greater than precision"); | ||
| meta = new DecimalMetadata(precision, scale); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is enough to deprecate the public API. If a method is not public, we can freely modify or remove it.