diff --git a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java index e02b03b5d5..51057c589e 100644 --- a/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java +++ b/parquet-arrow/src/main/java/org/apache/parquet/arrow/schema/SchemaConverter.java @@ -23,6 +23,7 @@ import static java.util.Optional.of; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS; +import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS; import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType; import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType; import static org.apache.parquet.schema.LogicalTypeAnnotation.intType; @@ -246,6 +247,8 @@ public TypeMapping visit(Time type) { return primitive(INT32, timeType(false, MILLIS)); } else if (bitWidth == 64 && timeUnit == TimeUnit.MICROSECOND) { return primitive(INT64, timeType(false, MICROS)); + } else if (bitWidth == 64 && timeUnit == TimeUnit.NANOSECOND) { + return primitive(INT64, timeType(false, NANOS)); } throw new UnsupportedOperationException("Unsupported type " + type); } @@ -257,6 +260,8 @@ public TypeMapping visit(Timestamp type) { return primitive(INT64, timestampType(isUtcNormalized(type), MILLIS)); } else if (timeUnit == TimeUnit.MICROSECOND) { return primitive(INT64, timestampType(isUtcNormalized(type), MICROS)); + } else if (timeUnit == TimeUnit.NANOSECOND) { + return primitive(INT64, timestampType(isUtcNormalized(type), NANOS)); } throw new UnsupportedOperationException("Unsupported type " + type); } @@ -460,6 +465,8 @@ public Optional visit(LogicalTypeAnnotation.IntLogicalTypeAnnotatio public Optional visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeLogicalType) { if (timeLogicalType.getUnit() == MICROS) { return of(field(new ArrowType.Time(TimeUnit.MICROSECOND, 64))); + } else if (timeLogicalType.getUnit() == NANOS) { + return of(field(new ArrowType.Time(TimeUnit.NANOSECOND, 64))); } return empty(); } @@ -471,6 +478,8 @@ public Optional visit(LogicalTypeAnnotation.TimestampLogicalTypeAnn return of(field(new ArrowType.Timestamp(TimeUnit.MICROSECOND, getTimeZone(timestampLogicalType)))); case MILLIS: return of(field(new ArrowType.Timestamp(TimeUnit.MILLISECOND, getTimeZone(timestampLogicalType)))); + case NANOS: + return of(field(new ArrowType.Timestamp(TimeUnit.NANOSECOND, getTimeZone(timestampLogicalType)))); } return empty(); } diff --git a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java index 2817de2634..c962b5456f 100644 --- a/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java +++ b/parquet-arrow/src/test/java/org/apache/parquet/arrow/schema/TestSchemaConverter.java @@ -21,6 +21,7 @@ import static java.util.Arrays.asList; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS; +import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS; import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType; import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType; import static org.apache.parquet.schema.OriginalType.DATE; @@ -66,6 +67,7 @@ import org.apache.parquet.arrow.schema.SchemaMapping.TypeMappingVisitor; import org.apache.parquet.arrow.schema.SchemaMapping.UnionTypeMapping; import org.apache.parquet.example.Paper; +import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.Types; import org.junit.Assert; @@ -93,6 +95,7 @@ private static Field field(String name, ArrowType type, Field... children) { field("f", new ArrowType.FixedSizeList(1), field(null, new ArrowType.Date(DateUnit.DAY))), field("g", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), field("h", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")), + field("i", new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC")), field("j", new ArrowType.Timestamp(TimeUnit.MILLISECOND, null)), field("k", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC")), field("l", new ArrowType.Timestamp(TimeUnit.MICROSECOND, null)), @@ -112,6 +115,7 @@ private static Field field(String name, ArrowType type, Field... children) { .named("f")) .addField(Types.optional(FLOAT).named("g")) .addField(Types.optional(INT64).as(timestampType(true, MILLIS)).named("h")) + .addField(Types.optional(INT64).as(timestampType(true, NANOS)).named("i")) .addField(Types.optional(INT64).as(timestampType(false, MILLIS)).named("j")) .addField(Types.optional(INT64).as(timestampType(true, MICROS)).named("k")) .addField(Types.optional(INT64).as(timestampType(false, MICROS)).named("l")) @@ -144,8 +148,10 @@ private static Field field(String name, ArrowType type, Field... children) { field("m", new ArrowType.Time(TimeUnit.MILLISECOND, 32)), field("n", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")), field("o", new ArrowType.Interval(IntervalUnit.DAY_TIME)), - field("o1", new ArrowType.Interval(IntervalUnit.YEAR_MONTH)) - )); + field("o1", new ArrowType.Interval(IntervalUnit.YEAR_MONTH)), + field("p", new ArrowType.Time(TimeUnit.NANOSECOND, 64)), + field("q", new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC")) + )); private final MessageType allTypesParquetSchema = Types.buildMessage() .addField(Types.optional(BINARY).named("a")) @@ -182,6 +188,8 @@ private static Field field(String name, ArrowType type, Field... children) { .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("n")) .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("o")) .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("o1")) + .addField(Types.optional(INT64).as(timeType(false, NANOS)).named("p")) + .addField(Types.optional(INT64).as(timestampType(true, NANOS)).named("q")) .named("root"); private final Schema supportedTypesArrowSchema = new Schema(asList( @@ -205,7 +213,9 @@ private static Field field(String name, ArrowType type, Field... children) { field("j2", new ArrowType.Decimal(25, 5)), field("k", new ArrowType.Date(DateUnit.DAY)), field("l", new ArrowType.Time(TimeUnit.MILLISECOND, 32)), - field("m", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")) + field("m", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")), + field("n", new ArrowType.Time(TimeUnit.NANOSECOND, 64)), + field("o", new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC")) )); private final MessageType supportedTypesParquetSchema = Types.buildMessage() @@ -234,6 +244,8 @@ private static Field field(String name, ArrowType type, Field... children) { .addField(Types.optional(INT32).as(DATE).named("k")) .addField(Types.optional(INT32).as(TIME_MILLIS).named("l")) .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("m")) + .addField(Types.optional(INT64).as(timeType(true, NANOS)).named("n")) + .addField(Types.optional(INT64).as(timestampType(true, NANOS)).named("o")) .named("root"); private final Schema paperArrowSchema = new Schema(asList( @@ -307,7 +319,7 @@ private void compareFields(List left, List right) { @Test public void testAllMap() throws IOException { SchemaMapping map = converter.map(allTypesArrowSchema, allTypesParquetSchema); - Assert.assertEquals("p, s

, l

, l

, u

, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p", toSummaryString(map)); + Assert.assertEquals("p, s

, l

, l

, u

, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p, p", toSummaryString(map)); } private String toSummaryString(SchemaMapping map) { @@ -387,13 +399,6 @@ public void testArrowTimeMicrosecondToParquet() { Types.buildMessage().addField(Types.optional(INT64).as(timeType(false, MICROS)).named("a")).named("root")); } - @Test(expected = UnsupportedOperationException.class) - public void testArrowTimeNanosecondToParquet() { - converter.fromArrow(new Schema(asList( - field("a", new ArrowType.Time(TimeUnit.NANOSECOND, 64)) - ))).getParquetSchema(); - } - @Test public void testParquetInt32TimeMillisToArrow() { MessageType parquet = Types.buildMessage() @@ -449,13 +454,6 @@ public void testArrowTimestampMicrosecondToParquet() { Assert.assertEquals(expected, Types.buildMessage().addField(Types.optional(INT64).as(TIMESTAMP_MICROS).named("a")).named("root")); } - @Test(expected = UnsupportedOperationException.class) - public void testArrowTimestampNanosecondToParquet() { - converter.fromArrow(new Schema(asList( - field("a", new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC")) - ))).getParquetSchema(); - } - @Test public void testParquetInt64TimestampMillisToArrow() { MessageType parquet = Types.buildMessage() diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 6046a39310..1a0cdfc517 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -520,7 +520,8 @@ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) { public enum TimeUnit { MILLIS, - MICROS + MICROS, + NANOS } public static class TimeLogicalTypeAnnotation extends LogicalTypeAnnotation { @@ -540,7 +541,7 @@ public OriginalType toOriginalType() { case MICROS: return OriginalType.TIME_MICROS; default: - throw new RuntimeException("Unknown original type for " + unit); + return null; } } @@ -610,7 +611,7 @@ public OriginalType toOriginalType() { case MICROS: return OriginalType.TIMESTAMP_MICROS; default: - throw new RuntimeException("Unknown original type for " + unit); + return null; } } @@ -664,6 +665,8 @@ PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) { return PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER; case MILLIS: return PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER; + case NANOS: + return PrimitiveStringifier.TIMESTAMP_NANOS_STRINGIFIER; default: return super.valueStringifier(primitiveType); } @@ -697,7 +700,7 @@ public OriginalType toOriginalType() { case 64: return isSigned ? OriginalType.INT_64 : OriginalType.UINT_64; default: - throw new RuntimeException("Unknown original type " + toOriginalType()); + return null; } } diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java index c1a9b582fe..3c3417e0d6 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java @@ -22,6 +22,7 @@ import static java.util.concurrent.TimeUnit.MICROSECONDS; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.NANOSECONDS; import static java.util.concurrent.TimeUnit.SECONDS; import java.math.BigDecimal; @@ -306,6 +307,19 @@ long toMillis(long value) { } }; + static final PrimitiveStringifier TIMESTAMP_NANOS_STRINGIFIER = new DateStringifier( + "TIMESTAMP_NANOS_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSS") { + @Override + public String stringify(long value) { + return super.stringify(value) + String.format("%06d", Math.abs(value % 1_000_000)); + } + + @Override + long toMillis(long value) { + return value / 1_000_000; + } + }; + static final PrimitiveStringifier TIME_STRINGIFIER = new PrimitiveStringifier("TIME_STRINGIFIER") { @Override public String stringify(int millis) { @@ -331,6 +345,26 @@ private long convert(long duration, TimeUnit from, TimeUnit to, TimeUnit higher) } }; + static final PrimitiveStringifier TIME_NANOS_STRINGIFIER = new PrimitiveStringifier("TIME_NANOS_STRINGIFIER") { + @Override + public String stringify(long nanos) { + return toTimeString(nanos, NANOSECONDS); + } + + private String toTimeString(long nanos, TimeUnit unit) { + String format = "%02d:%02d:%02d.%09d"; + return String.format(format, + unit.toHours(nanos), + convert(nanos, unit, MINUTES, HOURS), + convert(nanos, unit, SECONDS, MINUTES), + convert(nanos, unit, unit, SECONDS)); + } + + private long convert(long duration, TimeUnit from, TimeUnit to, TimeUnit higher) { + return Math.abs(to.convert(duration, from) % to.convert(1, higher)); + } + }; + static PrimitiveStringifier createDecimalStringifier(final int scale) { return new BinaryStringifierBase("DECIMAL_STRINGIFIER(scale: " + scale + ")") { @Override diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java index 378d6653e1..a1cd736580 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/Types.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/Types.java @@ -507,6 +507,7 @@ public Optional visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation t checkInt32PrimitiveType(timeLogicalType); break; case MICROS: + case NANOS: checkInt64PrimitiveType(timeLogicalType); break; default: diff --git a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java index d8536012ba..fa200ab424 100644 --- a/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java +++ b/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java @@ -19,6 +19,7 @@ package org.apache.parquet.parser; import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS; +import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS; import static org.apache.parquet.schema.LogicalTypeAnnotation.intType; import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType; import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType; @@ -271,7 +272,9 @@ public void testTimeAnnotations() { " required int64 timestamp (TIMESTAMP_MILLIS);" + " required FIXED_LEN_BYTE_ARRAY(12) interval (INTERVAL);" + " required int32 newTime (TIME(MILLIS,true));" + + " required int64 nanoTime (TIME(NANOS,true));" + " required int64 newTimestamp (TIMESTAMP(MILLIS,false));" + + " required int64 nanoTimestamp (TIMESTAMP(NANOS,false));" + "}\n"; MessageType parsed = MessageTypeParser.parseMessageType(message); @@ -281,7 +284,9 @@ public void testTimeAnnotations() { .required(INT64).as(TIMESTAMP_MILLIS).named("timestamp") .required(FIXED_LEN_BYTE_ARRAY).length(12).as(INTERVAL).named("interval") .required(INT32).as(timeType(true, MILLIS)).named("newTime") + .required(INT64).as(timeType(true, NANOS)).named("nanoTime") .required(INT64).as(timestampType(false, MILLIS)).named("newTimestamp") + .required(INT64).as(timestampType(false, NANOS)).named("nanoTimestamp") .named("TimeMessage"); assertEquals(expected, parsed); diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java index 53045cfb8c..b4e7062964 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java @@ -23,10 +23,12 @@ import static java.util.concurrent.TimeUnit.MICROSECONDS; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.NANOSECONDS; import static java.util.concurrent.TimeUnit.SECONDS; import static org.apache.parquet.schema.PrimitiveStringifier.DATE_STRINGIFIER; import static org.apache.parquet.schema.PrimitiveStringifier.DEFAULT_STRINGIFIER; import static org.apache.parquet.schema.PrimitiveStringifier.INTERVAL_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_STRINGIFIER; import static org.apache.parquet.schema.PrimitiveStringifier.TIME_STRINGIFIER; import static org.apache.parquet.schema.PrimitiveStringifier.UNSIGNED_STRINGIFIER; import static org.apache.parquet.schema.PrimitiveStringifier.UTF8_STRINGIFIER; @@ -200,6 +202,28 @@ public void testTimestampMicrosStringifier() { checkThrowingUnsupportedException(stringifier, Long.TYPE); } + @Test + public void testTimestampNanosStringifier() { + PrimitiveStringifier stringifier = PrimitiveStringifier.TIMESTAMP_NANOS_STRINGIFIER; + + assertEquals("1970-01-01T00:00:00.000000000", stringifier.stringify(0l)); + + Calendar cal = Calendar.getInstance(UTC); + cal.clear(); + cal.set(2053, Calendar.JULY, 10, 22, 13, 24); + cal.set(Calendar.MILLISECOND, 84); + long nanos = cal.getTimeInMillis() * 1_000_000 + 536; + assertEquals("2053-07-10T22:13:24.084000536", stringifier.stringify(nanos)); + + cal.clear(); + cal.set(1848, Calendar.MARCH, 15, 9, 23, 59); + cal.set(Calendar.MILLISECOND, 765); + nanos = cal.getTimeInMillis() * 1_000_000 - 1; + assertEquals("1848-03-15T09:23:59.765000001", stringifier.stringify(nanos)); + + checkThrowingUnsupportedException(stringifier, Long.TYPE); + } + @Test public void testTimeStringifier() { PrimitiveStringifier stringifier = TIME_STRINGIFIER; @@ -222,6 +246,20 @@ public void testTimeStringifier() { checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE); } + @Test + public void testTimeNanoStringifier() { + PrimitiveStringifier stringifier = TIME_NANOS_STRINGIFIER; + + assertEquals("00:00:00.000000000", stringifier.stringify(0l)); + + assertEquals("12:34:56.789012987", stringifier.stringify(convert(NANOSECONDS, 12, 34, 56, 789012987))); + assertEquals("-12:34:56.000789012", stringifier.stringify(convert(NANOSECONDS, -12, -34, -56, -789012))); + assertEquals("12345:12:34.000056789", stringifier.stringify(convert(NANOSECONDS, 12345, 12, 34, 56789))); + assertEquals("-12345:12:34.000056789", stringifier.stringify(convert(NANOSECONDS, -12345, -12, -34, -56789))); + + checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE); + } + private long convert(TimeUnit unit, long hours, long minutes, long seconds, long rest) { return unit.convert(hours, HOURS) + unit.convert(minutes, MINUTES) + unit.convert(seconds, SECONDS) + rest; } diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java new file mode 100644 index 0000000000..fe13e604b6 --- /dev/null +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java @@ -0,0 +1,408 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.parquet.schema; + +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; +import org.apache.parquet.schema.Type.Repetition; +import org.junit.Assert; +import org.junit.Test; + +import java.util.concurrent.Callable; + +import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MICROS; +import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.MILLIS; +import static org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit.NANOS; +import static org.apache.parquet.schema.LogicalTypeAnnotation.bsonType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.dateType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.decimalType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.intType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.jsonType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.timeType; +import static org.apache.parquet.schema.LogicalTypeAnnotation.timestampType; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96; +import static org.apache.parquet.schema.Type.Repetition.REQUIRED; + +public class TestTypeBuildersWithLogicalTypes { + @Test + public void testGroupTypeConstruction() { + PrimitiveType f1 = Types.required(BINARY).as(stringType()).named("f1"); + PrimitiveType f2 = Types.required(INT32).named("f2"); + PrimitiveType f3 = Types.optional(INT32).named("f3"); + String name = "group"; + for (Repetition repetition : Repetition.values()) { + GroupType expected = new GroupType(repetition, name, + f1, + new GroupType(repetition, "g1", f2, f3)); + GroupType built = Types.buildGroup(repetition) + .addField(f1) + .group(repetition).addFields(f2, f3).named("g1") + .named(name); + Assert.assertEquals(expected, built); + + switch (repetition) { + case REQUIRED: + built = Types.requiredGroup() + .addField(f1) + .requiredGroup().addFields(f2, f3).named("g1") + .named(name); + break; + case OPTIONAL: + built = Types.optionalGroup() + .addField(f1) + .optionalGroup().addFields(f2, f3).named("g1") + .named(name); + break; + case REPEATED: + built = Types.repeatedGroup() + .addField(f1) + .repeatedGroup().addFields(f2, f3).named("g1") + .named(name); + break; + } + Assert.assertEquals(expected, built); + } + } + + @Test + public void testDecimalAnnotation() { + // int32 primitive type + MessageType expected = new MessageType("DecimalMessage", + new PrimitiveType(REQUIRED, INT32, 0, "aDecimal", + decimalType(2, 9), null)); + MessageType builderType = Types.buildMessage() + .required(INT32) + .as(decimalType(2, 9)) + .named("aDecimal") + .named("DecimalMessage"); + Assert.assertEquals(expected, builderType); + // int64 primitive type + expected = new MessageType("DecimalMessage", + new PrimitiveType(REQUIRED, INT64, 0, "aDecimal", + decimalType(2, 18), null)); + builderType = Types.buildMessage() + .required(INT64) + .as(decimalType(2, 18)).precision(18).scale(2) + .named("aDecimal") + .named("DecimalMessage"); + Assert.assertEquals(expected, builderType); + // binary primitive type + expected = new MessageType("DecimalMessage", + new PrimitiveType(REQUIRED, BINARY, 0, "aDecimal", + decimalType(2, 9), null)); + builderType = Types.buildMessage() + .required(BINARY).as(decimalType(2, 9)) + .named("aDecimal") + .named("DecimalMessage"); + Assert.assertEquals(expected, builderType); + // fixed primitive type + expected = new MessageType("DecimalMessage", + new PrimitiveType(REQUIRED, FIXED_LEN_BYTE_ARRAY, 4, "aDecimal", + decimalType(2, 9), null)); + builderType = Types.buildMessage() + .required(FIXED_LEN_BYTE_ARRAY).length(4) + .as(decimalType(2, 9)) + .named("aDecimal") + .named("DecimalMessage"); + Assert.assertEquals(expected, builderType); + } + + @Test + public void testDecimalAnnotationPrecisionScaleBound() { + assertThrows("Should reject scale greater than precision", + IllegalArgumentException.class, () -> Types.buildMessage() + .required(INT32).as(decimalType(4, 3)) + .named("aDecimal") + .named("DecimalMessage")); + assertThrows("Should reject scale greater than precision", + IllegalArgumentException.class, () -> Types.buildMessage() + .required(INT64).as(decimalType(4, 3)) + .named("aDecimal") + .named("DecimalMessage")); + assertThrows("Should reject scale greater than precision", + IllegalArgumentException.class, () -> Types.buildMessage() + .required(BINARY).as(decimalType(4, 3)) + .named("aDecimal") + .named("DecimalMessage")); + assertThrows("Should reject scale greater than precision", + IllegalArgumentException.class, () -> Types.buildMessage() + .required(FIXED_LEN_BYTE_ARRAY).length(7) + .as(decimalType(4, 3)) + .named("aDecimal") + .named("DecimalMessage") + ); + } + + @Test + public void testDecimalAnnotationLengthCheck() { + // maximum precision for 4 bytes is 9 + assertThrows("should reject precision 10 with length 4", + IllegalStateException.class, () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(4) + .as(decimalType(2, 10)) + .named("aDecimal")); + assertThrows("should reject precision 10 with length 4", + IllegalStateException.class, () -> Types.required(INT32) + .as(decimalType(2, 10)) + .named("aDecimal")); + // maximum precision for 8 bytes is 19 + assertThrows("should reject precision 19 with length 8", + IllegalStateException.class, () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(8) + .as(decimalType(4, 19)) + .named("aDecimal")); + assertThrows("should reject precision 19 with length 8", + IllegalStateException.class, () -> Types.required(INT64).length(8) + .as(decimalType(4, 19)) + .named("aDecimal") + ); + } + + @Test + public void testDECIMALAnnotationRejectsUnsupportedTypes() { + PrimitiveTypeName[] unsupported = new PrimitiveTypeName[]{ + BOOLEAN, INT96, DOUBLE, FLOAT + }; + for (final PrimitiveTypeName type : unsupported) { + assertThrows("Should reject non-binary type: " + type, + IllegalStateException.class, () -> Types.required(type) + .as(decimalType(2, 9)) + .named("d")); + } + } + + @Test + public void testBinaryAnnotations() { + LogicalTypeAnnotation[] types = new LogicalTypeAnnotation[] { + stringType(), jsonType(), bsonType()}; + for (final LogicalTypeAnnotation logicalType : types) { + PrimitiveType expected = new PrimitiveType(REQUIRED, BINARY, "col", logicalType); + PrimitiveType string = Types.required(BINARY).as(logicalType).named("col"); + Assert.assertEquals(expected, string); + } + } + + @Test + public void testBinaryAnnotationsRejectsNonBinary() { + LogicalTypeAnnotation[] types = new LogicalTypeAnnotation[] { + stringType(), jsonType(), bsonType()}; + for (final LogicalTypeAnnotation logicalType : types) { + PrimitiveTypeName[] nonBinary = new PrimitiveTypeName[]{ + BOOLEAN, INT32, INT64, INT96, DOUBLE, FLOAT + }; + for (final PrimitiveTypeName type : nonBinary) { + assertThrows("Should reject non-binary type: " + type, + IllegalStateException.class, () -> Types.required(type).as(logicalType).named("col")); + } + assertThrows("Should reject non-binary type: FIXED_LEN_BYTE_ARRAY", + IllegalStateException.class, () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(1) + .as(logicalType).named("col")); + } + } + + @Test + public void testInt32Annotations() { + LogicalTypeAnnotation[] types = new LogicalTypeAnnotation[] { + dateType(), timeType(true, MILLIS), timeType(false, MILLIS), + intType(8, false), intType(16, false), intType(32, false), + intType(8, true), intType(16, true), intType(32, true)}; + for (LogicalTypeAnnotation logicalType : types) { + PrimitiveType expected = new PrimitiveType(REQUIRED, INT32, "col", logicalType); + PrimitiveType date = Types.required(INT32).as(logicalType).named("col"); + Assert.assertEquals(expected, date); + } + } + + @Test + public void testInt32AnnotationsRejectNonInt32() { + LogicalTypeAnnotation[] types = new LogicalTypeAnnotation[] { + dateType(), timeType(true, MILLIS), timeType(false, MILLIS), + intType(8, false), intType(16, false), intType(32, false), + intType(8, true), intType(16, true), intType(32, true)}; + for (final LogicalTypeAnnotation logicalType : types) { + PrimitiveTypeName[] nonInt32 = new PrimitiveTypeName[]{ + BOOLEAN, INT64, INT96, DOUBLE, FLOAT, BINARY + }; + for (final PrimitiveTypeName type : nonInt32) { + assertThrows("Should reject non-int32 type: " + type, + IllegalStateException.class, () -> Types.required(type).as(logicalType).named("col")); + } + assertThrows("Should reject non-int32 type: FIXED_LEN_BYTE_ARRAY", + IllegalStateException.class, () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(1) + .as(logicalType).named("col")); + } + } + + @Test + public void testInt64Annotations() { + LogicalTypeAnnotation[] types = new LogicalTypeAnnotation[] { + timeType(true, MICROS), timeType(false, MICROS), + timeType(true, NANOS), timeType(false, NANOS), + timestampType(true, MILLIS), timestampType(false, MILLIS), + timestampType(true, MICROS), timestampType(false, MICROS), + timestampType(true, NANOS), timestampType(false, NANOS), + intType(64, true), intType(64, false)}; + for (LogicalTypeAnnotation logicalType : types) { + PrimitiveType expected = new PrimitiveType(REQUIRED, INT64, "col", logicalType); + PrimitiveType date = Types.required(INT64).as(logicalType).named("col"); + Assert.assertEquals(expected, date); + } + } + + @Test + public void testInt64AnnotationsRejectNonInt64() { + LogicalTypeAnnotation[] types = new LogicalTypeAnnotation[] { + timeType(true, MICROS), timeType(false, MICROS), + timeType(true, NANOS), timeType(false, NANOS), + timestampType(true, MILLIS), timestampType(false, MILLIS), + timestampType(true, MICROS), timestampType(false, MICROS), + timestampType(true, NANOS), timestampType(false, NANOS), + intType(64, true), intType(64, false)}; + for (final LogicalTypeAnnotation logicalType : types) { + PrimitiveTypeName[] nonInt64 = new PrimitiveTypeName[]{ + BOOLEAN, INT32, INT96, DOUBLE, FLOAT, BINARY + }; + for (final PrimitiveTypeName type : nonInt64) { + assertThrows("Should reject non-int64 type: " + type, + IllegalStateException.class, (Callable) () -> Types.required(type).as(logicalType).named("col")); + } + assertThrows("Should reject non-int64 type: FIXED_LEN_BYTE_ARRAY", + IllegalStateException.class, (Callable) () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(1) + .as(logicalType).named("col")); + } + } + + @Test + public void testIntervalAnnotationRejectsNonFixed() { + PrimitiveTypeName[] nonFixed = new PrimitiveTypeName[]{ + BOOLEAN, INT32, INT64, INT96, DOUBLE, FLOAT, BINARY + }; + for (final PrimitiveTypeName type : nonFixed) { + assertThrows("Should reject non-fixed type: " + type, + IllegalStateException.class, () -> Types.required(type) + .as(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance()).named("interval")); + } + } + + @Test + public void testIntervalAnnotationRejectsNonFixed12() { + assertThrows("Should reject fixed with length != 12: " + 11, + IllegalStateException.class, () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(11) + .as(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance()).named("interval")); + } + + @Test + public void testTypeConstructionWithUnsupportedColumnOrder() { + assertThrows(null, IllegalArgumentException.class, + () -> Types.optional(INT96).columnOrder(ColumnOrder.typeDefined()).named("int96_unsupported")); + assertThrows(null, IllegalArgumentException.class, + () -> Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(12) + .as(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance()) + .columnOrder(ColumnOrder.typeDefined()).named("interval_unsupported")); + } + + @Test + public void testDecimalLogicalType() { + PrimitiveType expected = new PrimitiveType(REQUIRED, BINARY, "aDecimal", + LogicalTypeAnnotation.decimalType(3, 4)); + PrimitiveType actual = Types.required(BINARY) + .as(LogicalTypeAnnotation.decimalType(3, 4)).named("aDecimal"); + Assert.assertEquals(expected, actual); + } + + @Test + public void testDecimalLogicalTypeWithDeprecatedScale() { + PrimitiveType expected = new PrimitiveType(REQUIRED, BINARY, "aDecimal", + LogicalTypeAnnotation.decimalType(3, 4)); + PrimitiveType actual = Types.required(BINARY) + .as(LogicalTypeAnnotation.decimalType(3, 4)).scale(3).named("aDecimal"); + Assert.assertEquals(expected, actual); + } + + @Test + public void testDecimalLogicalTypeWithDeprecatedPrecision() { + PrimitiveType expected = new PrimitiveType(REQUIRED, BINARY, "aDecimal", + LogicalTypeAnnotation.decimalType(3, 4)); + PrimitiveType actual = Types.required(BINARY) + .as(LogicalTypeAnnotation.decimalType(3, 4)).precision(4).named("aDecimal"); + Assert.assertEquals(expected, actual); + } + + @Test + public void testTimestampLogicalTypeWithUTCParameter() { + PrimitiveType utcMillisExpected = new PrimitiveType(REQUIRED, INT64, "aTimestamp", + timestampType(true, MILLIS)); + PrimitiveType nonUtcMillisExpected = new PrimitiveType(REQUIRED, INT64, "aTimestamp", + timestampType(false, MILLIS)); + PrimitiveType utcMicrosExpected = new PrimitiveType(REQUIRED, INT64, "aTimestamp", + timestampType(true, MICROS)); + PrimitiveType nonUtcMicrosExpected = new PrimitiveType(REQUIRED, INT64, "aTimestamp", + timestampType(false, MICROS)); + + PrimitiveType utcMillisActual = Types.required(INT64) + .as(timestampType(true, MILLIS)).named("aTimestamp"); + PrimitiveType nonUtcMillisActual = Types.required(INT64) + .as(timestampType(false, MILLIS)).named("aTimestamp"); + PrimitiveType utcMicrosActual = Types.required(INT64) + .as(timestampType(true, MICROS)).named("aTimestamp"); + PrimitiveType nonUtcMicrosActual = Types.required(INT64) + .as(timestampType(false, MICROS)).named("aTimestamp"); + + Assert.assertEquals(utcMillisExpected, utcMillisActual); + Assert.assertEquals(nonUtcMillisExpected, nonUtcMillisActual); + Assert.assertEquals(utcMicrosExpected, utcMicrosActual); + Assert.assertEquals(nonUtcMicrosExpected, nonUtcMicrosActual); + } + + @Test(expected = IllegalArgumentException.class) + public void testDecimalLogicalTypeWithDeprecatedScaleMismatch() { + Types.required(BINARY) + .as(LogicalTypeAnnotation.decimalType(3, 4)) + .scale(4).named("aDecimal"); + } + + @Test(expected = IllegalArgumentException.class) + public void testDecimalLogicalTypeWithDeprecatedPrecisionMismatch() { + Types.required(BINARY) + .as(LogicalTypeAnnotation.decimalType(3, 4)) + .precision(5).named("aDecimal"); + } + + /** + * A convenience method to avoid a large number of @Test(expected=...) tests + * @param message A String message to describe this assertion + * @param expected An Exception class that the Runnable should throw + * @param callable A Callable that is expected to throw the exception + */ + public static void assertThrows( + String message, Class expected, Callable callable) { + try { + callable.call(); + Assert.fail("No exception was thrown (" + message + "), expected: " + + expected.getName()); + } catch (Exception actual) { + Assert.assertEquals(message, expected, actual.getClass()); + } + } +} diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java index 9478e94205..65c596b4e0 100644 --- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java +++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java @@ -18,6 +18,9 @@ */ package org.apache.parquet.format.converter; +import static java.util.Optional.empty; + +import static java.util.Optional.empty; import static java.util.Optional.of; import static org.apache.parquet.format.Util.readFileMetaData; import static org.apache.parquet.format.Util.writePageHeader; @@ -53,6 +56,7 @@ import org.apache.parquet.format.MapType; import org.apache.parquet.format.MicroSeconds; import org.apache.parquet.format.MilliSeconds; +import org.apache.parquet.format.NanoSeconds; import org.apache.parquet.format.NullType; import org.apache.parquet.format.PageEncodingStats; import org.apache.parquet.format.StringType; @@ -249,7 +253,7 @@ LogicalType convertToLogicalType(LogicalTypeAnnotation logicalTypeAnnotation) { } ConvertedType convertToConvertedType(LogicalTypeAnnotation logicalTypeAnnotation) { - return logicalTypeAnnotation.accept(CONVERTED_TYPE_CONVERTER_VISITOR).get(); + return logicalTypeAnnotation.accept(CONVERTED_TYPE_CONVERTER_VISITOR).orElse(null); } static org.apache.parquet.format.TimeUnit convertUnit(LogicalTypeAnnotation.TimeUnit unit) { @@ -258,6 +262,8 @@ static org.apache.parquet.format.TimeUnit convertUnit(LogicalTypeAnnotation.Time return org.apache.parquet.format.TimeUnit.MICROS(new MicroSeconds()); case MILLIS: return org.apache.parquet.format.TimeUnit.MILLIS(new MilliSeconds()); + case NANOS: + return TimeUnit.NANOS(new NanoSeconds()); default: throw new RuntimeException("Unknown time unit " + unit); } @@ -301,6 +307,8 @@ public Optional visit(LogicalTypeAnnotation.TimeLogicalTypeAnnota return of(ConvertedType.TIME_MILLIS); case MICROS: return of(ConvertedType.TIME_MICROS); + case NANOS: + return empty(); default: throw new RuntimeException("Unknown converted type for " + timeLogicalType.toOriginalType()); } @@ -313,6 +321,8 @@ public Optional visit(LogicalTypeAnnotation.TimestampLogicalTypeA return of(ConvertedType.TIMESTAMP_MICROS); case MILLIS: return of(ConvertedType.TIMESTAMP_MILLIS); + case NANOS: + return empty(); default: throw new RuntimeException("Unknown converted type for " + timestampLogicalType.toOriginalType()); } @@ -936,6 +946,8 @@ private LogicalTypeAnnotation.TimeUnit convertTimeUnit(TimeUnit unit) { return LogicalTypeAnnotation.TimeUnit.MICROS; case MILLIS: return LogicalTypeAnnotation.TimeUnit.MILLIS; + case NANOS: + return LogicalTypeAnnotation.TimeUnit.NANOS; default: throw new RuntimeException("Unknown time unit " + unit); } diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java index d1a3a3c233..5fdf62242b 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java @@ -201,6 +201,12 @@ public void testTimeLogicalTypes() { .required(PrimitiveTypeName.INT64) .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.MICROS)) .named("aTimestampUtcMicros") + .required(PrimitiveTypeName.INT64) + .as(timestampType(false, LogicalTypeAnnotation.TimeUnit.NANOS)) + .named("aTimestampNonUtcNanos") + .required(PrimitiveTypeName.INT64) + .as(timestampType(true, LogicalTypeAnnotation.TimeUnit.NANOS)) + .named("aTimestampUtcNanos") .required(PrimitiveTypeName.INT32) .as(timeType(false, LogicalTypeAnnotation.TimeUnit.MILLIS)) .named("aTimeNonUtcMillis") @@ -213,6 +219,12 @@ public void testTimeLogicalTypes() { .required(PrimitiveTypeName.INT64) .as(timeType(true, LogicalTypeAnnotation.TimeUnit.MICROS)) .named("aTimeUtcMicros") + .required(PrimitiveTypeName.INT64) + .as(timeType(false, LogicalTypeAnnotation.TimeUnit.NANOS)) + .named("aTimeNonUtcNanos") + .required(PrimitiveTypeName.INT64) + .as(timeType(true, LogicalTypeAnnotation.TimeUnit.NANOS)) + .named("aTimeUtcNanos") .named("Message"); List parquetSchema = parquetMetadataConverter.toParquetSchema(expected); MessageType schema = parquetMetadataConverter.fromParquetSchema(parquetSchema, null); diff --git a/pom.xml b/pom.xml index 7b3f36fe5b..1990de1a99 100644 --- a/pom.xml +++ b/pom.xml @@ -81,7 +81,7 @@ 1.2.1 2.7.1 3.1.2 - 2.4.0 + 2.6.0 1.7.0 thrift 2.10.6 @@ -213,13 +213,13 @@ - + org.apache.maven.plugins maven-resources-plugin 2.7 - + maven-enforcer-plugin 1.3.1 @@ -373,7 +373,7 @@ - + org.apache.maven.plugins maven-resources-plugin @@ -388,8 +388,8 @@ true - - + + org.apache.maven.plugins