diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java b/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java index 1136ad63b410..edebfd3420e2 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java @@ -238,7 +238,7 @@ public void testSetTableInputInformationWithRemovedColumns() { Schema newSchema = new Schema(Types.NestedField.required(1, "x", Types.StringType.get(), "comment1")); - tableMetadata = tableMetadata.updateSchema(newSchema, 3); + tableMetadata = tableMetadata.updateSchema(newSchema); IcebergToGlueConverter.setTableInputInformation(actualTableInputBuilder, tableMetadata); TableInput actualTableInput = actualTableInputBuilder.build(); diff --git a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java index 49fb1fe01c44..ba038c196e43 100644 --- a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java +++ b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java @@ -86,6 +86,16 @@ class AddSchema implements MetadataUpdate { private final Schema schema; private final int lastColumnId; + public AddSchema(Schema schema) { + this(schema, schema.highestFieldId()); + } + + /** + * Set the schema + * + * @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use AddSchema(schema).
+ */ + @Deprecated public AddSchema(Schema schema, int lastColumnId) { this.schema = schema; this.lastColumnId = lastColumnId; diff --git a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java index 069097778606..2b541080ac72 100644 --- a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java +++ b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java @@ -444,7 +444,7 @@ public Schema apply() { @Override public void commit() { - TableMetadata update = applyChangesToMetadata(base.updateSchema(apply(), lastColumnId)); + TableMetadata update = applyChangesToMetadata(base.updateSchema(apply())); ops.commit(base, update); } diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index 3cdc53995dce..b507fd833a01 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -562,10 +562,23 @@ public TableMetadata withUUID() { return new Builder(this).assignUUID().build(); } + /** + * Updates the schema + * + * @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use updateSchema(schema). 
+ */ + @Deprecated public TableMetadata updateSchema(Schema newSchema, int newLastColumnId) { return new Builder(this).setCurrentSchema(newSchema, newLastColumnId).build(); } + /** Updates the schema */ + public TableMetadata updateSchema(Schema newSchema) { + return new Builder(this) + .setCurrentSchema(newSchema, Math.max(this.lastColumnId, newSchema.highestFieldId())) + .build(); + } + // The caller is responsible to pass a newPartitionSpec with correct partition field IDs public TableMetadata updatePartitionSpec(PartitionSpec newPartitionSpec) { return new Builder(this).setDefaultPartitionSpec(newPartitionSpec).build(); @@ -1081,8 +1094,18 @@ public Builder setCurrentSchema(int schemaId) { return this; } + public Builder addSchema(Schema schema) { + addSchemaInternal(schema, Math.max(lastColumnId, schema.highestFieldId())); + return this; + } + + /** + * Add a new schema. + * + * @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use addSchema(schema). + */ + @Deprecated public Builder addSchema(Schema schema, int newLastColumnId) { - // TODO: remove requirement for newLastColumnId addSchemaInternal(schema, newLastColumnId); return this; } diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java index cc42604f700d..d74b9fc8c5d0 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java @@ -954,7 +954,7 @@ private static List createChanges(TableMetadata meta) { changes.add(new MetadataUpdate.UpgradeFormatVersion(meta.formatVersion())); Schema schema = meta.schema(); - changes.add(new MetadataUpdate.AddSchema(schema, schema.highestFieldId())); + changes.add(new MetadataUpdate.AddSchema(schema)); changes.add(new MetadataUpdate.SetCurrentSchema(-1)); PartitionSpec spec = meta.spec(); diff --git a/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java
b/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java index ae837ff96882..94f3a56ba931 100644 --- a/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java +++ b/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java @@ -372,20 +372,15 @@ private int addSchemaInternal(Schema schema) { newSchema = schema; } - int highestFieldId = Math.max(highestFieldId(), newSchema.highestFieldId()); schemas.add(newSchema); schemasById.put(newSchema.schemaId(), newSchema); - changes.add(new MetadataUpdate.AddSchema(newSchema, highestFieldId)); + changes.add(new MetadataUpdate.AddSchema(newSchema)); this.lastAddedSchemaId = newSchemaId; return newSchemaId; } - private int highestFieldId() { - return schemas.stream().map(Schema::highestFieldId).max(Integer::compareTo).orElse(0); - } - private int reuseOrCreateNewSchemaId(Schema newSchema) { // if the schema already exists, use its id; otherwise use the highest id + 1 int newSchemaId = INITIAL_SCHEMA_ID; diff --git a/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java b/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java index bfed6ebebe2c..cae19fece4e9 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java +++ b/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java @@ -112,23 +112,9 @@ public void testUpgradeFormatVersionFromJson() { public void testAddSchemaFromJson() { String action = MetadataUpdateParser.ADD_SCHEMA; Schema schema = ID_DATA_SCHEMA; - int lastColumnId = schema.highestFieldId(); - String json = - String.format( - "{\"action\":\"add-schema\",\"schema\":%s,\"last-column-id\":%d}", - SchemaParser.toJson(schema), lastColumnId); - MetadataUpdate actualUpdate = new MetadataUpdate.AddSchema(schema, lastColumnId); - assertEquals(action, actualUpdate, MetadataUpdateParser.fromJson(json)); - } - - @Test - public void testAddSchemaFromJsonWithoutLastColumnId() { - String action = MetadataUpdateParser.ADD_SCHEMA; - Schema schema = 
ID_DATA_SCHEMA; - int lastColumnId = schema.highestFieldId(); String json = String.format("{\"action\":\"add-schema\",\"schema\":%s}", SchemaParser.toJson(schema)); - MetadataUpdate actualUpdate = new MetadataUpdate.AddSchema(schema, lastColumnId); + MetadataUpdate actualUpdate = new MetadataUpdate.AddSchema(schema); assertEquals(action, actualUpdate, MetadataUpdateParser.fromJson(json)); } @@ -140,7 +126,7 @@ public void testAddSchemaToJson() { String.format( "{\"action\":\"add-schema\",\"schema\":%s,\"last-column-id\":%d}", SchemaParser.toJson(schema), lastColumnId); - MetadataUpdate update = new MetadataUpdate.AddSchema(schema, lastColumnId); + MetadataUpdate update = new MetadataUpdate.AddSchema(schema); String actual = MetadataUpdateParser.toJson(update); assertThat(actual) .as("Add schema should convert to the correct JSON value") diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 71254b3abb1b..ba395a9e4486 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -1369,7 +1369,7 @@ public void testUpdateSchemaIdentifierFields() { new Schema( Lists.newArrayList(Types.NestedField.required(1, "x", Types.StringType.get())), Sets.newHashSet(1)); - TableMetadata newMeta = meta.updateSchema(newSchema, 1); + TableMetadata newMeta = meta.updateSchema(newSchema); assertThat(newMeta.schemas()).hasSize(2); assertThat(newMeta.schema().identifierFieldIds()).containsExactly(1); } @@ -1391,7 +1391,7 @@ public void testUpdateSchema() { new Schema( Types.NestedField.required(1, "y", Types.LongType.get(), "comment"), Types.NestedField.required(2, "x", Types.StringType.get())); - TableMetadata twoSchemasTable = freshTable.updateSchema(schema2, 2); + TableMetadata twoSchemasTable = freshTable.updateSchema(schema2); assertThat(twoSchemasTable.currentSchemaId()).isEqualTo(1); assertSameSchemaList( 
ImmutableList.of(schema, new Schema(1, schema2.columns())), twoSchemasTable.schemas()); @@ -1403,26 +1403,26 @@ public void testUpdateSchema() { new Schema( Types.NestedField.required(1, "y", Types.LongType.get(), "comment"), Types.NestedField.required(2, "x", Types.StringType.get())); - TableMetadata sameSchemaTable = twoSchemasTable.updateSchema(sameSchema2, 2); + TableMetadata sameSchemaTable = twoSchemasTable.updateSchema(sameSchema2); assertThat(sameSchemaTable).isSameAs(twoSchemasTable); // update schema with the same schema and different last column ID as current should create // a new table - TableMetadata differentColumnIdTable = sameSchemaTable.updateSchema(sameSchema2, 3); + TableMetadata differentColumnIdTable = sameSchemaTable.updateSchema(sameSchema2); assertThat(differentColumnIdTable.currentSchemaId()).isEqualTo(1); assertSameSchemaList( ImmutableList.of(schema, new Schema(1, schema2.columns())), differentColumnIdTable.schemas()); assertThat(differentColumnIdTable.schema().asStruct()).isEqualTo(schema2.asStruct()); - assertThat(differentColumnIdTable.lastColumnId()).isEqualTo(3); + assertThat(differentColumnIdTable.lastColumnId()).isEqualTo(2); // update schema with old schema does not change schemas - TableMetadata revertSchemaTable = differentColumnIdTable.updateSchema(schema, 3); + TableMetadata revertSchemaTable = differentColumnIdTable.updateSchema(schema); assertThat(revertSchemaTable.currentSchemaId()).isEqualTo(0); assertSameSchemaList( ImmutableList.of(schema, new Schema(1, schema2.columns())), revertSchemaTable.schemas()); assertThat(revertSchemaTable.schema().asStruct()).isEqualTo(schema.asStruct()); - assertThat(revertSchemaTable.lastColumnId()).isEqualTo(3); + assertThat(revertSchemaTable.lastColumnId()).isEqualTo(2); // create new schema will use the largest schema id + 1 Schema schema3 = @@ -1430,7 +1430,7 @@ public void testUpdateSchema() { Types.NestedField.required(2, "y", Types.LongType.get(), "comment"), 
Types.NestedField.required(4, "x", Types.StringType.get()), Types.NestedField.required(6, "z", Types.IntegerType.get())); - TableMetadata threeSchemaTable = revertSchemaTable.updateSchema(schema3, 6); + TableMetadata threeSchemaTable = revertSchemaTable.updateSchema(schema3); assertThat(threeSchemaTable.currentSchemaId()).isEqualTo(2); assertSameSchemaList( ImmutableList.of( diff --git a/core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java b/core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java index 1a6c289ea241..e5b342850820 100644 --- a/core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java +++ b/core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java @@ -223,9 +223,9 @@ public void addSchema() { UpdateRequirements.forUpdateTable( metadata, ImmutableList.of( - new MetadataUpdate.AddSchema(new Schema(), lastColumnId), - new MetadataUpdate.AddSchema(new Schema(), lastColumnId + 1), - new MetadataUpdate.AddSchema(new Schema(), lastColumnId + 2))); + new MetadataUpdate.AddSchema(new Schema()), + new MetadataUpdate.AddSchema(new Schema()), + new MetadataUpdate.AddSchema(new Schema()))); requirements.forEach(req -> req.validate(metadata)); assertThat(requirements) @@ -253,9 +253,9 @@ public void addSchemaFailure() { UpdateRequirements.forUpdateTable( metadata, ImmutableList.of( - new MetadataUpdate.AddSchema(new Schema(), 1), - new MetadataUpdate.AddSchema(new Schema(), 2), - new MetadataUpdate.AddSchema(new Schema(), 3))); + new MetadataUpdate.AddSchema(new Schema()), + new MetadataUpdate.AddSchema(new Schema()), + new MetadataUpdate.AddSchema(new Schema()))); assertThatThrownBy(() -> requirements.forEach(req -> req.validate(updated))) .isInstanceOf(CommitFailedException.class) @@ -269,9 +269,9 @@ public void addSchemaForView() { UpdateRequirements.forReplaceView( viewMetadata, ImmutableList.of( - new MetadataUpdate.AddSchema(new Schema(), lastColumnId), - new MetadataUpdate.AddSchema(new Schema(), lastColumnId + 1), - 
new MetadataUpdate.AddSchema(new Schema(), lastColumnId + 2))); + new MetadataUpdate.AddSchema(new Schema()), + new MetadataUpdate.AddSchema(new Schema()), + new MetadataUpdate.AddSchema(new Schema()))); requirements.forEach(req -> req.validate(viewMetadata)); assertThat(requirements) diff --git a/open-api/rest-catalog-open-api.py b/open-api/rest-catalog-open-api.py index c3372544ef95..d63e9bfe548c 100644 --- a/open-api/rest-catalog-open-api.py +++ b/open-api/rest-catalog-open-api.py @@ -1152,7 +1152,7 @@ class AddSchemaUpdate(BaseUpdate): last_column_id: Optional[int] = Field( None, alias='last-column-id', - description='The highest assigned column ID for the table. This is used to ensure columns are always assigned an unused ID when evolving schemas. When omitted, it will be computed on the server side.', + description="This optional field is **DEPRECATED for REMOVAL** since it is safer to handle this internally, and shouldn't be exposed to the clients.\nThe highest assigned column ID for the table. This is used to ensure columns are always assigned an unused ID when evolving schemas. When omitted, it will be computed on the server side.", ) diff --git a/open-api/rest-catalog-open-api.yaml b/open-api/rest-catalog-open-api.yaml index 9635af96c1ca..a154ce97b564 100644 --- a/open-api/rest-catalog-open-api.yaml +++ b/open-api/rest-catalog-open-api.yaml @@ -2692,7 +2692,13 @@ components: $ref: '#/components/schemas/Schema' last-column-id: type: integer - description: The highest assigned column ID for the table. This is used to ensure columns are always assigned an unused ID when evolving schemas. When omitted, it will be computed on the server side. + deprecated: true + description: This optional field is **DEPRECATED for REMOVAL** since it is safer to handle this internally, and shouldn't be exposed to the clients. The highest assigned column ID for the table. This is used to ensure columns are always assigned an unused ID when evolving schemas.
When omitted, it will be computed on the server side. SetCurrentSchemaUpdate: allOf: diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkMetadataColumns.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkMetadataColumns.java index 230a660c0117..93f39299116b 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkMetadataColumns.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkMetadataColumns.java @@ -189,10 +189,7 @@ public void testPartitionMetadataColumnWithManyColumns() { TableOperations ops = ((HasTableOperations) table).operations(); TableMetadata base = ops.current(); - ops.commit( - base, - base.updateSchema(manyColumnsSchema, manyColumnsSchema.highestFieldId()) - .updatePartitionSpec(spec)); + ops.commit(base, base.updateSchema(manyColumnsSchema).updatePartitionSpec(spec)); Dataset df = spark diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadProjection.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadProjection.java index 99a327402d97..becf6a064dcc 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadProjection.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadProjection.java @@ -152,8 +152,7 @@ protected Record writeAndRead(String desc, Schema writeSchema, Schema readSchema Schema expectedSchema = reassignIds(readSchema, idMapping); // Set the schema to the expected schema directly to simulate the table schema evolving - TestTables.replaceMetadata( - desc, TestTables.readMetadata(desc).updateSchema(expectedSchema, 100)); + TestTables.replaceMetadata(desc, TestTables.readMetadata(desc).updateSchema(expectedSchema)); Dataset df = spark