Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Merge branch 'main' into honah_glue_endpoint_config
# Conflicts:
#	tests/catalog/test_glue.py
  • Loading branch information
HonahX committed Jul 12, 2024
commit 95444942bb34f854e7857e825bafdefdbb9e0028
148 changes: 147 additions & 1 deletion tests/catalog/test_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,153 @@ def test_commit_table_properties(

updated_table_metadata = table.metadata
assert test_catalog._parse_metadata_version(table.metadata_location) == 1
assert updated_table_metadata.properties == {"test_a": "test_aa", "test_c": "test_c"}
assert updated_table_metadata.properties == {"Description": "test_description", "test_a": "test_aa", "test_c": "test_c"}

table_info = _glue.get_table(
DatabaseName=database_name,
Name=table_name,
)
assert table_info["Table"]["Description"] == "test_description"


@mock_aws
def test_commit_append_table_snapshot_properties(
_bucket_initialize: None, moto_endpoint_url: str, table_schema_simple: Schema, database_name: str, table_name: str
) -> None:
catalog_name = "glue"
identifier = (database_name, table_name)
test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"})
test_catalog.create_namespace(namespace=database_name)
table = test_catalog.create_table(identifier=identifier, schema=table_schema_simple)

assert test_catalog._parse_metadata_version(table.metadata_location) == 0

table.append(
pa.Table.from_pylist(
[{"foo": "foo_val", "bar": 1, "baz": False}],
schema=schema_to_pyarrow(table_schema_simple),
),
snapshot_properties={"snapshot_prop_a": "test_prop_a"},
)

updated_table_metadata = table.metadata
summary = updated_table_metadata.snapshots[-1].summary
assert test_catalog._parse_metadata_version(table.metadata_location) == 1
assert summary is not None
assert summary["snapshot_prop_a"] == "test_prop_a"


@mock_aws
def test_commit_overwrite_table_snapshot_properties(
_bucket_initialize: None, moto_endpoint_url: str, table_schema_simple: Schema, database_name: str, table_name: str
) -> None:
catalog_name = "glue"
identifier = (database_name, table_name)
test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"})
test_catalog.create_namespace(namespace=database_name)
table = test_catalog.create_table(identifier=identifier, schema=table_schema_simple)

assert test_catalog._parse_metadata_version(table.metadata_location) == 0

table.append(
pa.Table.from_pylist(
[{"foo": "foo_val", "bar": 1, "baz": False}],
schema=schema_to_pyarrow(table_schema_simple),
),
snapshot_properties={"snapshot_prop_a": "test_prop_a"},
)

assert test_catalog._parse_metadata_version(table.metadata_location) == 1

table.overwrite(
pa.Table.from_pylist(
[{"foo": "foo_val", "bar": 2, "baz": True}],
schema=schema_to_pyarrow(table_schema_simple),
),
snapshot_properties={"snapshot_prop_b": "test_prop_b"},
)

updated_table_metadata = table.metadata
summary = updated_table_metadata.snapshots[-1].summary
assert test_catalog._parse_metadata_version(table.metadata_location) == 2
assert summary is not None
assert summary["snapshot_prop_a"] is None
assert summary["snapshot_prop_b"] == "test_prop_b"


@mock_aws
@pytest.mark.parametrize("format_version", [1, 2])
def test_create_table_transaction(
_glue: boto3.client,
_bucket_initialize: None,
moto_endpoint_url: str,
table_schema_nested: Schema,
database_name: str,
table_name: str,
format_version: int,
) -> None:
catalog_name = "glue"
identifier = (database_name, table_name)
test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"})
test_catalog.create_namespace(namespace=database_name)

with test_catalog.create_table_transaction(
identifier,
table_schema_nested,
partition_spec=PartitionSpec(PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="foo")),
properties={"format-version": format_version},
) as txn:
with txn.update_schema() as update_schema:
update_schema.add_column(path="b", field_type=IntegerType())

with txn.update_spec() as update_spec:
update_spec.add_identity("bar")

txn.set_properties(test_a="test_aa", test_b="test_b", test_c="test_c")

table = test_catalog.load_table(identifier)

assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location)
assert test_catalog._parse_metadata_version(table.metadata_location) == 0
assert table.format_version == format_version
assert table.schema().find_field("b").field_type == IntegerType()
assert table.properties == {"test_a": "test_aa", "test_b": "test_b", "test_c": "test_c"}
assert table.spec().last_assigned_field_id == 1001
assert table.spec().fields_by_source_id(1)[0].name == "foo"
assert table.spec().fields_by_source_id(1)[0].field_id == 1000
assert table.spec().fields_by_source_id(1)[0].transform == IdentityTransform()
assert table.spec().fields_by_source_id(2)[0].name == "bar"
assert table.spec().fields_by_source_id(2)[0].field_id == 1001
assert table.spec().fields_by_source_id(2)[0].transform == IdentityTransform()


@mock_aws
def test_table_exists(
_bucket_initialize: None, moto_endpoint_url: str, table_schema_simple: Schema, database_name: str, table_name: str
) -> None:
catalog_name = "glue"
identifier = (database_name, table_name)
test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"})
test_catalog.create_namespace(namespace=database_name)
test_catalog.create_table(identifier=identifier, schema=table_schema_simple)
# Act and Assert for an existing table
assert test_catalog.table_exists(identifier) is True
# Act and Assert for a non-existing table
assert test_catalog.table_exists(("non", "exist")) is False


@mock_aws
def test_register_table_with_given_location(
_bucket_initialize: None, moto_endpoint_url: str, metadata_location: str, database_name: str, table_name: str
) -> None:
catalog_name = "glue"
identifier = (database_name, table_name)
location = metadata_location
test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"})
test_catalog.create_namespace(namespace=database_name, properties={"location": f"s3://{BUCKET_NAME}/{database_name}.db"})
table = test_catalog.register_table(identifier, location)
assert table.identifier == (catalog_name,) + identifier
assert test_catalog.table_exists(identifier) is True


def test_glue_endpoint_override(moto_endpoint_url: str, database_name: str) -> None:
Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.