Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1111,6 +1111,7 @@ def refresh(self) -> Table:
An updated instance of the same Iceberg table
"""
fresh = self.catalog.load_table(self._identifier)
self._check_uuid(fresh.metadata)
self.metadata = fresh.metadata
self.io = fresh.io
self.metadata_location = fresh.metadata_location
Expand Down Expand Up @@ -1491,9 +1492,21 @@ def refs(self) -> dict[str, SnapshotRef]:
"""Return the snapshot references in the table."""
return self.metadata.refs

def _check_uuid(self, new_metadata: TableMetadata) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: i find this easier to read as a staticmethod, wdyt?

@staticmethod
def _check_uuid(current_metadata: TableMetadata, new_metadata: TableMetadata)

"""Validate that the table UUID matches after refresh or commit."""
current = self.metadata.table_uuid
refreshed = new_metadata.table_uuid

if current and refreshed and current != refreshed:
raise ValueError(f"Table UUID does not match: current={current} != refreshed={refreshed}")

def _do_commit(self, updates: tuple[TableUpdate, ...], requirements: tuple[TableRequirement, ...]) -> None:
response = self.catalog.commit_table(self, requirements, updates)

# Only check UUID for existing tables, not new tables
if not isinstance(self, StagedTable):
self._check_uuid(response.metadata)

Comment on lines +1506 to +1509
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the scenario where the commit response has a different table uuid?
The commit request should include AssertTableUUID so I would expect the catalog to verify that

# https://github.com/apache/iceberg/blob/f6faa58/core/src/main/java/org/apache/iceberg/CatalogUtil.java#L527
# delete old metadata if METADATA_DELETE_AFTER_COMMIT_ENABLED is set to true and uses
# TableProperties.METADATA_PREVIOUS_VERSIONS_MAX to determine how many previous versions to keep -
Expand Down
84 changes: 84 additions & 0 deletions tests/catalog/test_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from pyiceberg.table.sorting import SortField, SortOrder
from pyiceberg.transforms import IdentityTransform, TruncateTransform
from pyiceberg.typedef import RecursiveDict
from pyiceberg.types import StringType
from pyiceberg.utils.config import Config

TEST_URI = "https://iceberg-test-catalog/"
Expand Down Expand Up @@ -2155,3 +2156,86 @@ def test_view_endpoints_enabled_with_config(self, requests_mock: Mocker) -> None
# View endpoints should be supported when enabled
catalog._check_endpoint(Capability.V1_LIST_VIEWS)
catalog._check_endpoint(Capability.V1_DELETE_VIEW)


def test_table_uuid_check_on_commit(rest_mock: Mocker, example_table_metadata_v2: dict[str, Any]) -> None:
original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1"
different_uuid = "550e8400-e29b-41d4-a716-446655440000"
metadata_location = "s3://warehouse/database/table/metadata.json"

rest_mock.get(
f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
json={
"metadata-location": metadata_location,
"metadata": example_table_metadata_v2,
"config": {},
},
status_code=200,
request_headers=TEST_HEADERS,
)

catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
table = catalog.load_table(("namespace", "table_name"))

assert str(table.metadata.table_uuid) == original_uuid

metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid}

rest_mock.post(
f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
json={
"metadata-location": metadata_location,
"metadata": metadata_with_different_uuid,
},
status_code=200,
request_headers=TEST_HEADERS,
)

with pytest.raises(ValueError) as exc_info:
table.update_schema().add_column("new_col", StringType()).commit()

assert "Table UUID does not match" in str(exc_info.value)
assert f"current={original_uuid}" in str(exc_info.value)
assert f"refreshed={different_uuid}" in str(exc_info.value)


def test_table_uuid_check_on_refresh(rest_mock: Mocker, example_table_metadata_v2: dict[str, Any]) -> None:
original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1"
different_uuid = "550e8400-e29b-41d4-a716-446655440000"
metadata_location = "s3://warehouse/database/table/metadata.json"

rest_mock.get(
f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
json={
"metadata-location": metadata_location,
"metadata": example_table_metadata_v2,
"config": {},
},
status_code=200,
request_headers=TEST_HEADERS,
)

catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
table = catalog.load_table(("namespace", "table_name"))

assert str(table.metadata.table_uuid) == original_uuid

metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid}

rest_mock.get(
f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
json={
"metadata-location": metadata_location,
"metadata": metadata_with_different_uuid,
"config": {},
},
status_code=200,
request_headers=TEST_HEADERS,
)

with pytest.raises(ValueError) as exc_info:
table.refresh()

assert "Table UUID does not match" in str(exc_info.value)
assert f"current={original_uuid}" in str(exc_info.value)
assert f"refreshed={different_uuid}" in str(exc_info.value)
18 changes: 18 additions & 0 deletions tests/table/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -1639,3 +1639,21 @@ def model_roundtrips(model: BaseModel) -> bool:
if model != type(model).model_validate(model_data):
pytest.fail(f"model {type(model)} did not roundtrip successfully")
return True


def test_check_uuid_raises_when_mismatch(table_v2: Table, example_table_metadata_v2: dict[str, Any]) -> None:
different_uuid = "550e8400-e29b-41d4-a716-446655440000"
metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid}
new_metadata = TableMetadataV2(**metadata_with_different_uuid)

with pytest.raises(ValueError) as exc_info:
table_v2._check_uuid(new_metadata)

assert "Table UUID does not match" in str(exc_info.value)
assert different_uuid in str(exc_info.value)


def test_check_uuid_passes_when_match(table_v2: Table, example_table_metadata_v2: dict[str, Any]) -> None:
new_metadata = TableMetadataV2(**example_table_metadata_v2)
# Should not raise with same uuid
table_v2._check_uuid(new_metadata)