diff --git a/pyiceberg/table/update/snapshot.py b/pyiceberg/table/update/snapshot.py index 37d120969a..1df4e64c24 100644 --- a/pyiceberg/table/update/snapshot.py +++ b/pyiceberg/table/update/snapshot.py @@ -652,7 +652,7 @@ def _get_entries(manifest: ManifestFile) -> list[ManifestEntry]: return [ ManifestEntry.from_args( status=ManifestEntryStatus.DELETED, - snapshot_id=entry.snapshot_id, + snapshot_id=self._snapshot_id, sequence_number=entry.sequence_number, file_sequence_number=entry.file_sequence_number, data_file=entry.data_file, diff --git a/tests/integration/test_deletes.py b/tests/integration/test_deletes.py index e3b487e465..c49689b716 100644 --- a/tests/integration/test_deletes.py +++ b/tests/integration/test_deletes.py @@ -975,3 +975,52 @@ def assert_manifest_entry(expected_status: ManifestEntryStatus, expected_snapsho assert after_delete_snapshot is not None assert_manifest_entry(ManifestEntryStatus.DELETED, after_delete_snapshot.snapshot_id) + + +@pytest.mark.integration +def test_manifest_entry_snapshot_id_after_partial_deletes(session_catalog: RestCatalog) -> None: + identifier = "default.test_manifest_entry_snapshot_id_after_partial_deletes" + try: + session_catalog.drop_table(identifier) + except NoSuchTableError: + pass + + schema = pa.schema( + [ + ("id", pa.int32()), + ("name", pa.string()), + ] + ) + + table = session_catalog.create_table(identifier, schema) + data = pa.Table.from_pylist( + [ + {"id": 1, "name": "keep"}, + {"id": 2, "name": "keep"}, + {"id": 3, "name": "delete_me"}, + {"id": 4, "name": "delete_me"}, + ], + schema=schema, + ) + table.append(data) + + # Partial delete: only some rows match, triggering CoW overwrite via _OverwriteFiles + table.delete(EqualTo("name", "delete_me")) + after_delete_snapshot = table.refresh().current_snapshot() + assert after_delete_snapshot is not None + + manifests = after_delete_snapshot.manifests(table.io) + deleted_entries = [ + entry + for manifest in manifests + for entry in manifest.fetch_manifest_entry(table.io, discard_deleted=False) + if entry.status == ManifestEntryStatus.DELETED + ] + + assert len(deleted_entries) > 0, "Expected at least one DELETED manifest entry from the CoW overwrite" + + for entry in deleted_entries: + assert entry.snapshot_id == after_delete_snapshot.snapshot_id, ( + f"DELETED entry snapshot_id should be {after_delete_snapshot.snapshot_id} " + f"(the deleting snapshot), but was {entry.snapshot_id}" + )