You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2022/08/19 16:51:11 UTC
[iceberg] branch master updated: Python: Add Table API methods (#5562)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 62a95cfd72 Python: Add Table API methods (#5562)
62a95cfd72 is described below
commit 62a95cfd72a0fee780c616866f0c97b8032c5621
Author: Fokko Driesprong <fo...@apache.org>
AuthorDate: Fri Aug 19 18:51:04 2022 +0200
Python: Add Table API methods (#5562)
---
python/pyiceberg/cli/console.py | 17 ++--
python/pyiceberg/cli/output.py | 7 +-
python/pyiceberg/table/__init__.py | 71 +++++++++++++-
python/pyiceberg/table/metadata.py | 18 +---
python/pyiceberg/table/snapshots.py | 10 ++
python/pyiceberg/table/sorting.py | 2 +-
python/tests/catalog/test_hive.py | 27 ++++--
python/tests/cli/test_console.py | 2 +-
python/tests/conftest.py | 3 +-
python/tests/table/test_init.py | 180 ++++++++++++++++++++++++++++++++++++
python/tests/table/test_metadata.py | 21 ++++-
11 files changed, 315 insertions(+), 43 deletions(-)
diff --git a/python/pyiceberg/cli/console.py b/python/pyiceberg/cli/console.py
index c95508d2b9..b4e0c15d95 100644
--- a/python/pyiceberg/cli/console.py
+++ b/python/pyiceberg/cli/console.py
@@ -146,9 +146,8 @@ def schema(ctx: Context, identifier: str):
catalog, output = _catalog_and_output(ctx)
try:
- metadata = catalog.load_table(identifier).metadata
- assert metadata
- output.schema(metadata.current_schema())
+ table = catalog.load_table(identifier)
+ output.schema(table.schema())
except Exception as exc:
output.exception(exc)
ctx.exit(1)
@@ -161,9 +160,8 @@ def spec(ctx: Context, identifier: str):
"""Returns the partition spec of the table"""
catalog, output = _catalog_and_output(ctx)
try:
- metadata = catalog.load_table(identifier).metadata
- assert metadata
- output.spec(metadata.current_partition_spec())
+ table = catalog.load_table(identifier)
+ output.spec(table.spec())
except Exception as exc:
output.exception(exc)
ctx.exit(1)
@@ -177,7 +175,6 @@ def uuid(ctx: Context, identifier: str):
catalog, output = _catalog_and_output(ctx)
try:
metadata = catalog.load_table(identifier).metadata
- assert metadata
output.uuid(metadata.table_uuid)
except Exception as exc:
output.exception(exc)
@@ -191,9 +188,8 @@ def location(ctx: Context, identifier: str):
"""Returns the location of the table"""
catalog, output = _catalog_and_output(ctx)
try:
- metadata = catalog.load_table(identifier).metadata
- assert metadata
- output.text(metadata.location)
+ table = catalog.load_table(identifier)
+ output.text(table.location())
except Exception as exc:
output.exception(exc)
ctx.exit(1)
@@ -389,7 +385,6 @@ def table(ctx: Context, identifier: str, property_name: str): # noqa: F811
catalog, output = _catalog_and_output(ctx)
try:
table = catalog.load_table(identifier)
- assert table.metadata
if property_name in table.metadata.properties:
# We should think of the process here
# Do we want something similar as in Java:
diff --git a/python/pyiceberg/cli/output.py b/python/pyiceberg/cli/output.py
index 9b635e1741..f3dbc45163 100644
--- a/python/pyiceberg/cli/output.py
+++ b/python/pyiceberg/cli/output.py
@@ -83,7 +83,6 @@ class ConsoleOutput(Output):
Console().print(table)
def describe_table(self, table: Table):
- assert table.metadata
metadata = table.metadata
table_properties = self._table
@@ -91,7 +90,7 @@ class ConsoleOutput(Output):
table_properties.add_row(key, value)
schema_tree = Tree("Schema")
- for field in metadata.current_schema().fields:
+ for field in table.schema().fields:
schema_tree.add(str(field))
snapshot_tree = Tree("Snapshots")
@@ -103,8 +102,8 @@ class ConsoleOutput(Output):
output_table.add_row("Metadata location", table.metadata_location)
output_table.add_row("Table UUID", str(table.metadata.table_uuid))
output_table.add_row("Last Updated", str(metadata.last_updated_ms))
- output_table.add_row("Partition spec", str(metadata.current_partition_spec()))
- output_table.add_row("Sort order", str(metadata.current_sort_order()))
+ output_table.add_row("Partition spec", str(table.spec()))
+ output_table.add_row("Sort order", str(table.sort_order()))
output_table.add_row("Schema", schema_tree)
output_table.add_row("Snapshots", snapshot_tree)
output_table.add_row("Properties", table_properties)
diff --git a/python/pyiceberg/table/__init__.py b/python/pyiceberg/table/__init__.py
index b9ccb1f4df..bf9f128a32 100644
--- a/python/pyiceberg/table/__init__.py
+++ b/python/pyiceberg/table/__init__.py
@@ -15,11 +15,21 @@
# specific language governing permissions and limitations
# under the License.
-from typing import Optional, Union
+
+from typing import (
+ Dict,
+ List,
+ Optional,
+ Union,
+)
from pydantic import Field
+from pyiceberg.schema import Schema
from pyiceberg.table.metadata import TableMetadataV1, TableMetadataV2
+from pyiceberg.table.partitioning import PartitionSpec
+from pyiceberg.table.snapshots import Snapshot, SnapshotLogEntry
+from pyiceberg.table.sorting import SortOrder
from pyiceberg.typedef import Identifier
from pyiceberg.utils.iceberg_base_model import IcebergBaseModel
@@ -27,4 +37,61 @@ from pyiceberg.utils.iceberg_base_model import IcebergBaseModel
class Table(IcebergBaseModel):
identifier: Identifier = Field()
metadata_location: str = Field()
- metadata: Optional[Union[TableMetadataV1, TableMetadataV2]] = Field()
+ metadata: Union[TableMetadataV1, TableMetadataV2] = Field()
+
+ def refresh(self):
+ """Refresh the current table metadata"""
+ raise NotImplementedError("To be implemented")
+
+ def schema(self) -> Schema:
+ """Return the schema for this table"""
+ return next(schema for schema in self.metadata.schemas if schema.schema_id == self.metadata.current_schema_id)
+
+ def schemas(self) -> Dict[int, Schema]:
+ """Return a dict of the schemas of this table, keyed by schema ID"""
+ return {schema.schema_id: schema for schema in self.metadata.schemas}
+
+ def spec(self) -> PartitionSpec:
+ """Return the partition spec of this table"""
+ return next(spec for spec in self.metadata.partition_specs if spec.spec_id == self.metadata.default_spec_id)
+
+ def specs(self) -> Dict[int, PartitionSpec]:
+ """Return a dict of the partition specs of this table"""
+ return {spec.spec_id: spec for spec in self.metadata.partition_specs}
+
+ def sort_order(self) -> SortOrder:
+ """Return the sort order of this table"""
+ return next(
+ sort_order for sort_order in self.metadata.sort_orders if sort_order.order_id == self.metadata.default_sort_order_id
+ )
+
+ def sort_orders(self) -> Dict[int, SortOrder]:
+ """Return a dict of the sort orders of this table"""
+ return {sort_order.order_id: sort_order for sort_order in self.metadata.sort_orders}
+
+ def location(self) -> str:
+ """Return the table's base location."""
+ return self.metadata.location
+
+ def current_snapshot(self) -> Optional[Snapshot]:
+ """Get the current snapshot for this table, or None if there is no current snapshot."""
+ if snapshot_id := self.metadata.current_snapshot_id:
+ return self.snapshot_by_id(snapshot_id)
+ return None
+
+ def snapshot_by_id(self, snapshot_id: int) -> Optional[Snapshot]:
+ """Get the snapshot of this table with the given id, or None if there is no matching snapshot."""
+ try:
+ return next(snapshot for snapshot in self.metadata.snapshots if snapshot.snapshot_id == snapshot_id)
+ except StopIteration:
+ return None
+
+ def snapshot_by_name(self, name: str) -> Optional[Snapshot]:
+ """Get the snapshot referenced by the given name, or None if no such reference exists."""
+ if ref := self.metadata.refs.get(name):
+ return self.snapshot_by_id(ref.snapshot_id)
+ return None
+
+ def history(self) -> List[SnapshotLogEntry]:
+ """Get the snapshot history of this table."""
+ return self.metadata.snapshot_log
diff --git a/python/pyiceberg/table/metadata.py b/python/pyiceberg/table/metadata.py
index 5f48a44d33..d5514a5bec 100644
--- a/python/pyiceberg/table/metadata.py
+++ b/python/pyiceberg/table/metadata.py
@@ -32,7 +32,7 @@ from pyiceberg.exceptions import ValidationError
from pyiceberg.schema import Schema
from pyiceberg.table.partitioning import PartitionSpec
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef, SnapshotRefType
-from pyiceberg.table.snapshots import Snapshot
+from pyiceberg.table.snapshots import MetadataLogEntry, Snapshot, SnapshotLogEntry
from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, UNSORTED_SORT_ORDER_ID, SortOrder
from pyiceberg.utils.iceberg_base_model import IcebergBaseModel
@@ -83,15 +83,6 @@ class TableMetadataCommonFields(IcebergBaseModel):
"""Metadata for an Iceberg table as specified in the Apache Iceberg
spec (https://iceberg.apache.org/spec/#iceberg-table-spec)"""
- def current_schema(self) -> Schema:
- return next(schema for schema in self.schemas if schema.schema_id == self.current_schema_id)
-
- def current_sort_order(self) -> SortOrder:
- return next(sort_order for sort_order in self.sort_orders if sort_order.order_id == self.default_sort_order_id)
-
- def current_partition_spec(self) -> PartitionSpec:
- return next(spec for spec in self.partition_specs if spec.spec_id == self.default_spec_id)
-
@root_validator(pre=True)
def cleanup_snapshot_id(cls, data: Dict[str, Any]):
if data.get("current-snapshot-id") == -1:
@@ -104,7 +95,8 @@ class TableMetadataCommonFields(IcebergBaseModel):
def construct_refs(cls, data: Dict[str, Any]):
# This is going to be much nicer as soon as refs is an actual pydantic object
if current_snapshot_id := data.get("current_snapshot_id"):
- data["refs"] = {MAIN_BRANCH: SnapshotRef(snapshot_id=current_snapshot_id, snapshot_ref_type=SnapshotRefType.BRANCH)}
+ if MAIN_BRANCH not in data["refs"]:
+ data["refs"][MAIN_BRANCH] = SnapshotRef(snapshot_id=current_snapshot_id, snapshot_ref_type=SnapshotRefType.BRANCH)
return data
location: str = Field()
@@ -158,7 +150,7 @@ class TableMetadataCommonFields(IcebergBaseModel):
deleted from the file system until the last snapshot in which it was
listed is garbage collected."""
- snapshot_log: List[Dict[str, Any]] = Field(alias="snapshot-log", default_factory=list)
+ snapshot_log: List[SnapshotLogEntry] = Field(alias="snapshot-log", default_factory=list)
"""A list (optional) of timestamp and snapshot ID pairs that encodes
changes to the current snapshot for the table. Each time the
current-snapshot-id is changed, a new entry should be added with the
@@ -166,7 +158,7 @@ class TableMetadataCommonFields(IcebergBaseModel):
expired from the list of valid snapshots, all entries before a snapshot
that has expired should be removed."""
- metadata_log: List[Dict[str, Any]] = Field(alias="metadata-log", default_factory=list)
+ metadata_log: List[MetadataLogEntry] = Field(alias="metadata-log", default_factory=list)
"""A list (optional) of timestamp and metadata file location pairs that
encodes changes to the previous metadata files for the table. Each time
a new metadata file is created, a new entry of the previous metadata
diff --git a/python/pyiceberg/table/snapshots.py b/python/pyiceberg/table/snapshots.py
index 042691891e..8f061777f7 100644
--- a/python/pyiceberg/table/snapshots.py
+++ b/python/pyiceberg/table/snapshots.py
@@ -95,3 +95,13 @@ class Snapshot(IcebergBaseModel):
manifest_list: Optional[str] = Field(alias="manifest-list", description="Location of the snapshot's manifest list file")
summary: Optional[Summary] = Field()
schema_id: Optional[int] = Field(alias="schema-id", default=None)
+
+
+class MetadataLogEntry(IcebergBaseModel):
+ metadata_file: str = Field(alias="metadata-file")
+ timestamp_ms: int = Field(alias="timestamp-ms")
+
+
+class SnapshotLogEntry(IcebergBaseModel):
+ snapshot_id: str = Field(alias="snapshot-id")
+ timestamp_ms: int = Field(alias="timestamp-ms")
diff --git a/python/pyiceberg/table/sorting.py b/python/pyiceberg/table/sorting.py
index 043496823a..8199caff7b 100644
--- a/python/pyiceberg/table/sorting.py
+++ b/python/pyiceberg/table/sorting.py
@@ -122,7 +122,7 @@ class SortOrder(IcebergBaseModel):
data["fields"] = fields
super().__init__(**data)
- order_id: Optional[int] = Field(alias="order-id")
+ order_id: int = Field(alias="order-id")
fields: List[SortField] = Field(default_factory=list)
def __str__(self) -> str:
diff --git a/python/tests/catalog/test_hive.py b/python/tests/catalog/test_hive.py
index e3baec7b84..e6b06e762d 100644
--- a/python/tests/catalog/test_hive.py
+++ b/python/tests/catalog/test_hive.py
@@ -47,7 +47,13 @@ from pyiceberg.serializers import ToOutputFile
from pyiceberg.table.metadata import TableMetadata, TableMetadataV2
from pyiceberg.table.partitioning import PartitionField, PartitionSpec
from pyiceberg.table.refs import SnapshotRef, SnapshotRefType
-from pyiceberg.table.snapshots import Operation, Snapshot, Summary
+from pyiceberg.table.snapshots import (
+ MetadataLogEntry,
+ Operation,
+ Snapshot,
+ SnapshotLogEntry,
+ Summary,
+)
from pyiceberg.table.sorting import (
NullOrder,
SortDirection,
@@ -336,7 +342,7 @@ def test_load_table(hive_table: HiveTable):
sequence_number=0,
timestamp_ms=1515100955770,
manifest_list="s3://a/b/1.avro",
- summary=Summary(Operation.APPEND),
+ summary=Summary(operation=Operation.APPEND),
schema_id=None,
),
Snapshot(
@@ -345,15 +351,15 @@ def test_load_table(hive_table: HiveTable):
sequence_number=1,
timestamp_ms=1555100955770,
manifest_list="s3://a/b/2.avro",
- summary=Summary(Operation.APPEND),
+ summary=Summary(operation=Operation.APPEND),
schema_id=1,
),
],
snapshot_log=[
- {"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770},
- {"snapshot-id": 3055729675574597004, "timestamp-ms": 1555100955770},
+ SnapshotLogEntry(snapshot_id="3051729675574597004", timestamp_ms=1515100955770),
+ SnapshotLogEntry(snapshot_id="3055729675574597004", timestamp_ms=1555100955770),
],
- metadata_log=[],
+ metadata_log=[MetadataLogEntry(metadata_file="s3://bucket/.../v1.json", timestamp_ms=1515100)],
sort_orders=[
SortOrder(
3,
@@ -370,13 +376,20 @@ def test_load_table(hive_table: HiveTable):
],
default_sort_order_id=3,
refs={
+ "test": SnapshotRef(
+ snapshot_id=3051729675574597004,
+ snapshot_ref_type=SnapshotRefType.TAG,
+ min_snapshots_to_keep=None,
+ max_snapshot_age_ms=None,
+ max_ref_age_ms=10000000,
+ ),
"main": SnapshotRef(
snapshot_id=3055729675574597004,
snapshot_ref_type=SnapshotRefType.BRANCH,
min_snapshots_to_keep=None,
max_snapshot_age_ms=None,
max_ref_age_ms=None,
- )
+ ),
},
format_version=2,
last_sequence_number=34,
diff --git a/python/tests/cli/test_console.py b/python/tests/cli/test_console.py
index 32d6b80d53..2a001d14af 100644
--- a/python/tests/cli/test_console.py
+++ b/python/tests/cli/test_console.py
@@ -520,7 +520,7 @@ def test_json_describe_table():
assert result.exit_code == 0
assert (
result.output
- == """{"identifier": ["default", "foo"], "metadata_location": "s3://tmp/", "metadata": {"location": "s3://bucket/test/location", "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", "last-updated-ms": 1602638573590, "last-column-id": 3, "schemas": [{"type": "struct", "fields": [{"id": 1, "name": "x", "type": "long", "required": true}], "schema-id": 0, "identifier-field-ids": []}, {"type": "struct", "fields": [{"id": 1, "name": "x", "type": "long", "required": true}, {"id": 2, "n [...]
+ == """{"identifier": ["default", "foo"], "metadata_location": "s3://tmp/", "metadata": {"location": "s3://bucket/test/location", "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", "last-updated-ms": 1602638573590, "last-column-id": 3, "schemas": [{"type": "struct", "fields": [{"id": 1, "name": "x", "type": "long", "required": true}], "schema-id": 0, "identifier-field-ids": []}, {"type": "struct", "fields": [{"id": 1, "name": "x", "type": "long", "required": true}, {"id": 2, "n [...]
)
diff --git a/python/tests/conftest.py b/python/tests/conftest.py
index 55db81e887..089443cead 100644
--- a/python/tests/conftest.py
+++ b/python/tests/conftest.py
@@ -277,7 +277,8 @@ EXAMPLE_TABLE_METADATA_V2 = {
{"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770},
{"snapshot-id": 3055729675574597004, "timestamp-ms": 1555100955770},
],
- "metadata-log": [],
+ "metadata-log": [{"metadata-file": "s3://bucket/.../v1.json", "timestamp-ms": 1515100}],
+ "refs": {"test": {"snapshot-id": 3051729675574597004, "type": "tag", "max-ref-age-ms": 10000000}},
}
diff --git a/python/tests/table/test_init.py b/python/tests/table/test_init.py
new file mode 100644
index 0000000000..b06421fcfb
--- /dev/null
+++ b/python/tests/table/test_init.py
@@ -0,0 +1,180 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint:disable=redefined-outer-name
+from typing import Any, Dict
+
+import pytest
+
+from pyiceberg.schema import Schema
+from pyiceberg.table import PartitionSpec, Table, TableMetadataV2
+from pyiceberg.table.partitioning import PartitionField
+from pyiceberg.table.snapshots import (
+ Operation,
+ Snapshot,
+ SnapshotLogEntry,
+ Summary,
+)
+from pyiceberg.table.sorting import (
+ NullOrder,
+ SortDirection,
+ SortField,
+ SortOrder,
+)
+from pyiceberg.transforms import BucketTransform, IdentityTransform
+from pyiceberg.types import LongType, NestedField
+
+
+@pytest.fixture
+def table(example_table_metadata_v2: Dict[str, Any]) -> Table:
+ table_metadata = TableMetadataV2(**example_table_metadata_v2)
+ return Table(
+ identifier=("database", "table"),
+ metadata=table_metadata,
+ metadata_location=f"{table_metadata.location}/uuid.metadata.json",
+ )
+
+
+def test_schema(table):
+ assert table.schema() == Schema(
+ fields=(
+ NestedField(field_id=1, name="x", field_type=LongType(), required=True),
+ NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
+ NestedField(field_id=3, name="z", field_type=LongType(), required=True),
+ ),
+ schema_id=1,
+ identifier_field_ids=[1, 2],
+ )
+
+
+def test_schemas(table):
+ assert table.schemas() == {
+ 0: Schema(
+ fields=(NestedField(field_id=1, name="x", field_type=LongType(), required=True),),
+ schema_id=0,
+ identifier_field_ids=[],
+ ),
+ 1: Schema(
+ fields=(
+ NestedField(field_id=1, name="x", field_type=LongType(), required=True),
+ NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
+ NestedField(field_id=3, name="z", field_type=LongType(), required=True),
+ ),
+ schema_id=1,
+ identifier_field_ids=[1, 2],
+ ),
+ }
+
+
+def test_spec(table):
+ assert table.spec() == PartitionSpec(
+ spec_id=0, fields=(PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="x"),)
+ )
+
+
+def test_specs(table):
+ assert table.specs() == {
+ 0: PartitionSpec(spec_id=0, fields=(PartitionField(source_id=1, field_id=1000, transform=IdentityTransform(), name="x"),))
+ }
+
+
+def test_sort_order(table):
+ assert table.sort_order() == SortOrder(
+ order_id=3,
+ fields=[
+ SortField(source_id=2, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST),
+ SortField(
+ source_id=3,
+ transform=BucketTransform(num_buckets=4),
+ direction=SortDirection.DESC,
+ null_order=NullOrder.NULLS_LAST,
+ ),
+ ],
+ )
+
+
+def test_sort_orders(table):
+ assert table.sort_orders() == {
+ 3: SortOrder(
+ order_id=3,
+ fields=[
+ SortField(
+ source_id=2, transform=IdentityTransform(), direction=SortDirection.ASC, null_order=NullOrder.NULLS_FIRST
+ ),
+ SortField(
+ source_id=3,
+ transform=BucketTransform(num_buckets=4),
+ direction=SortDirection.DESC,
+ null_order=NullOrder.NULLS_LAST,
+ ),
+ ],
+ )
+ }
+
+
+def test_location(table):
+ assert table.location() == "s3://bucket/test/location"
+
+
+def test_current_snapshot(table):
+ assert table.current_snapshot() == Snapshot(
+ snapshot_id=3055729675574597004,
+ parent_snapshot_id=3051729675574597004,
+ sequence_number=1,
+ timestamp_ms=1555100955770,
+ manifest_list="s3://a/b/2.avro",
+ summary=Summary(operation=Operation.APPEND),
+ schema_id=1,
+ )
+
+
+def test_snapshot_by_id(table):
+ assert table.snapshot_by_id(3055729675574597004) == Snapshot(
+ snapshot_id=3055729675574597004,
+ parent_snapshot_id=3051729675574597004,
+ sequence_number=1,
+ timestamp_ms=1555100955770,
+ manifest_list="s3://a/b/2.avro",
+ summary=Summary(operation=Operation.APPEND),
+ schema_id=1,
+ )
+
+
+def test_snapshot_by_id_does_not_exist(table):
+ assert table.snapshot_by_id(-1) is None
+
+
+def test_snapshot_by_name(table):
+ assert table.snapshot_by_name("test") == Snapshot(
+ snapshot_id=3051729675574597004,
+ parent_snapshot_id=None,
+ sequence_number=0,
+ timestamp_ms=1515100955770,
+ manifest_list="s3://a/b/1.avro",
+ summary=Summary(operation=Operation.APPEND),
+ schema_id=None,
+ )
+
+
+def test_snapshot_by_name_does_not_exist(table):
+ assert table.snapshot_by_name("doesnotexist") is None
+
+
+def test_history(table):
+ assert table.history() == [
+ SnapshotLogEntry(snapshot_id="3051729675574597004", timestamp_ms=1515100955770),
+ SnapshotLogEntry(snapshot_id="3055729675574597004", timestamp_ms=1555100955770),
+ ]
diff --git a/python/tests/table/test_metadata.py b/python/tests/table/test_metadata.py
index 352e7574f5..c357b9d825 100644
--- a/python/tests/table/test_metadata.py
+++ b/python/tests/table/test_metadata.py
@@ -87,7 +87,7 @@ def test_v2_metadata_parsing(example_table_metadata_v2: Dict[str, Any]):
assert table_metadata.properties["read.split.target.size"] == "134217728"
assert table_metadata.current_snapshot_id == 3055729675574597004
assert table_metadata.snapshots[0].snapshot_id == 3051729675574597004
- assert table_metadata.snapshot_log[0]["timestamp-ms"] == 1515100955770
+ assert table_metadata.snapshot_log[0].timestamp_ms == 1515100955770
assert table_metadata.sort_orders[0].order_id == 3
assert table_metadata.default_sort_order_id == 3
@@ -152,7 +152,7 @@ def test_serialize_v1():
def test_serialize_v2(example_table_metadata_v2: Dict[str, Any]):
table_metadata = TableMetadataV2(**example_table_metadata_v2).json()
- expected = """{"location": "s3://bucket/test/location", "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", "last-updated-ms": 1602638573590, "last-column-id": 3, "schemas": [{"type": "struct", "fields": [{"id": 1, "name": "x", "type": "long", "required": true}], "schema-id": 0, "identifier-field-ids": []}, {"type": "struct", "fields": [{"id": 1, "name": "x", "type": "long", "required": true}, {"id": 2, "name": "y", "type": "long", "required": true, "doc": "comment"}, {"id": 3, "na [...]
+ expected = """{"location": "s3://bucket/test/location", "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", "last-updated-ms": 1602638573590, "last-column-id": 3, "schemas": [{"type": "struct", "fields": [{"id": 1, "name": "x", "type": "long", "required": true}], "schema-id": 0, "identifier-field-ids": []}, {"type": "struct", "fields": [{"id": 1, "name": "x", "type": "long", "required": true}, {"id": 2, "name": "y", "type": "long", "required": true, "doc": "comment"}, {"id": 3, "na [...]
assert table_metadata == expected
@@ -464,7 +464,22 @@ def test_v1_write_metadata_for_v2():
def test_v2_ref_creation(example_table_metadata_v2: Dict[str, Any]):
table_metadata = TableMetadataV2(**example_table_metadata_v2)
- assert table_metadata.refs == {"main": SnapshotRef(snapshot_id=3055729675574597004, snapshot_ref_type=SnapshotRefType.BRANCH)}
+ assert table_metadata.refs == {
+ "main": SnapshotRef(
+ snapshot_id=3055729675574597004,
+ snapshot_ref_type=SnapshotRefType.BRANCH,
+ min_snapshots_to_keep=None,
+ max_snapshot_age_ms=None,
+ max_ref_age_ms=None,
+ ),
+ "test": SnapshotRef(
+ snapshot_id=3051729675574597004,
+ snapshot_ref_type=SnapshotRefType.TAG,
+ min_snapshots_to_keep=None,
+ max_snapshot_age_ms=None,
+ max_ref_age_ms=10000000,
+ ),
+ }
def test_metadata_v1():