You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by dw...@apache.org on 2022/05/17 20:18:42 UTC
[iceberg] branch master updated: Change types into dataclasses (#4767)

This is an automated email from the ASF dual-hosted git repository.

dweeks pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 97f1771c2 Change types into dataclasses (#4767)
97f1771c2 is described below

commit 97f1771c2ac764f43e263c422c720959ca8eee9b
Author: Fokko Driesprong <fo...@tabular.io>
AuthorDate: Tue May 17 22:18:37 2022 +0200

    Change types into dataclasses (#4767)
    
    * Change types into dataclasses
    
    Proposal to change the types into dataclasses.
    
    This has several improvements:
    
    - We can use the dataclass field(repr=True) to include the fields in the representation, instead of building our own strings
    - We can assign the types in the post_init when they are dynamic (Lists, Maps, Structs, etc.), or just override them when they are static (Primitives)
    - We don't have to implement any eq methods because they come for free
    - The types are frozen, which is kind of nice since we re-use them
    - The code is much more concise
    - We can assign the min/max of the int/long/float as Final as of 3.8: https://peps.python.org/pep-0591/
    
    My inspiration was the comment by Kyle:
    https://github.com/apache/iceberg/pull/4742#discussion_r869494393
    
    This would entail implementing eq, but why not use the generated one since we're comparing all the attributes :)
    
    Would love to get your input
    
    * Remove explicit repr and eq
    
    * Use @cached_property to cache the string
    
    Add missing words to spelling
    
    * Add additional guard for initializing StructType using kwargs
    
    * Replace type with field_type
---
 python/setup.cfg                 |   1 +
 python/spellcheck-dictionary.txt |   6 +-
 python/src/iceberg/schema.py     |  14 +-
 python/src/iceberg/types.py      | 390 ++++++++++++++++++++-------------------
 python/tests/test_schema.py      |   6 +-
 python/tests/test_types.py       |  10 +-
 6 files changed, 222 insertions(+), 205 deletions(-)

diff --git a/python/setup.cfg b/python/setup.cfg
index 559751011..18f4d8245 100644
--- a/python/setup.cfg
+++ b/python/setup.cfg
@@ -45,6 +45,7 @@ python_requires = >=3.7
 install_requires =
     mmh3
     singledispatch
+    cached-property; python_version <= '3.7'
 [options.extras_require]
 arrow =
     pyarrow
diff --git a/python/spellcheck-dictionary.txt b/python/spellcheck-dictionary.txt
index 2476d5afd..1100dd703 100644
--- a/python/spellcheck-dictionary.txt
+++ b/python/spellcheck-dictionary.txt
@@ -26,10 +26,13 @@ FileInfo
 filesystem
 fs
 func
+IcebergType
 io
 NativeFile
+NestedField
 nullability
 pragma
+PrimitiveType
 pyarrow
 repr
 schemas
@@ -42,4 +45,5 @@ StructType
 Timestamptz
 Timestamptzs
 unscaled
-URI
\ No newline at end of file
+URI
+
diff --git a/python/src/iceberg/schema.py b/python/src/iceberg/schema.py
index 9b9650448..59b9cc835 100644
--- a/python/src/iceberg/schema.py
+++ b/python/src/iceberg/schema.py
@@ -152,7 +152,7 @@ class Schema:
             NestedField: The type of the matched NestedField
         """
         field = self.find_field(name_or_id=name_or_id, case_sensitive=case_sensitive)
-        return field.type  # type: ignore
+        return field.field_type
 
     def find_column_name(self, column_id: int) -> str:
         """Find a column name given a column ID
@@ -318,7 +318,7 @@ def _(obj: StructType, visitor: SchemaVisitor[T]) -> T:
 
     for field in obj.fields:
         visitor.before_field(field)
-        result = visit(field.type, visitor)
+        result = visit(field.field_type, visitor)
         visitor.after_field(field)
         results.append(visitor.field(field, result))
 
@@ -330,7 +330,7 @@ def _(obj: ListType, visitor: SchemaVisitor[T]) -> T:
     """Visit a ListType with a concrete SchemaVisitor"""
 
     visitor.before_list_element(obj.element)
-    result = visit(obj.element.type, visitor)
+    result = visit(obj.element.field_type, visitor)
     visitor.after_list_element(obj.element)
 
     return visitor.list(obj, result)
@@ -340,11 +340,11 @@ def _(obj: ListType, visitor: SchemaVisitor[T]) -> T:
 def _(obj: MapType, visitor: SchemaVisitor[T]) -> T:
     """Visit a MapType with a concrete SchemaVisitor"""
     visitor.before_map_key(obj.key)
-    key_result = visit(obj.key.type, visitor)
+    key_result = visit(obj.key.field_type, visitor)
     visitor.after_map_key(obj.key)
 
     visitor.before_map_value(obj.value)
-    value_result = visit(obj.value.type, visitor)
+    value_result = visit(obj.value.field_type, visitor)
     visitor.after_list_element(obj.value)
 
     return visitor.map(obj, key_result, value_result)
@@ -412,12 +412,12 @@ class _IndexByName(SchemaVisitor[Dict[str, int]]):
 
     def before_list_element(self, element: NestedField) -> None:
         """Short field names omit element when the element is a StructType"""
-        if not isinstance(element.type, StructType):
+        if not isinstance(element.field_type, StructType):
             self._short_field_names.append(element.name)
         self._field_names.append(element.name)
 
     def after_list_element(self, element: NestedField) -> None:
-        if not isinstance(element.type, StructType):
+        if not isinstance(element.field_type, StructType):
             self._short_field_names.pop()
         self._field_names.pop()
 
diff --git a/python/src/iceberg/types.py b/python/src/iceberg/types.py
index 3173c4aca..5318e4f8a 100644
--- a/python/src/iceberg/types.py
+++ b/python/src/iceberg/types.py
@@ -29,8 +29,15 @@ Example:
 Notes:
   - https://iceberg.apache.org/#spec/#primitive-types
 """
+import sys
+from dataclasses import dataclass, field
+from typing import ClassVar, Dict, List, Optional, Tuple
 
-from typing import Dict, Optional, Tuple
+if sys.version_info >= (3, 8):
+    from functools import cached_property
+else:
+    # In the case of <= Python 3.7
+    from cached_property import cached_property
 
 
 class Singleton:
@@ -42,57 +49,64 @@ class Singleton:
         return cls._instance
 
 
+@dataclass(frozen=True)
 class IcebergType:
-    """Base type for all Iceberg Types"""
+    """Base type for all Iceberg Types
 
-    _initialized = False
-
-    def __init__(self, type_string: str, repr_string: str):
-        self._type_string = type_string
-        self._repr_string = repr_string
-        self._initialized = True
+    Example:
+        >>> str(IcebergType())
+        'IcebergType()'
+        >>> repr(IcebergType())
+        'IcebergType()'
+    """
 
-    def __repr__(self):
-        return self._repr_string
+    @property
+    def string_type(self) -> str:
+        return self.__repr__()
 
-    def __str__(self):
-        return self._type_string
+    def __str__(self) -> str:
+        return self.string_type
 
     @property
     def is_primitive(self) -> bool:
         return isinstance(self, PrimitiveType)
 
 
+@dataclass(frozen=True, eq=True)
 class PrimitiveType(IcebergType):
-    """Base class for all Iceberg Primitive Types"""
+    """Base class for all Iceberg Primitive Types
+
+    Example:
+        >>> str(PrimitiveType())
+        'PrimitiveType()'
+    """
 
 
+@dataclass(frozen=True)
 class FixedType(PrimitiveType):
     """A fixed data type in Iceberg.
 
     Example:
         >>> FixedType(8)
         FixedType(length=8)
-        >>> FixedType(8)==FixedType(8)
+        >>> FixedType(8) == FixedType(8)
         True
     """
 
-    _instances: Dict[int, "FixedType"] = {}
+    length: int = field()
+
+    _instances: ClassVar[Dict[int, "FixedType"]] = {}
 
     def __new__(cls, length: int):
         cls._instances[length] = cls._instances.get(length) or object.__new__(cls)
         return cls._instances[length]
 
-    def __init__(self, length: int):
-        if not self._initialized:
-            super().__init__(f"fixed[{length}]", f"FixedType(length={length})")
-            self._length = length
-
     @property
-    def length(self) -> int:
-        return self._length
+    def string_type(self) -> str:
+        return f"fixed[{self.length}]"
 
 
+@dataclass(frozen=True, eq=True)
 class DecimalType(PrimitiveType):
     """A fixed data type in Iceberg.
 
@@ -103,38 +117,44 @@ class DecimalType(PrimitiveType):
         True
     """
 
-    _instances: Dict[Tuple[int, int], "DecimalType"] = {}
+    precision: int = field()
+    scale: int = field()
+
+    _instances: ClassVar[Dict[Tuple[int, int], "DecimalType"]] = {}
 
     def __new__(cls, precision: int, scale: int):
         key = (precision, scale)
         cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
         return cls._instances[key]
 
-    def __init__(self, precision: int, scale: int):
-        if not self._initialized:
-            super().__init__(
-                f"decimal({precision}, {scale})",
-                f"DecimalType(precision={precision}, scale={scale})",
-            )
-            self._precision = precision
-            self._scale = scale
-
-    @property
-    def precision(self) -> int:
-        return self._precision
-
     @property
-    def scale(self) -> int:
-        return self._scale
+    def string_type(self) -> str:
+        return f"decimal({self.precision}, {self.scale})"
 
 
+@dataclass(frozen=True)
 class NestedField(IcebergType):
     """Represents a field of a struct, a map key, a map value, or a list element.
 
     This is where field IDs, names, docs, and nullability are tracked.
+
+    Example:
+        >>> str(NestedField(
+        ...     field_id=1,
+        ...     name='foo',
+        ...     field_type=FixedType(22),
+        ...     is_optional=False,
+        ... ))
+        '1: foo: required fixed[22]'
     """
 
-    _instances: Dict[Tuple[bool, int, str, IcebergType, Optional[str]], "NestedField"] = {}
+    field_id: int = field()
+    name: str = field()
+    field_type: IcebergType = field()
+    is_optional: bool = field(default=True)
+    doc: Optional[str] = field(default=None, repr=False)
+
+    _instances: ClassVar[Dict[Tuple[bool, int, str, IcebergType, Optional[str]], "NestedField"]] = {}
 
     def __new__(
         cls,
@@ -148,56 +168,20 @@ class NestedField(IcebergType):
         cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
         return cls._instances[key]
 
-    def __init__(
-        self,
-        field_id: int,
-        name: str,
-        field_type: IcebergType,
-        is_optional: bool = True,
-        doc: Optional[str] = None,
-    ):
-        if not self._initialized:
-            docString = "" if doc is None else f", doc={repr(doc)}"
-            super().__init__(
-                (
-                    f"{field_id}: {name}: {'optional' if is_optional else 'required'} {field_type}" ""
-                    if doc is None
-                    else f" ({doc})"
-                ),
-                f"NestedField(field_id={field_id}, name={repr(name)}, field_type={repr(field_type)}, is_optional={is_optional}"
-                f"{docString})",
-            )
-            self._is_optional = is_optional
-            self._id = field_id
-            self._name = name
-            self._type = field_type
-            self._doc = doc
-
-    @property
-    def is_optional(self) -> bool:
-        return self._is_optional
-
     @property
     def is_required(self) -> bool:
-        return not self._is_optional
-
-    @property
-    def field_id(self) -> int:
-        return self._id
-
-    @property
-    def name(self) -> str:
-        return self._name
-
-    @property
-    def doc(self) -> Optional[str]:
-        return self._doc
+        return not self.is_optional
 
     @property
-    def type(self) -> IcebergType:
-        return self._type
+    def string_type(self) -> str:
+        return (
+            f"{self.field_id}: {self.name}: {'optional' if self.is_optional else 'required'} {self.field_type}"
+            if self.doc is None
+            else f" ({self.doc})"
+        )
 
 
+@dataclass(frozen=True, init=False)
 class StructType(IcebergType):
     """A struct type in Iceberg
 
@@ -209,25 +193,27 @@ class StructType(IcebergType):
         'struct<1: required_field: optional string, 2: optional_field: optional int>'
     """
 
-    _instances: Dict[Tuple[NestedField, ...], "StructType"] = {}
+    fields: List[NestedField] = field()
+
+    _instances: ClassVar[Dict[Tuple[NestedField, ...], "StructType"]] = {}
 
-    def __new__(cls, *fields: NestedField):
+    def __new__(cls, *fields: NestedField, **kwargs):
+        if not fields and "fields" in kwargs:
+            fields = kwargs["fields"]
         cls._instances[fields] = cls._instances.get(fields) or object.__new__(cls)
         return cls._instances[fields]
 
-    def __init__(self, *fields: NestedField):
-        if not self._initialized:
-            super().__init__(
-                f"struct<{', '.join(map(str, fields))}>",
-                f"StructType{repr(fields)}",
-            )
-            self._fields = fields
+    def __init__(self, *fields: NestedField, **kwargs):
+        if not fields and "fields" in kwargs:
+            fields = kwargs["fields"]
+        object.__setattr__(self, "fields", fields)
 
-    @property
-    def fields(self) -> Tuple[NestedField, ...]:
-        return self._fields
+    @cached_property
+    def string_type(self) -> str:
+        return f"struct<{', '.join(map(str, self.fields))}>"
 
 
+@dataclass(frozen=True)
 class ListType(IcebergType):
     """A list type in Iceberg
 
@@ -236,7 +222,12 @@ class ListType(IcebergType):
         ListType(element_id=3, element_type=StringType(), element_is_optional=True)
     """
 
-    _instances: Dict[Tuple[bool, int, IcebergType], "ListType"] = {}
+    element_id: int = field()
+    element_type: IcebergType = field()
+    element_is_optional: bool = field(default=True)
+    element: NestedField = field(init=False, repr=False)
+
+    _instances: ClassVar[Dict[Tuple[bool, int, IcebergType], "ListType"]] = {}
 
     def __new__(
         cls,
@@ -248,30 +239,24 @@ class ListType(IcebergType):
         cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
         return cls._instances[key]
 
-    def __init__(
-        self,
-        element_id: int,
-        element_type: IcebergType,
-        element_is_optional: bool = True,
-    ):
-        if not self._initialized:
-            super().__init__(
-                f"list<{element_type}>",
-                f"ListType(element_id={element_id}, element_type={repr(element_type)}, "
-                f"element_is_optional={element_is_optional})",
-            )
-            self._element_field = NestedField(
+    def __post_init__(self):
+        object.__setattr__(
+            self,
+            "element",
+            NestedField(
                 name="element",
-                is_optional=element_is_optional,
-                field_id=element_id,
-                field_type=element_type,
-            )
+                is_optional=self.element_is_optional,
+                field_id=self.element_id,
+                field_type=self.element_type,
+            ),
+        )
 
     @property
-    def element(self) -> NestedField:
-        return self._element_field
+    def string_type(self) -> str:
+        return f"list<{self.element_type}>"
 
 
+@dataclass(frozen=True)
 class MapType(IcebergType):
     """A map type in Iceberg
 
@@ -280,7 +265,16 @@ class MapType(IcebergType):
         MapType(key_id=1, key_type=StringType(), value_id=2, value_type=IntegerType(), value_is_optional=True)
     """
 
-    _instances: Dict[Tuple[int, IcebergType, int, IcebergType, bool], "MapType"] = {}
+    key_id: int = field()
+    key_type: IcebergType = field()
+    value_id: int = field()
+    value_type: IcebergType = field()
+    value_is_optional: bool = field(default=True)
+    key: NestedField = field(init=False, repr=False)
+    value: NestedField = field(init=False, repr=False)
+
+    # _type_string_def = lambda self: f"map<{self.key_type}, {self.value_type}>"
+    _instances: ClassVar[Dict[Tuple[int, IcebergType, int, IcebergType, bool], "MapType"]] = {}
 
     def __new__(
         cls,
@@ -294,37 +288,23 @@ class MapType(IcebergType):
         cls._instances[impl_key] = cls._instances.get(impl_key) or object.__new__(cls)
         return cls._instances[impl_key]
 
-    def __init__(
-        self,
-        key_id: int,
-        key_type: IcebergType,
-        value_id: int,
-        value_type: IcebergType,
-        value_is_optional: bool = True,
-    ):
-        if not self._initialized:
-            super().__init__(
-                f"map<{key_type}, {value_type}>",
-                f"MapType(key_id={key_id}, key_type={repr(key_type)}, value_id={value_id}, value_type={repr(value_type)}, "
-                f"value_is_optional={value_is_optional})",
-            )
-            self._key_field = NestedField(name="key", field_id=key_id, field_type=key_type, is_optional=False)
-            self._value_field = NestedField(
+    def __post_init__(self):
+        object.__setattr__(
+            self, "key", NestedField(name="key", field_id=self.key_id, field_type=self.key_type, is_optional=False)
+        )
+        object.__setattr__(
+            self,
+            "value",
+            NestedField(
                 name="value",
-                field_id=value_id,
-                field_type=value_type,
-                is_optional=value_is_optional,
-            )
-
-    @property
-    def key(self) -> NestedField:
-        return self._key_field
-
-    @property
-    def value(self) -> NestedField:
-        return self._value_field
+                field_id=self.value_id,
+                field_type=self.value_type,
+                is_optional=self.value_is_optional,
+            ),
+        )
 
 
+@dataclass(frozen=True)
 class BooleanType(PrimitiveType, Singleton):
     """A boolean data type in Iceberg can be represented using an instance of this class.
 
@@ -332,13 +312,16 @@ class BooleanType(PrimitiveType, Singleton):
         >>> column_foo = BooleanType()
         >>> isinstance(column_foo, BooleanType)
         True
+        >>> column_foo
+        BooleanType()
     """
 
-    def __init__(self):
-        if not self._initialized:
-            super().__init__("boolean", "BooleanType()")
+    @property
+    def string_type(self) -> str:
+        return "boolean"
 
 
+@dataclass(frozen=True)
 class IntegerType(PrimitiveType, Singleton):
     """An Integer data type in Iceberg can be represented using an instance of this class. Integers in Iceberg are
     32-bit signed and can be promoted to Longs.
@@ -355,15 +338,15 @@ class IntegerType(PrimitiveType, Singleton):
           in Java (returns `-2147483648`)
     """
 
-    max: int = 2147483647
+    max: ClassVar[int] = 2147483647
+    min: ClassVar[int] = -2147483648
 
-    min: int = -2147483648
-
-    def __init__(self):
-        if not self._initialized:
-            super().__init__("int", "IntegerType()")
+    @property
+    def string_type(self) -> str:
+        return "int"
 
 
+@dataclass(frozen=True)
 class LongType(PrimitiveType, Singleton):
     """A Long data type in Iceberg can be represented using an instance of this class. Longs in Iceberg are
     64-bit signed integers.
@@ -372,6 +355,10 @@ class LongType(PrimitiveType, Singleton):
         >>> column_foo = LongType()
         >>> isinstance(column_foo, LongType)
         True
+        >>> column_foo
+        LongType()
+        >>> str(column_foo)
+        'long'
 
     Attributes:
         max (int): The maximum allowed value for Longs, inherited from the canonical Iceberg implementation
@@ -380,15 +367,15 @@ class LongType(PrimitiveType, Singleton):
           in Java (returns `-9223372036854775808`)
     """
 
-    max: int = 9223372036854775807
+    max: ClassVar[int] = 9223372036854775807
+    min: ClassVar[int] = -9223372036854775808
 
-    min: int = -9223372036854775808
-
-    def __init__(self):
-        if not self._initialized:
-            super().__init__("long", "LongType()")
+    @property
+    def string_type(self) -> str:
+        return "long"
 
 
+@dataclass(frozen=True)
 class FloatType(PrimitiveType, Singleton):
     """A Float data type in Iceberg can be represented using an instance of this class. Floats in Iceberg are
     32-bit IEEE 754 floating points and can be promoted to Doubles.
@@ -397,6 +384,8 @@ class FloatType(PrimitiveType, Singleton):
         >>> column_foo = FloatType()
         >>> isinstance(column_foo, FloatType)
         True
+        >>> column_foo
+        FloatType()
 
     Attributes:
         max (float): The maximum allowed value for Floats, inherited from the canonical Iceberg implementation
@@ -405,15 +394,15 @@ class FloatType(PrimitiveType, Singleton):
           in Java (returns `-3.4028235e38`)
     """
 
-    max: float = 3.4028235e38
+    max: ClassVar[float] = 3.4028235e38
+    min: ClassVar[float] = -3.4028235e38
 
-    min: float = -3.4028235e38
-
-    def __init__(self):
-        if not self._initialized:
-            super().__init__("float", "FloatType()")
+    @property
+    def string_type(self) -> str:
+        return "float"
 
 
+@dataclass(frozen=True)
 class DoubleType(PrimitiveType, Singleton):
     """A Double data type in Iceberg can be represented using an instance of this class. Doubles in Iceberg are
     64-bit IEEE 754 floating points.
@@ -422,13 +411,16 @@ class DoubleType(PrimitiveType, Singleton):
         >>> column_foo = DoubleType()
         >>> isinstance(column_foo, DoubleType)
         True
+        >>> column_foo
+        DoubleType()
     """
 
-    def __init__(self):
-        if not self._initialized:
-            super().__init__("double", "DoubleType()")
+    @property
+    def string_type(self) -> str:
+        return "double"
 
 
+@dataclass(frozen=True)
 class DateType(PrimitiveType, Singleton):
     """A Date data type in Iceberg can be represented using an instance of this class. Dates in Iceberg are
     calendar dates without a timezone or time.
@@ -437,13 +429,16 @@ class DateType(PrimitiveType, Singleton):
         >>> column_foo = DateType()
         >>> isinstance(column_foo, DateType)
         True
+        >>> column_foo
+        DateType()
     """
 
-    def __init__(self):
-        if not self._initialized:
-            super().__init__("date", "DateType()")
+    @property
+    def string_type(self) -> str:
+        return "date"
 
 
+@dataclass(frozen=True)
 class TimeType(PrimitiveType, Singleton):
     """A Time data type in Iceberg can be represented using an instance of this class. Times in Iceberg
     have microsecond precision and are a time of day without a date or timezone.
@@ -452,13 +447,16 @@ class TimeType(PrimitiveType, Singleton):
         >>> column_foo = TimeType()
         >>> isinstance(column_foo, TimeType)
         True
+        >>> column_foo
+        TimeType()
     """
 
-    def __init__(self):
-        if not self._initialized:
-            super().__init__("time", "TimeType()")
+    @property
+    def string_type(self) -> str:
+        return "time"
 
 
+@dataclass(frozen=True)
 class TimestampType(PrimitiveType, Singleton):
     """A Timestamp data type in Iceberg can be represented using an instance of this class. Timestamps in
     Iceberg have microsecond precision and include a date and a time of day without a timezone.
@@ -467,13 +465,16 @@ class TimestampType(PrimitiveType, Singleton):
         >>> column_foo = TimestampType()
         >>> isinstance(column_foo, TimestampType)
         True
+        >>> column_foo
+        TimestampType()
     """
 
-    def __init__(self):
-        if not self._initialized:
-            super().__init__("timestamp", "TimestampType()")
+    @property
+    def string_type(self) -> str:
+        return "timestamp"
 
 
+@dataclass(frozen=True)
 class TimestamptzType(PrimitiveType, Singleton):
     """A Timestamptz data type in Iceberg can be represented using an instance of this class. Timestamptzs in
     Iceberg are stored as UTC and include a date and a time of day with a timezone.
@@ -482,13 +483,16 @@ class TimestamptzType(PrimitiveType, Singleton):
         >>> column_foo = TimestamptzType()
         >>> isinstance(column_foo, TimestamptzType)
         True
+        >>> column_foo
+        TimestamptzType()
     """
 
-    def __init__(self):
-        if not self._initialized:
-            super().__init__("timestamptz", "TimestamptzType()")
+    @property
+    def string_type(self) -> str:
+        return "timestamptz"
 
 
+@dataclass(frozen=True)
 class StringType(PrimitiveType, Singleton):
     """A String data type in Iceberg can be represented using an instance of this class. Strings in
     Iceberg are arbitrary-length character sequences and are encoded with UTF-8.
@@ -497,13 +501,16 @@ class StringType(PrimitiveType, Singleton):
         >>> column_foo = StringType()
         >>> isinstance(column_foo, StringType)
         True
+        >>> column_foo
+        StringType()
     """
 
-    def __init__(self):
-        if not self._initialized:
-            super().__init__("string", "StringType()")
+    @property
+    def string_type(self) -> str:
+        return "string"
 
 
+@dataclass(frozen=True)
 class UUIDType(PrimitiveType, Singleton):
     """A UUID data type in Iceberg can be represented using an instance of this class. UUIDs in
     Iceberg are universally unique identifiers.
@@ -512,13 +519,16 @@ class UUIDType(PrimitiveType, Singleton):
         >>> column_foo = UUIDType()
         >>> isinstance(column_foo, UUIDType)
         True
+        >>> column_foo
+        UUIDType()
     """
 
-    def __init__(self):
-        if not self._initialized:
-            super().__init__("uuid", "UUIDType()")
+    @property
+    def string_type(self) -> str:
+        return "uuid"
 
 
+@dataclass(frozen=True)
 class BinaryType(PrimitiveType, Singleton):
     """A Binary data type in Iceberg can be represented using an instance of this class. Binaries in
     Iceberg are arbitrary-length byte arrays.
@@ -527,8 +537,10 @@ class BinaryType(PrimitiveType, Singleton):
         >>> column_foo = BinaryType()
         >>> isinstance(column_foo, BinaryType)
         True
+        >>> column_foo
+        BinaryType()
     """
 
-    def __init__(self):
-        if not self._initialized:
-            super().__init__("binary", "BinaryType()")
+    @property
+    def string_type(self) -> str:
+        return "binary"
diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py
index 536277c91..d48198edd 100644
--- a/python/tests/test_schema.py
+++ b/python/tests/test_schema.py
@@ -221,19 +221,19 @@ def test_schema_find_field_by_id(table_schema_simple):
     column1 = index[1]
     assert isinstance(column1, NestedField)
     assert column1.field_id == 1
-    assert column1.type == StringType()
+    assert column1.field_type == StringType()
     assert column1.is_optional == False
 
     column2 = index[2]
     assert isinstance(column2, NestedField)
     assert column2.field_id == 2
-    assert column2.type == IntegerType()
+    assert column2.field_type == IntegerType()
     assert column2.is_optional == True
 
     column3 = index[3]
     assert isinstance(column3, NestedField)
     assert column3.field_id == 3
-    assert column3.type == BooleanType()
+    assert column3.field_type == BooleanType()
     assert column3.is_optional == False
 
 
diff --git a/python/tests/test_types.py b/python/tests/test_types.py
index cda56ab4e..844e3ab7d 100644
--- a/python/tests/test_types.py
+++ b/python/tests/test_types.py
@@ -146,8 +146,8 @@ def test_list_type():
         ),
         False,
     )
-    assert isinstance(type_var.element.type, StructType)
-    assert len(type_var.element.type.fields) == 2
+    assert isinstance(type_var.element.field_type, StructType)
+    assert len(type_var.element.field_type.fields) == 2
     assert type_var.element.field_id == 1
     assert str(type_var) == str(eval(repr(type_var)))
     assert type_var == eval(repr(type_var))
@@ -162,9 +162,9 @@ def test_list_type():
 
 def test_map_type():
     type_var = MapType(1, DoubleType(), 2, UUIDType(), False)
-    assert isinstance(type_var.key.type, DoubleType)
+    assert isinstance(type_var.key.field_type, DoubleType)
     assert type_var.key.field_id == 1
-    assert isinstance(type_var.value.type, UUIDType)
+    assert isinstance(type_var.value.field_type, UUIDType)
     assert type_var.value.field_id == 2
     assert str(type_var) == str(eval(repr(type_var)))
     assert type_var == eval(repr(type_var))
@@ -193,7 +193,7 @@ def test_nested_field():
     assert field_var.is_optional
     assert not field_var.is_required
     assert field_var.field_id == 1
-    assert isinstance(field_var.type, StructType)
+    assert isinstance(field_var.field_type, StructType)
     assert str(field_var) == str(eval(repr(field_var)))