You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2022/05/14 23:09:20 UTC

[GitHub] [iceberg] samredai commented on a diff in pull request #4767: Change types into dataclasses

samredai commented on code in PR #4767:
URL: https://github.com/apache/iceberg/pull/4767#discussion_r873083288


##########
python/src/iceberg/types.py:
##########
@@ -42,57 +42,63 @@ def __new__(cls, *args, **kwargs):
         return cls._instance
 
 
+@dataclass(frozen=True, eq=True, repr=True)
 class IcebergType:
-    """Base type for all Iceberg Types"""
-
-    _initialized = False
+    """Base type for all Iceberg Types
 
-    def __init__(self, type_string: str, repr_string: str):
-        self._type_string = type_string
-        self._repr_string = repr_string
-        self._initialized = True
+    Example:
+        >>> str(IcebergType())
+        'IcebergType()'
+        >>> repr(IcebergType())
+        'IcebergType()'
+    """
 
-    def __repr__(self):
-        return self._repr_string
+    type_string: str = field(init=False, repr=False)
 
     def __str__(self):
-        return self._type_string
+        if hasattr(self, "type_string"):
+            return self.type_string
+        return self.__repr__()
 
     @property
     def is_primitive(self) -> bool:
         return isinstance(self, PrimitiveType)
 
 
+@dataclass(frozen=True, eq=True)
 class PrimitiveType(IcebergType):
-    """Base class for all Iceberg Primitive Types"""
+    """Base class for all Iceberg Primitive Types
 
+    Example:
+        >>> str(PrimitiveType())
+        'PrimitiveType()'
+    """
 
+
+@dataclass(frozen=True, eq=True, repr=True)
 class FixedType(PrimitiveType):
     """A fixed data type in Iceberg.
 
     Example:
         >>> FixedType(8)
         FixedType(length=8)
-        >>> FixedType(8)==FixedType(8)
+        >>> FixedType(8) == FixedType(8)
         True
     """
 
-    _instances: Dict[int, "FixedType"] = {}
+    length: int = field()
+
+    _instances: ClassVar[Dict[int, "FixedType"]] = {}
 
     def __new__(cls, length: int):
         cls._instances[length] = cls._instances.get(length) or object.__new__(cls)
         return cls._instances[length]
 
-    def __init__(self, length: int):
-        if not self._initialized:
-            super().__init__(f"fixed[{length}]", f"FixedType(length={length})")
-            self._length = length
-
-    @property
-    def length(self) -> int:
-        return self._length
+    def __post_init__(self):
+        object.__setattr__(self, "type_string", f"fixed[{self.length}]")

Review Comment:
   Is it required to use `object.__setattr__` here because the dataclass is frozen?



##########
python/src/iceberg/types.py:
##########
@@ -148,56 +159,28 @@ def __new__(
         cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
         return cls._instances[key]
 
-    def __init__(
-        self,
-        field_id: int,
-        name: str,
-        field_type: IcebergType,
-        is_optional: bool = True,
-        doc: Optional[str] = None,
-    ):
-        if not self._initialized:
-            docString = "" if doc is None else f", doc={repr(doc)}"
-            super().__init__(
-                (
-                    f"{field_id}: {name}: {'optional' if is_optional else 'required'} {field_type}" ""
-                    if doc is None
-                    else f" ({doc})"
-                ),
-                f"NestedField(field_id={field_id}, name={repr(name)}, field_type={repr(field_type)}, is_optional={is_optional}"
-                f"{docString})",
-            )
-            self._is_optional = is_optional
-            self._id = field_id
-            self._name = name
-            self._type = field_type
-            self._doc = doc
-
-    @property
-    def is_optional(self) -> bool:
-        return self._is_optional
-
     @property
     def is_required(self) -> bool:
-        return not self._is_optional
-
-    @property
-    def field_id(self) -> int:
-        return self._id
-
-    @property
-    def name(self) -> str:
-        return self._name
-
-    @property
-    def doc(self) -> Optional[str]:
-        return self._doc
-
+        return not self.is_optional
+
+    def __post_init__(self):
+        object.__setattr__(
+            self,
+            "type_string",
+            (
+                f"{self.field_id}: {self.name}: {'optional' if self.is_optional else 'required'} {self.type}"
+                if self.doc is None
+                else f" ({self.doc})"
+            ),
+        )
+
+    # Alias for field_type
     @property
     def type(self) -> IcebergType:
-        return self._type
+        return self.field_type

Review Comment:
   Should we just rename this property to `field_type`? It might require some small changes in other parts of the library, but it's probably better not to shadow the `type` builtin, and `field_type` is the argument name anyway. We can also do this in a follow-up PR.



##########
python/src/iceberg/types.py:
##########
@@ -209,25 +192,24 @@ class StructType(IcebergType):
         'struct<1: required_field: optional string, 2: optional_field: optional int>'
     """
 
-    _instances: Dict[Tuple[NestedField, ...], "StructType"] = {}
+    fields: List[NestedField] = field()
+
+    _instances: ClassVar[Dict[Tuple[NestedField, ...], "StructType"]] = {}
 
-    def __new__(cls, *fields: NestedField):
+    def __new__(cls, *fields: NestedField, **kwargs):
+        if "fields" in kwargs:
+            fields = kwargs["fields"]
         cls._instances[fields] = cls._instances.get(fields) or object.__new__(cls)
         return cls._instances[fields]
 
-    def __init__(self, *fields: NestedField):
-        if not self._initialized:
-            super().__init__(
-                f"struct<{', '.join(map(str, fields))}>",
-                f"StructType{repr(fields)}",
-            )
-            self._fields = fields
-
-    @property
-    def fields(self) -> Tuple[NestedField, ...]:
-        return self._fields
+    def __init__(self, *fields: NestedField, **kwargs):
+        if "fields" in kwargs:
+            fields = kwargs["fields"]
+        object.__setattr__(self, "fields", fields)

Review Comment:
   This should be nested in the `if` statement right above it, right? If there's no `fields` kwarg, we can't set it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org