You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2022/07/25 13:21:06 UTC

[GitHub] [iceberg] Fokko commented on a diff in pull request #5258: More Python Expressions

Fokko commented on code in PR #5258:
URL: https://github.com/apache/iceberg/pull/5258#discussion_r928739649


##########
python/pyiceberg/expressions/base.py:
##########
@@ -156,7 +158,7 @@ def bind(self, schema: Schema, case_sensitive: bool) -> BoundReference[T]:
         Returns:
             BoundReference: A reference bound to the specific field in the Iceberg schema
         """
-        field = schema.find_field(name_or_id=self.name, case_sensitive=case_sensitive)
+        field = schema.find_field(name_or_id=self.name, case_sensitive=case_sensitive)  # pylint: disable=W0621

Review Comment:
   ```suggestion
           field = schema.find_field(name_or_id=self.name, case_sensitive=case_sensitive)  # pylint: disable=redefined-outer-name
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -169,16 +171,64 @@ def bind(self, schema: Schema, case_sensitive: bool) -> BoundReference[T]:
         return BoundReference(field=field, accessor=accessor)
 
 
-@dataclass(frozen=True)  # type: ignore[misc]
 class BoundPredicate(Bound[T], BooleanExpression):
-    term: BoundReference[T]
-    literals: Tuple[Literal[T], ...]
+    def __init__(self, term: BoundTerm[T], literals: tuple[Literal[T], ...] | Literal[T] | None = None):
+        self._term = term
+        self._literals = literals if isinstance(literals, tuple) else (literals and (literals,))
+        self._validate_literals()
+
+    def _validate_literals(self):
+        if self.literals is None or len(self.literals) != 1:
+            raise AttributeError(f"{self.__class__.__name__} must have exactly 1 literal.")
+
+    @property
+    def term(self) -> BoundTerm[T]:
+        return self._term
+
+    @property
+    def literals(self) -> tuple[Literal[T], ...]:
+        return self._literals  # type: ignore
+
+    def __str__(self) -> str:
+        return f"{self.__class__.__name__}({str(self.term)}{self.literals and ', '+str(self.literals)})"
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({repr(self.term)}{self.literals and ', '+repr(self.literals)})"
+
+    def __eq__(self, other) -> bool:
+        return id(self) == id(other) or (
+            type(self) == type(other) and self.term == other.term and self.literals == other.literals
+        )
 
 
-@dataclass(frozen=True)  # type: ignore[misc]
 class UnboundPredicate(Unbound[T, BooleanExpression], BooleanExpression):
-    term: Reference[T]
-    literals: Tuple[Literal[T], ...]
+    def __init__(self, term: UnboundTerm[T], literals: tuple[Literal[T], ...] | Literal[T] | None = None):
+        self._term = term
+        self._literals = literals if isinstance(literals, tuple) else (literals and (literals,))
+        self._validate_literals()
+
+    def _validate_literals(self):
+        if self.literals is None or len(self.literals) != 1:
+            raise AttributeError(f"{self.__class__.__name__} must have exactly 1 literal.")
+
+    @property
+    def term(self) -> UnboundTerm[T]:
+        return self._term
+
+    @property
+    def literals(self) -> tuple[Literal[T], ...]:
+        return self._literals  # type: ignore
+
+    def __str__(self) -> str:
+        return f"{self.__class__.__name__}({str(self.term)}{self.literals and ', '+str(self.literals)})"
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({repr(self.term)}{self.literals and ', '+repr(self.literals)})"
+
+    def __eq__(self, other) -> bool:

Review Comment:
   We could also leverage the Python dataclasses and have the eq method generated.



##########
python/pyiceberg/expressions/base.py:
##########
@@ -169,16 +171,64 @@ def bind(self, schema: Schema, case_sensitive: bool) -> BoundReference[T]:
         return BoundReference(field=field, accessor=accessor)
 
 
-@dataclass(frozen=True)  # type: ignore[misc]
 class BoundPredicate(Bound[T], BooleanExpression):
-    term: BoundReference[T]
-    literals: Tuple[Literal[T], ...]
+    def __init__(self, term: BoundTerm[T], literals: tuple[Literal[T], ...] | Literal[T] | None = None):
+        self._term = term
+        self._literals = literals if isinstance(literals, tuple) else (literals and (literals,))
+        self._validate_literals()
+
+    def _validate_literals(self):
+        if self.literals is None or len(self.literals) != 1:

Review Comment:
   ```suggestion
           if not self.literals or len(self.literals) != 1:
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -169,16 +171,64 @@ def bind(self, schema: Schema, case_sensitive: bool) -> BoundReference[T]:
         return BoundReference(field=field, accessor=accessor)
 
 
-@dataclass(frozen=True)  # type: ignore[misc]
 class BoundPredicate(Bound[T], BooleanExpression):

Review Comment:
   For readability and type checking it is best to also define the variables in the class itself:
   ```python
   class BoundPredicate(Bound[T], BooleanExpression):
       _term: BoundTerm[T]
       _literals: Tuple[Literal[T]]
   ```
   This will also remove the ignore below:
   ```python
   @property
   def literals(self) -> tuple[Literal[T], ...]:
       return self._literals  # type: ignore
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -169,16 +171,64 @@ def bind(self, schema: Schema, case_sensitive: bool) -> BoundReference[T]:
         return BoundReference(field=field, accessor=accessor)
 
 
-@dataclass(frozen=True)  # type: ignore[misc]
 class BoundPredicate(Bound[T], BooleanExpression):
-    term: BoundReference[T]
-    literals: Tuple[Literal[T], ...]
+    def __init__(self, term: BoundTerm[T], literals: tuple[Literal[T], ...] | Literal[T] | None = None):
+        self._term = term
+        self._literals = literals if isinstance(literals, tuple) else (literals and (literals,))
+        self._validate_literals()
+
+    def _validate_literals(self):
+        if self.literals is None or len(self.literals) != 1:

Review Comment:
   I'm a bit confused by the signature of the `__init__`, that allows us to pass in a `None`, and also the default value is `None`, but we clearly require the literal to be set. Wouldn't it be better to also set this in the signature?
   
   Instead of:
   ```python
   def __init__(self, term: BoundTerm[T], literals: tuple[Literal[T], ...] | Literal[T] | None = None):
   ```
   Go for something like:
   ```python
   def __init__(self, term: BoundTerm[T], literals: tuple[Literal[T]] | Literal[T]):
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -169,16 +171,64 @@ def bind(self, schema: Schema, case_sensitive: bool) -> BoundReference[T]:
         return BoundReference(field=field, accessor=accessor)
 
 
-@dataclass(frozen=True)  # type: ignore[misc]
 class BoundPredicate(Bound[T], BooleanExpression):
-    term: BoundReference[T]
-    literals: Tuple[Literal[T], ...]
+    def __init__(self, term: BoundTerm[T], literals: tuple[Literal[T], ...] | Literal[T] | None = None):
+        self._term = term
+        self._literals = literals if isinstance(literals, tuple) else (literals and (literals,))
+        self._validate_literals()
+
+    def _validate_literals(self):
+        if self.literals is None or len(self.literals) != 1:
+            raise AttributeError(f"{self.__class__.__name__} must have exactly 1 literal.")
+
+    @property
+    def term(self) -> BoundTerm[T]:
+        return self._term
+
+    @property
+    def literals(self) -> tuple[Literal[T], ...]:
+        return self._literals  # type: ignore
+
+    def __str__(self) -> str:
+        return f"{self.__class__.__name__}({str(self.term)}{self.literals and ', '+str(self.literals)})"
+
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}({repr(self.term)}{self.literals and ', '+repr(self.literals)})"
+
+    def __eq__(self, other) -> bool:
+        return id(self) == id(other) or (
+            type(self) == type(other) and self.term == other.term and self.literals == other.literals
+        )
 
 
-@dataclass(frozen=True)  # type: ignore[misc]
 class UnboundPredicate(Unbound[T, BooleanExpression], BooleanExpression):

Review Comment:
   This one is missing the `bind()` method



##########
python/pyiceberg/expressions/base.py:
##########
@@ -169,16 +171,64 @@ def bind(self, schema: Schema, case_sensitive: bool) -> BoundReference[T]:
         return BoundReference(field=field, accessor=accessor)
 
 
-@dataclass(frozen=True)  # type: ignore[misc]
 class BoundPredicate(Bound[T], BooleanExpression):
-    term: BoundReference[T]
-    literals: Tuple[Literal[T], ...]
+    def __init__(self, term: BoundTerm[T], literals: tuple[Literal[T], ...] | Literal[T] | None = None):

Review Comment:
   What do you think of just accumulating the positional arguments into a list of literals?
   ```suggestion
       def __init__(self, term: BoundTerm[T], *literals: Literal[T]):
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org