You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2022/07/27 08:51:35 UTC

[GitHub] [iceberg] Fokko commented on a diff in pull request #5362: Python: Refactor unary expressions

Fokko commented on code in PR #5362:
URL: https://github.com/apache/iceberg/pull/5362#discussion_r930724220


##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,107 +349,104 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
-
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    inverse: ClassVar[type]
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def __invert__(self) -> inverse:
+        return self.inverse(self.term)
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression):
+    inverse: ClassVar[type]
+    term: BoundTerm[T]
 
+    def __invert__(self) -> inverse:
+        return self.inverse(self.term)
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+class IsNull(UnaryPredicate[T]):
+    pass
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class NotNull(UnaryPredicate[T]):
+    pass
 
-class BoundNotNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNull:
-        return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+IsNull.inverse = NotNull
+IsNull.as_bound = BoundIsNull
+NotNull.inverse = IsNull
+NotNull.as_bound = BoundNotNull
+BoundIsNull.inverse = BoundNotNull
+BoundNotNull.inverse = BoundIsNull
 
 
-class BoundIsNaN(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNaN:
-        return BoundNotNaN(self.term)
+class IsNaN(UnaryPredicate[T]):
+    pass
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
+class NotNaN(UnaryPredicate[T]):
+    pass
 
-class NotNaN(UnboundPredicate[T]):
-    def __invert__(self) -> IsNaN:
-        return IsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if bound_type == FloatType() or bound_type == DoubleType():

Review Comment:
   This way we don't have to fetch the actual `{Float,Double}Type` instances; we only compare against the classes themselves.
   ```suggestion
           if type(bound_type) in {FloatType, DoubleType}:
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,107 +349,104 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
-
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    inverse: ClassVar[type]
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def __invert__(self) -> inverse:
+        return self.inverse(self.term)
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression):
+    inverse: ClassVar[type]
+    term: BoundTerm[T]
 
+    def __invert__(self) -> inverse:
+        return self.inverse(self.term)
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+class IsNull(UnaryPredicate[T]):
+    pass
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class NotNull(UnaryPredicate[T]):
+    pass
 
-class BoundNotNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNull:
-        return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+IsNull.inverse = NotNull
+IsNull.as_bound = BoundIsNull
+NotNull.inverse = IsNull
+NotNull.as_bound = BoundNotNull
+BoundIsNull.inverse = BoundNotNull
+BoundNotNull.inverse = BoundIsNull

Review Comment:
   I like that we only store the types. I'm not sure whether reimplementing would cause issues. I would probably declare `__invert__` as an abstract method in `UnaryPredicate`:
   ```python
       @abstractmethod
       def __invert__(self) -> BooleanExpression:
           pass
   ```
   The return type would then change to `BooleanExpression`, as it can return a `UnaryPredicate` but also an `Always{True,False}`. This is nice because it gives us typing and we don't flatten everything to `type`.
   
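   We would then implement `__invert__` in each concrete class itself. This keeps everything in one place, and it makes sure that we implement all the methods, because a missing implementation is caught both at runtime and by static analysis: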
   
   Caught by pytest:
   ```python
       def test_isnull_bind_required():
           schema = Schema(NestedField(2, "a", IntegerType(), required=True), schema_id=1)
   >       assert base.IsNull(base.Reference("a")).bind(schema) == base.AlwaysFalse()
   E       TypeError: Can't instantiate abstract class IsNull with abstract method __invert__
   ```
   
   Also caught by static analysis:
   ```python
   python/pyiceberg/expressions/base.py:393: error: Cannot instantiate abstract class "NotNull" with abstract attribute "__invert__"
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,107 +349,104 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
-
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    inverse: ClassVar[type]
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def __invert__(self) -> inverse:
+        return self.inverse(self.term)
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression):
+    inverse: ClassVar[type]
+    term: BoundTerm[T]
 
+    def __invert__(self) -> inverse:
+        return self.inverse(self.term)
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+class IsNull(UnaryPredicate[T]):
+    pass
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class NotNull(UnaryPredicate[T]):
+    pass
 
-class BoundNotNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNull:
-        return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+IsNull.inverse = NotNull
+IsNull.as_bound = BoundIsNull
+NotNull.inverse = IsNull
+NotNull.as_bound = BoundNotNull
+BoundIsNull.inverse = BoundNotNull
+BoundNotNull.inverse = BoundIsNull
 
 
-class BoundIsNaN(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNaN:
-        return BoundNotNaN(self.term)
+class IsNaN(UnaryPredicate[T]):
+    pass
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
+class NotNaN(UnaryPredicate[T]):
+    pass
 
-class NotNaN(UnboundPredicate[T]):
-    def __invert__(self) -> IsNaN:
-        return IsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if bound_type == FloatType() or bound_type == DoubleType():
+            return super().__new__(cls)
+        return AlwaysFalse()
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNaN(bound_ref)
 
+class BoundNotNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if bound_type == FloatType() or bound_type == DoubleType():

Review Comment:
   ```suggestion
           if type(bound_type) in {FloatType, DoubleType}:
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org