You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2022/07/28 06:19:10 UTC

[GitHub] [iceberg] Fokko commented on a diff in pull request #5362: Python: Refactor unary and set expressions

Fokko commented on code in PR #5362:
URL: https://github.com/apache/iceberg/pull/5362#discussion_r931808275


##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,151 +313,197 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> UnaryPredicate:
+        ...
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> BoundUnaryPredicate:
+        ...
 
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+    def __invert__(self) -> BoundNotNull:
+        return BoundNotNull(self.term)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-class BoundNotNull(BoundPredicate[T]):
     def __invert__(self) -> BoundIsNull:
         return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
 
+class IsNull(UnaryPredicate[T]):
+    as_bound = BoundIsNull
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+    def __invert__(self) -> NotNull:
+        return NotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+class NotNull(UnaryPredicate[T]):
+    as_bound = BoundNotNull
+
+    def __invert__(self) -> IsNull:
+        return IsNull(self.term)
+
 
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysFalse()
 
-class BoundIsNaN(BoundPredicate[T]):
     def __invert__(self) -> BoundNotNaN:
         return BoundNotNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
+
+class BoundNotNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysTrue()
+
+    def __invert__(self) -> BoundIsNaN:
+        return BoundIsNaN(self.term)
 
 
-class NotNaN(UnboundPredicate[T]):
+class IsNaN(UnaryPredicate[T]):
+    as_bound = BoundIsNaN
+
+    def __invert__(self) -> NotNaN:
+        return NotNaN(self.term)
+
+
+class NotNaN(UnaryPredicate[T]):
+    as_bound = BoundNotNaN
+
     def __invert__(self) -> IsNaN:
         return IsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNaN(bound_ref)
+@dataclass(frozen=True)
+class SetPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
+    literals: tuple[Literal[T], ...]
+
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term, {lit.to(bound_term.ref().field.field_type) for lit in self.literals})
 
-class BoundNotNaN(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNaN:
-        return BoundIsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class BoundSetPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
+    literals: set[Literal[T]]
 
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
-class BoundIn(BoundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("BoundIn must contain at least 1 literal.")
 
-    def __invert__(self) -> BoundNotIn[T]:
-        return BoundNotIn(self.term, *self.literals)
+class BoundIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysFalse()
+        if count == 1:
+            return BoundEq(term, literals.pop())

Review Comment:
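   `literals.pop()` mutates the set that was passed in, so the caller's `literals` is left empty after this call. Reading the single element without removing it avoids the side effect: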
   ```suggestion
               return BoundEq(term, next(iter(literals)))
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,151 +313,197 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> UnaryPredicate:
+        ...
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> BoundUnaryPredicate:
+        ...
 
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+    def __invert__(self) -> BoundNotNull:
+        return BoundNotNull(self.term)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-class BoundNotNull(BoundPredicate[T]):
     def __invert__(self) -> BoundIsNull:
         return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
 
+class IsNull(UnaryPredicate[T]):
+    as_bound = BoundIsNull
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+    def __invert__(self) -> NotNull:
+        return NotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+class NotNull(UnaryPredicate[T]):
+    as_bound = BoundNotNull
+
+    def __invert__(self) -> IsNull:
+        return IsNull(self.term)
+
 
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysFalse()
 
-class BoundIsNaN(BoundPredicate[T]):
     def __invert__(self) -> BoundNotNaN:
         return BoundNotNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
+
+class BoundNotNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysTrue()
+
+    def __invert__(self) -> BoundIsNaN:
+        return BoundIsNaN(self.term)
 
 
-class NotNaN(UnboundPredicate[T]):
+class IsNaN(UnaryPredicate[T]):
+    as_bound = BoundIsNaN
+
+    def __invert__(self) -> NotNaN:
+        return NotNaN(self.term)
+
+
+class NotNaN(UnaryPredicate[T]):
+    as_bound = BoundNotNaN
+
     def __invert__(self) -> IsNaN:
         return IsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNaN(bound_ref)
+@dataclass(frozen=True)
+class SetPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
+    literals: tuple[Literal[T], ...]
+
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term, {lit.to(bound_term.ref().field.field_type) for lit in self.literals})
 
-class BoundNotNaN(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNaN:
-        return BoundIsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class BoundSetPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
+    literals: set[Literal[T]]
 
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
-class BoundIn(BoundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("BoundIn must contain at least 1 literal.")
 
-    def __invert__(self) -> BoundNotIn[T]:
-        return BoundNotIn(self.term, *self.literals)
+class BoundIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysFalse()
+        if count == 1:
+            return BoundEq(term, literals.pop())
+        else:
+            return super().__new__(cls)
 
+    def __invert__(self) -> BooleanExpression:
+        return BoundNotIn(self.term, self.literals)
 
-class In(UnboundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("In must contain at least 1 literal.")
 
-    def __invert__(self) -> NotIn[T]:
-        return NotIn(self.term, *self.literals)
+class BoundNotIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysTrue()
+        if count == 1:
+            return BoundNotEq(term, literals.pop())
+        else:
+            return super().__new__(cls)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIn[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIn(bound_ref, *tuple(lit.to(bound_ref.field.field_type) for lit in self.literals))  # type: ignore
+    def __invert__(self) -> BooleanExpression:
+        return BoundIn(self.term, self.literals)
 
 
-class BoundNotIn(BoundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("BoundNotIn must contain at least 1 literal.")
+class In(SetPredicate[T]):
+    as_bound = BoundIn
 
-    def __invert__(self) -> BoundIn[T]:
-        return BoundIn(self.term, *self.literals)
+    def __new__(cls, term: UnboundTerm[T], literals: tuple[Literal[T], ...]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysFalse()
+        if count == 1:

Review Comment:
   ```suggestion
           elif count == 1:
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,151 +313,197 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> UnaryPredicate:
+        ...
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> BoundUnaryPredicate:
+        ...
 
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+    def __invert__(self) -> BoundNotNull:
+        return BoundNotNull(self.term)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-class BoundNotNull(BoundPredicate[T]):
     def __invert__(self) -> BoundIsNull:
         return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
 
+class IsNull(UnaryPredicate[T]):
+    as_bound = BoundIsNull
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+    def __invert__(self) -> NotNull:
+        return NotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+class NotNull(UnaryPredicate[T]):
+    as_bound = BoundNotNull
+
+    def __invert__(self) -> IsNull:
+        return IsNull(self.term)
+
 
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysFalse()
 
-class BoundIsNaN(BoundPredicate[T]):
     def __invert__(self) -> BoundNotNaN:
         return BoundNotNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
+
+class BoundNotNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysTrue()
+
+    def __invert__(self) -> BoundIsNaN:
+        return BoundIsNaN(self.term)
 
 
-class NotNaN(UnboundPredicate[T]):
+class IsNaN(UnaryPredicate[T]):
+    as_bound = BoundIsNaN
+
+    def __invert__(self) -> NotNaN:
+        return NotNaN(self.term)
+
+
+class NotNaN(UnaryPredicate[T]):
+    as_bound = BoundNotNaN
+
     def __invert__(self) -> IsNaN:
         return IsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNaN(bound_ref)
+@dataclass(frozen=True)
+class SetPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
+    literals: tuple[Literal[T], ...]
+
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term, {lit.to(bound_term.ref().field.field_type) for lit in self.literals})
 
-class BoundNotNaN(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNaN:
-        return BoundIsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class BoundSetPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
+    literals: set[Literal[T]]
 
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
-class BoundIn(BoundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("BoundIn must contain at least 1 literal.")
 
-    def __invert__(self) -> BoundNotIn[T]:
-        return BoundNotIn(self.term, *self.literals)
+class BoundIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysFalse()
+        if count == 1:

Review Comment:
   ```suggestion
           elif count == 1:
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,151 +313,197 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> UnaryPredicate:
+        ...
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> BoundUnaryPredicate:
+        ...
 
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+    def __invert__(self) -> BoundNotNull:
+        return BoundNotNull(self.term)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-class BoundNotNull(BoundPredicate[T]):
     def __invert__(self) -> BoundIsNull:
         return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
 
+class IsNull(UnaryPredicate[T]):
+    as_bound = BoundIsNull
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+    def __invert__(self) -> NotNull:
+        return NotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+class NotNull(UnaryPredicate[T]):
+    as_bound = BoundNotNull
+
+    def __invert__(self) -> IsNull:
+        return IsNull(self.term)
+
 
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type

Review Comment:
   I like how this is done: the field-type check happens in the `__new__` method.



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,151 +313,197 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> UnaryPredicate:
+        ...
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> BoundUnaryPredicate:
+        ...
 
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+    def __invert__(self) -> BoundNotNull:
+        return BoundNotNull(self.term)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-class BoundNotNull(BoundPredicate[T]):
     def __invert__(self) -> BoundIsNull:
         return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
 
+class IsNull(UnaryPredicate[T]):
+    as_bound = BoundIsNull
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+    def __invert__(self) -> NotNull:
+        return NotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+class NotNull(UnaryPredicate[T]):
+    as_bound = BoundNotNull
+
+    def __invert__(self) -> IsNull:
+        return IsNull(self.term)
+
 
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysFalse()
 
-class BoundIsNaN(BoundPredicate[T]):
     def __invert__(self) -> BoundNotNaN:
         return BoundNotNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
+
+class BoundNotNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysTrue()
+
+    def __invert__(self) -> BoundIsNaN:
+        return BoundIsNaN(self.term)
 
 
-class NotNaN(UnboundPredicate[T]):
+class IsNaN(UnaryPredicate[T]):
+    as_bound = BoundIsNaN
+
+    def __invert__(self) -> NotNaN:
+        return NotNaN(self.term)
+
+
+class NotNaN(UnaryPredicate[T]):
+    as_bound = BoundNotNaN
+
     def __invert__(self) -> IsNaN:
         return IsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNaN(bound_ref)
+@dataclass(frozen=True)
+class SetPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
+    literals: tuple[Literal[T], ...]
+
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term, {lit.to(bound_term.ref().field.field_type) for lit in self.literals})
 
-class BoundNotNaN(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNaN:
-        return BoundIsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class BoundSetPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
+    literals: set[Literal[T]]
 
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
-class BoundIn(BoundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("BoundIn must contain at least 1 literal.")
 
-    def __invert__(self) -> BoundNotIn[T]:
-        return BoundNotIn(self.term, *self.literals)
+class BoundIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysFalse()
+        if count == 1:
+            return BoundEq(term, literals.pop())
+        else:
+            return super().__new__(cls)
 
+    def __invert__(self) -> BooleanExpression:
+        return BoundNotIn(self.term, self.literals)
 
-class In(UnboundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("In must contain at least 1 literal.")
 
-    def __invert__(self) -> NotIn[T]:
-        return NotIn(self.term, *self.literals)
+class BoundNotIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysTrue()
+        if count == 1:
+            return BoundNotEq(term, literals.pop())
+        else:
+            return super().__new__(cls)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIn[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIn(bound_ref, *tuple(lit.to(bound_ref.field.field_type) for lit in self.literals))  # type: ignore
+    def __invert__(self) -> BooleanExpression:
+        return BoundIn(self.term, self.literals)
 
 
-class BoundNotIn(BoundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("BoundNotIn must contain at least 1 literal.")
+class In(SetPredicate[T]):
+    as_bound = BoundIn
 
-    def __invert__(self) -> BoundIn[T]:
-        return BoundIn(self.term, *self.literals)
+    def __new__(cls, term: UnboundTerm[T], literals: tuple[Literal[T], ...]) -> BooleanExpression:

Review Comment:
   How about collecting the literals as variadic positional arguments (`*literals`), like this:
   ```suggestion
       def __new__(cls, term: UnboundTerm[T], *literals: Literal[T]) -> BooleanExpression:
   ```
   
   This way we can write:
   ```python
   def test_in_to_eq():
       assert base.In(
           base.Reference("x"),
           literal(34.56),
           literal(19.25)
       ) == base.Eq(base.Reference("x"), literal(34.56))
   ```
   instead of:
   ```python
   def test_in_to_eq():
       assert base.In(
           base.Reference("x"),
           (literal(34.56), literal(19.25))
       ) == base.Eq(base.Reference("x"), literal(34.56))
   ```
   I think this makes the API a bit friendlier and less verbose.
   
   When the literals are already in a collection, you can unpack them with `*` at the call site:
   ```python
   class NotIn(SetPredicate[T]):
       as_bound = BoundNotIn
   ...
       def __invert__(self) -> BooleanExpression:
           return In(self.term, *self.literals)
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,151 +313,197 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> UnaryPredicate:
+        ...
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> BoundUnaryPredicate:
+        ...
 
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+    def __invert__(self) -> BoundNotNull:
+        return BoundNotNull(self.term)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-class BoundNotNull(BoundPredicate[T]):
     def __invert__(self) -> BoundIsNull:
         return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
 
+class IsNull(UnaryPredicate[T]):
+    as_bound = BoundIsNull
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+    def __invert__(self) -> NotNull:
+        return NotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+class NotNull(UnaryPredicate[T]):
+    as_bound = BoundNotNull
+
+    def __invert__(self) -> IsNull:
+        return IsNull(self.term)
+
 
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysFalse()
 
-class BoundIsNaN(BoundPredicate[T]):
     def __invert__(self) -> BoundNotNaN:
         return BoundNotNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
+
+class BoundNotNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysTrue()
+
+    def __invert__(self) -> BoundIsNaN:
+        return BoundIsNaN(self.term)
 
 
-class NotNaN(UnboundPredicate[T]):
+class IsNaN(UnaryPredicate[T]):
+    as_bound = BoundIsNaN
+
+    def __invert__(self) -> NotNaN:
+        return NotNaN(self.term)
+
+
+class NotNaN(UnaryPredicate[T]):
+    as_bound = BoundNotNaN
+
     def __invert__(self) -> IsNaN:
         return IsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNaN(bound_ref)
+@dataclass(frozen=True)
+class SetPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
+    literals: tuple[Literal[T], ...]
+
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term, {lit.to(bound_term.ref().field.field_type) for lit in self.literals})
 
-class BoundNotNaN(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNaN:
-        return BoundIsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class BoundSetPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
+    literals: set[Literal[T]]
 
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
-class BoundIn(BoundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("BoundIn must contain at least 1 literal.")
 
-    def __invert__(self) -> BoundNotIn[T]:
-        return BoundNotIn(self.term, *self.literals)
+class BoundIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysFalse()
+        if count == 1:
+            return BoundEq(term, literals.pop())
+        else:
+            return super().__new__(cls)
 
+    def __invert__(self) -> BooleanExpression:
+        return BoundNotIn(self.term, self.literals)
 
-class In(UnboundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("In must contain at least 1 literal.")
 
-    def __invert__(self) -> NotIn[T]:
-        return NotIn(self.term, *self.literals)
+class BoundNotIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysTrue()
+        if count == 1:
+            return BoundNotEq(term, literals.pop())
+        else:
+            return super().__new__(cls)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIn[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIn(bound_ref, *tuple(lit.to(bound_ref.field.field_type) for lit in self.literals))  # type: ignore
+    def __invert__(self) -> BooleanExpression:
+        return BoundIn(self.term, self.literals)
 
 
-class BoundNotIn(BoundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("BoundNotIn must contain at least 1 literal.")
+class In(SetPredicate[T]):
+    as_bound = BoundIn
 
-    def __invert__(self) -> BoundIn[T]:
-        return BoundIn(self.term, *self.literals)
+    def __new__(cls, term: UnboundTerm[T], literals: tuple[Literal[T], ...]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysFalse()
+        if count == 1:
+            return Eq(term, literals[0])
+        else:
+            return super().__new__(cls)
 
+    def __invert__(self) -> BooleanExpression:
+        return NotIn(self.term, self.literals)
 
-class NotIn(UnboundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("NotIn must contain at least 1 literal.")
 
-    def __invert__(self) -> In[T]:
-        return In(self.term, *self.literals)
+class NotIn(SetPredicate[T]):
+    as_bound = BoundNotIn
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotIn[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotIn(bound_ref, *tuple(lit.to(bound_ref.field.field_type) for lit in self.literals))  # type: ignore
+    def __new__(cls, term: UnboundTerm[T], literals: tuple[Literal[T], ...]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysTrue()
+        if count == 1:

Review Comment:
   ```suggestion
           elif count == 1:
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,151 +313,197 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> UnaryPredicate:
+        ...
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> BoundUnaryPredicate:
+        ...
 
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+    def __invert__(self) -> BoundNotNull:
+        return BoundNotNull(self.term)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-class BoundNotNull(BoundPredicate[T]):
     def __invert__(self) -> BoundIsNull:
         return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
 
+class IsNull(UnaryPredicate[T]):
+    as_bound = BoundIsNull
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+    def __invert__(self) -> NotNull:
+        return NotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+class NotNull(UnaryPredicate[T]):
+    as_bound = BoundNotNull
+
+    def __invert__(self) -> IsNull:
+        return IsNull(self.term)
+
 
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysFalse()
 
-class BoundIsNaN(BoundPredicate[T]):
     def __invert__(self) -> BoundNotNaN:
         return BoundNotNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
+
+class BoundNotNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysTrue()
+
+    def __invert__(self) -> BoundIsNaN:
+        return BoundIsNaN(self.term)
 
 
-class NotNaN(UnboundPredicate[T]):
+class IsNaN(UnaryPredicate[T]):
+    as_bound = BoundIsNaN
+
+    def __invert__(self) -> NotNaN:
+        return NotNaN(self.term)
+
+
+class NotNaN(UnaryPredicate[T]):
+    as_bound = BoundNotNaN
+
     def __invert__(self) -> IsNaN:
         return IsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNaN(bound_ref)
+@dataclass(frozen=True)
+class SetPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
+    literals: tuple[Literal[T], ...]
+
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term, {lit.to(bound_term.ref().field.field_type) for lit in self.literals})
 
-class BoundNotNaN(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNaN:
-        return BoundIsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class BoundSetPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
+    literals: set[Literal[T]]
 
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
-class BoundIn(BoundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("BoundIn must contain at least 1 literal.")
 
-    def __invert__(self) -> BoundNotIn[T]:
-        return BoundNotIn(self.term, *self.literals)
+class BoundIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:

Review Comment:
   We could also consider accepting the literals as variadic positional arguments:
   ```python
   class BoundIn(BoundSetPredicate[T]):
       def __new__(cls, term: BoundTerm[T], *literals: Literal[T]) -> BooleanExpression:
           """Build an `in` predicate, simplifying when fewer than two distinct literals are given."""
           # De-duplicate into a local set so the arguments passed by the caller are never mutated.
           literals_set = set(literals)
           count = len(literals_set)
           if count == 0:
               return AlwaysFalse()
           if count == 1:
               # `literals` is a tuple and has no pop(); take the single value from the local set.
               return BoundEq(term, literals_set.pop())
           else:
               return super().__new__(cls)
   
       def __invert__(self) -> BooleanExpression:
           # Unpack the stored literals; assumes BoundNotIn takes `*literals` as well.
           return BoundNotIn(self.term, *self.literals)
   ```
   This way we also don't have to worry about mutating the set that was passed in (see the comment below) 👍🏻
   
   It also means that callers don't have to pass in a set:
   ```python
   BoundIn[str](
       base.BoundReference(
           field=NestedField(field_id=1, name="foo", field_type=StringType(), required=False),
           accessor=Accessor(position=0, inner=None),
       ),
       StringLiteral("foo"), StringLiteral("bar"), StringLiteral("baz"),
   )
   ```
   Instead of:
   ```python
   BoundIn[str](
       base.BoundReference(
           field=NestedField(field_id=1, name="foo", field_type=StringType(), required=False),
           accessor=Accessor(position=0, inner=None),
       ),
       {StringLiteral("foo"), StringLiteral("bar"), StringLiteral("baz")},
   )
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,151 +313,197 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> UnaryPredicate:
+        ...
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> BoundUnaryPredicate:
+        ...
 
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+    def __invert__(self) -> BoundNotNull:
+        return BoundNotNull(self.term)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-class BoundNotNull(BoundPredicate[T]):
     def __invert__(self) -> BoundIsNull:
         return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
 
+class IsNull(UnaryPredicate[T]):
+    as_bound = BoundIsNull
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+    def __invert__(self) -> NotNull:
+        return NotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+class NotNull(UnaryPredicate[T]):
+    as_bound = BoundNotNull
+
+    def __invert__(self) -> IsNull:
+        return IsNull(self.term)
+
 
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysFalse()
 
-class BoundIsNaN(BoundPredicate[T]):
     def __invert__(self) -> BoundNotNaN:
         return BoundNotNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
+
+class BoundNotNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysTrue()
+
+    def __invert__(self) -> BoundIsNaN:
+        return BoundIsNaN(self.term)
 
 
-class NotNaN(UnboundPredicate[T]):
+class IsNaN(UnaryPredicate[T]):
+    as_bound = BoundIsNaN
+
+    def __invert__(self) -> NotNaN:
+        return NotNaN(self.term)
+
+
+class NotNaN(UnaryPredicate[T]):
+    as_bound = BoundNotNaN
+
     def __invert__(self) -> IsNaN:
         return IsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNaN(bound_ref)
+@dataclass(frozen=True)
+class SetPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
+    literals: tuple[Literal[T], ...]
+
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term, {lit.to(bound_term.ref().field.field_type) for lit in self.literals})
 
-class BoundNotNaN(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNaN:
-        return BoundIsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class BoundSetPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
+    literals: set[Literal[T]]
 
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
-class BoundIn(BoundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("BoundIn must contain at least 1 literal.")
 
-    def __invert__(self) -> BoundNotIn[T]:
-        return BoundNotIn(self.term, *self.literals)
+class BoundIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysFalse()
+        if count == 1:
+            return BoundEq(term, literals.pop())
+        else:
+            return super().__new__(cls)
 
+    def __invert__(self) -> BooleanExpression:
+        return BoundNotIn(self.term, self.literals)
 
-class In(UnboundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("In must contain at least 1 literal.")
 
-    def __invert__(self) -> NotIn[T]:
-        return NotIn(self.term, *self.literals)
+class BoundNotIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysTrue()
+        if count == 1:

Review Comment:
   ```suggestion
           elif count == 1:
   ```



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,151 +313,197 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals is not None:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> UnaryPredicate:
+        ...
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNull(bound_ref)
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term)
 
 
-class BoundIsNull(BoundPredicate[T]):
-    def __invert__(self) -> BoundNotNull:
-        return BoundNotNull(self.term)
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("Null is a unary predicate and takes no Literals.")
+    @abstractmethod
+    def __invert__(self) -> BoundUnaryPredicate:
+        ...
 
 
-class NotNull(UnboundPredicate[T]):
-    def __invert__(self) -> IsNull:
-        return IsNull(self.term)
+class BoundIsNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysFalse()
+        return super().__new__(cls)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+    def __invert__(self) -> BoundNotNull:
+        return BoundNotNull(self.term)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNull(bound_ref)
 
+class BoundNotNull(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        if term.ref().field.required:
+            return AlwaysTrue()
+        return super().__new__(cls)
 
-class BoundNotNull(BoundPredicate[T]):
     def __invert__(self) -> BoundIsNull:
         return BoundIsNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNull is a unary predicate and takes no Literals.")
 
+class IsNull(UnaryPredicate[T]):
+    as_bound = BoundIsNull
 
-class IsNaN(UnboundPredicate[T]):
-    def __invert__(self) -> NotNaN:
-        return NotNaN(self.term)
+    def __invert__(self) -> NotNull:
+        return NotNull(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIsNaN(bound_ref)
+class NotNull(UnaryPredicate[T]):
+    as_bound = BoundNotNull
+
+    def __invert__(self) -> IsNull:
+        return IsNull(self.term)
+
 
+class BoundIsNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysFalse()
 
-class BoundIsNaN(BoundPredicate[T]):
     def __invert__(self) -> BoundNotNaN:
         return BoundNotNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
+
+class BoundNotNaN(BoundUnaryPredicate[T]):
+    def __new__(cls, term: BoundTerm[T]):
+        bound_type = term.ref().field.field_type
+        if type(bound_type) in {FloatType, DoubleType}:
+            return super().__new__(cls)
+        return AlwaysTrue()
+
+    def __invert__(self) -> BoundIsNaN:
+        return BoundIsNaN(self.term)
 
 
-class NotNaN(UnboundPredicate[T]):
+class IsNaN(UnaryPredicate[T]):
+    as_bound = BoundIsNaN
+
+    def __invert__(self) -> NotNaN:
+        return NotNaN(self.term)
+
+
+class NotNaN(UnaryPredicate[T]):
+    as_bound = BoundNotNaN
+
     def __invert__(self) -> IsNaN:
         return IsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNaN[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotNaN(bound_ref)
+@dataclass(frozen=True)
+class SetPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+    as_bound: ClassVar[type]
+    term: UnboundTerm[T]
+    literals: tuple[Literal[T], ...]
+
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
+    def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+        bound_term = self.term.bind(schema, case_sensitive)
+        return self.as_bound(bound_term, {lit.to(bound_term.ref().field.field_type) for lit in self.literals})
 
-class BoundNotNaN(BoundPredicate[T]):
-    def __invert__(self) -> BoundIsNaN:
-        return BoundIsNaN(self.term)
 
-    def _validate_literals(self):  # pylint: disable=W0238
-        if self.literals:
-            raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class BoundSetPredicate(Bound[T], BooleanExpression, ABC):
+    term: BoundTerm[T]
+    literals: set[Literal[T]]
 
+    @abstractmethod
+    def __invert__(self) -> BooleanExpression:
+        ...
 
-class BoundIn(BoundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("BoundIn must contain at least 1 literal.")
 
-    def __invert__(self) -> BoundNotIn[T]:
-        return BoundNotIn(self.term, *self.literals)
+class BoundIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysFalse()
+        if count == 1:
+            return BoundEq(term, literals.pop())
+        else:
+            return super().__new__(cls)
 
+    def __invert__(self) -> BooleanExpression:
+        return BoundNotIn(self.term, self.literals)
 
-class In(UnboundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("In must contain at least 1 literal.")
 
-    def __invert__(self) -> NotIn[T]:
-        return NotIn(self.term, *self.literals)
+class BoundNotIn(BoundSetPredicate[T]):
+    def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysTrue()
+        if count == 1:
+            return BoundNotEq(term, literals.pop())
+        else:
+            return super().__new__(cls)
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundIn[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundIn(bound_ref, *tuple(lit.to(bound_ref.field.field_type) for lit in self.literals))  # type: ignore
+    def __invert__(self) -> BooleanExpression:
+        return BoundIn(self.term, self.literals)
 
 
-class BoundNotIn(BoundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("BoundNotIn must contain at least 1 literal.")
+class In(SetPredicate[T]):
+    as_bound = BoundIn
 
-    def __invert__(self) -> BoundIn[T]:
-        return BoundIn(self.term, *self.literals)
+    def __new__(cls, term: UnboundTerm[T], literals: tuple[Literal[T], ...]) -> BooleanExpression:
+        count = len(literals)
+        if count == 0:
+            return AlwaysFalse()
+        if count == 1:
+            return Eq(term, literals[0])
+        else:
+            return super().__new__(cls)
 
+    def __invert__(self) -> BooleanExpression:
+        return NotIn(self.term, self.literals)
 
-class NotIn(UnboundPredicate[T]):
-    def _validate_literals(self):  # pylint: disable=W0238
-        if not self.literals:
-            raise AttributeError("NotIn must contain at least 1 literal.")
 
-    def __invert__(self) -> In[T]:
-        return In(self.term, *self.literals)
+class NotIn(SetPredicate[T]):
+    as_bound = BoundNotIn
 
-    def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotIn[T]:
-        bound_ref = self.term.bind(schema, case_sensitive)
-        return BoundNotIn(bound_ref, *tuple(lit.to(bound_ref.field.field_type) for lit in self.literals))  # type: ignore
+    def __new__(cls, term: UnboundTerm[T], literals: tuple[Literal[T], ...]) -> BooleanExpression:

Review Comment:
   ```suggestion
       def __new__(cls, term: UnboundTerm[T], *literals: Literal[T]) -> BooleanExpression:
   ```
   See the comment at `class In(SetPredicate[T]):`



##########
python/pyiceberg/expressions/base.py:
##########
@@ -19,58 +19,22 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from functools import reduce, singledispatch
-from typing import Generic, TypeVar
+from typing import (
+    ClassVar,
+    Generic,
+    Literal,

Review Comment:
   I think we want to import our own `Literal` class:
   ```python
   from pyiceberg.expressions.literals import Literal
   ```
   instead of the one from typing.
   



##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,151 +313,197 @@ def __str__(self) -> str:
 
 
 @dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
     """TRUE expression"""
 
     def __invert__(self) -> AlwaysFalse:
         return AlwaysFalse()
 
 
 @dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
     """FALSE expression"""
 
     def __invert__(self) -> AlwaysTrue:
         return AlwaysTrue()
 
 
-class IsNull(UnboundPredicate[T]):
-    def __invert__(self) -> NotNull:
-        return NotNull(self.term)
+@dataclass(frozen=True)

Review Comment:
   An abstract class should not also be a dataclass: since it cannot be instantiated, it does not carry any data.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org