You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2022/01/20 06:38:17 UTC

[GitHub] [iceberg] TGooch44 commented on a change in pull request #2115: [python] Completing ResidualEvaluator Implementation

TGooch44 commented on a change in pull request #2115:
URL: https://github.com/apache/iceberg/pull/2115#discussion_r788397380



##########
File path: python/iceberg/api/expressions/residual_evaluator.py
##########
@@ -15,98 +15,158 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from .expression import Expression
 from .expressions import Expressions, ExpressionVisitors
+from .literals import Literal
 from .predicate import BoundPredicate, Predicate, UnboundPredicate
+from .reference import BoundReference
+from ..partition_spec import PartitionSpec
+from ..struct_like import StructLike
 
 
 class ResidualEvaluator(object):
+    """
+    Finds the residuals for an {@link Expression} and the partitions in the given {@link PartitionSpec}.
 
-    def __init__(self, spec, expr):
+    A residual expression is made by partially evaluating an expression using partition values. For
+    example, if a table is partitioned by day(utc_timestamp) and is read with a filter expression
+    utc_timestamp >= a and utc_timestamp <= b, then there are 4 possible residual expressions
+    for the partition data, d:
+
+
+        + If d > day(a) and d < day(b), the residual is always true
+        + If d == day(a) and d != day(b), the residual is utc_timestamp >= a
+        + If d == day(b) and d != day(a), the residual is utc_timestamp <= b
+        + If d == day(a) == day(b), the residual is utc_timestamp >= a and utc_timestamp <= b
+
+    Partition data is passed using StructLike. Residuals are returned by residual_for(StructLike).
+
+    """
+    @staticmethod
+    def unpartitioned(expr: Expression) -> 'UnpartitionedEvaluator':
+        return UnpartitionedEvaluator(expr)
+
+    @staticmethod
+    def of(spec: PartitionSpec, expr: Expression, case_sensitive=True) -> 'ResidualEvaluator':
+        if len(spec.fields) > 0:
+            return ResidualEvaluator(spec, expr, case_sensitive=case_sensitive)
+        else:
+            return ResidualEvaluator.unpartitioned(expr)
+
+    def __init__(self, spec, expr, case_sensitive=True):
         self._spec = spec
         self._expr = expr
+        self._case_sensitive = case_sensitive
         self.__visitor = None
 
-    def _visitor(self):
+    def _visitor(self) -> 'ResidualVisitor':
         if self.__visitor is None:
-            self.__visitor = ResidualVisitor()
+            self.__visitor = ResidualVisitor(self._spec,
+                                             self._expr,
+                                             self._case_sensitive)
 
         return self.__visitor
 
-    def residual_for(self, partition_data):
+    def residual_for(self, partition_data: StructLike) -> Expression:
+        """
+        Returns a residual expression for the given partition values.
+
+        Parameters
+        ----------
+        partition_data: StructLike
+            partition data values
+
+        Returns
+        -------
+        Expression
+            the residual of this evaluator's expression from the partition values
+        """
         return self._visitor().eval(partition_data)
 
 
 class ResidualVisitor(ExpressionVisitors.BoundExpressionVisitor):
 
-    def __init__(self):
+    def __init__(self, spec, expr, case_sensitive=True):

Review comment:
       Hi Micah, I'm going to go ahead and close out this PR, since the focus has moved on to the redesigned library and this PR is on the legacy implementation. If I understand correctly, the legacy implementation is only going to receive bug fixes going forward, and it will be fully deprecated once there is a viable alternative in the new design. Sorry for any confusion here.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org