You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@iceberg.apache.org by GitBox <gi...@apache.org> on 2022/06/23 05:52:36 UTC

[GitHub] [iceberg] aokolnychyi commented on a diff in pull request #4847: Core: Add reference_snapshot_id column and filter to AllManifest table

aokolnychyi commented on code in PR #4847:
URL: https://github.com/apache/iceberg/pull/4847#discussion_r904520180


##########
core/src/main/java/org/apache/iceberg/AllManifestsTable.java:
##########
@@ -204,4 +228,178 @@ public Iterable<FileScanTask> split(long splitSize) {
       return ImmutableList.of(this); // don't split
     }
   }
+
+  static StaticDataTask.Row manifestFileToRow(PartitionSpec spec, ManifestFile manifest, long referencedSnapshotId) {

Review Comment:
   nit: should it be `referenceSnapshotId` to match the name of the field?



##########
core/src/main/java/org/apache/iceberg/AllManifestsTable.java:
##########
@@ -204,4 +228,178 @@ public Iterable<FileScanTask> split(long splitSize) {
       return ImmutableList.of(this); // don't split
     }
   }
+
+  static StaticDataTask.Row manifestFileToRow(PartitionSpec spec, ManifestFile manifest, long referencedSnapshotId) {
+    return StaticDataTask.Row.of(
+        manifest.content().id(),
+        manifest.path(),
+        manifest.length(),
+        manifest.partitionSpecId(),
+        manifest.snapshotId(),
+        manifest.content() == ManifestContent.DATA ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.deletedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.deletedFilesCount() : 0,
+        ManifestsTable.partitionSummariesToRows(spec, manifest.partitions()),
+        referencedSnapshotId
+    );
+  }
+
+  private static class SnapshotEvaluator extends BoundExpressionVisitor<Boolean> {
+
+    private Expression boundExpr;
+    private Long snapshotId;
+
+    private static final boolean ROWS_MIGHT_MATCH = true;
+    private static final boolean ROWS_CANNOT_MATCH = false;
+
+    private SnapshotEvaluator(Expression expr, Snapshot snap, Types.StructType structType, boolean caseSensitive) {
+      this.boundExpr = Binder.bind(structType, expr, caseSensitive);
+      this.snapshotId = snap.snapshotId();
+    }
+
+    private boolean eval() {
+      return ExpressionVisitors.visitEvaluator(boundExpr, this);
+    }
+
+    @Override
+    public Boolean alwaysTrue() {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public Boolean alwaysFalse() {
+      return ROWS_CANNOT_MATCH;
+    }
+
+    @Override
+    public Boolean not(Boolean result) {
+      return !result;
+    }
+
+    @Override
+    public Boolean and(Boolean leftResult, Boolean rightResult) {
+      return leftResult && rightResult;
+    }
+
+    @Override
+    public Boolean or(Boolean leftResult, Boolean rightResult) {
+      return leftResult || rightResult;
+    }
+
+    @Override
+    public <T> Boolean isNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean isNaN(BoundReference<T> ref) {
+      if (snapshotIdRef(ref)) {
+        return ROWS_CANNOT_MATCH; // reference_snapshot_id column is never nan
+      } else {
+        return ROWS_MIGHT_MATCH;
+      }
+    }
+
+    @Override
+    public <T> Boolean notNaN(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult < 0);
+    }
+
+    @Override
+    public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult <= 0);
+    }
+
+    @Override
+    public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult > 0);
+    }
+
+    @Override
+    public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult >= 0);
+    }
+
+    @Override
+    public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult == 0);
+    }
+
+    @Override
+    public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult != 0);
+    }
+
+    @Override
+    public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());
+      if (snapshotIdRef(ref)) {
+        boolean in = literalSet.stream().anyMatch(lit -> longComparator.compare(snapshotId, lit) == 0);
+        if (!in) {
+          return ROWS_CANNOT_MATCH;
+        }
+      }
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());
+      if (snapshotIdRef(ref)) {
+        boolean notIn = literalSet.stream().noneMatch(lit -> longComparator.compare(snapshotId, lit) == 0);
+        if (!notIn) {
+          return ROWS_CANNOT_MATCH;
+        }
+      }
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notStartsWith(BoundReference<T> ref, Literal<T> lit) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    /**
+     * Comparison of snapshot reference and literal
+     * @param ref bound reference, comparison attempted only if reference is for reference_snapshot_id

Review Comment:
   nit: I'd add an empty line before params



##########
core/src/main/java/org/apache/iceberg/AllManifestsTable.java:
##########
@@ -204,4 +228,178 @@ public Iterable<FileScanTask> split(long splitSize) {
       return ImmutableList.of(this); // don't split
     }
   }
+
+  static StaticDataTask.Row manifestFileToRow(PartitionSpec spec, ManifestFile manifest, long referencedSnapshotId) {
+    return StaticDataTask.Row.of(
+        manifest.content().id(),
+        manifest.path(),
+        manifest.length(),
+        manifest.partitionSpecId(),
+        manifest.snapshotId(),
+        manifest.content() == ManifestContent.DATA ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.deletedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.deletedFilesCount() : 0,
+        ManifestsTable.partitionSummariesToRows(spec, manifest.partitions()),
+        referencedSnapshotId
+    );
+  }
+
+  private static class SnapshotEvaluator extends BoundExpressionVisitor<Boolean> {
+
+    private Expression boundExpr;
+    private Long snapshotId;
+
+    private static final boolean ROWS_MIGHT_MATCH = true;
+    private static final boolean ROWS_CANNOT_MATCH = false;
+
+    private SnapshotEvaluator(Expression expr, Snapshot snap, Types.StructType structType, boolean caseSensitive) {
+      this.boundExpr = Binder.bind(structType, expr, caseSensitive);
+      this.snapshotId = snap.snapshotId();
+    }
+
+    private boolean eval() {
+      return ExpressionVisitors.visitEvaluator(boundExpr, this);
+    }
+
+    @Override
+    public Boolean alwaysTrue() {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public Boolean alwaysFalse() {
+      return ROWS_CANNOT_MATCH;
+    }
+
+    @Override
+    public Boolean not(Boolean result) {
+      return !result;
+    }
+
+    @Override
+    public Boolean and(Boolean leftResult, Boolean rightResult) {
+      return leftResult && rightResult;
+    }
+
+    @Override
+    public Boolean or(Boolean leftResult, Boolean rightResult) {
+      return leftResult || rightResult;
+    }
+
+    @Override
+    public <T> Boolean isNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean isNaN(BoundReference<T> ref) {
+      if (snapshotIdRef(ref)) {
+        return ROWS_CANNOT_MATCH; // reference_snapshot_id column is never nan
+      } else {
+        return ROWS_MIGHT_MATCH;
+      }
+    }
+
+    @Override
+    public <T> Boolean notNaN(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult < 0);
+    }
+
+    @Override
+    public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult <= 0);
+    }
+
+    @Override
+    public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult > 0);
+    }
+
+    @Override
+    public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult >= 0);
+    }
+
+    @Override
+    public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult == 0);
+    }
+
+    @Override
+    public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult != 0);
+    }
+
+    @Override
+    public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());

Review Comment:
   nit: Can we move this line into the if block not to instantiate it all the time?



##########
core/src/main/java/org/apache/iceberg/AllManifestsTable.java:
##########
@@ -204,4 +228,178 @@ public Iterable<FileScanTask> split(long splitSize) {
       return ImmutableList.of(this); // don't split
     }
   }
+
+  static StaticDataTask.Row manifestFileToRow(PartitionSpec spec, ManifestFile manifest, long referencedSnapshotId) {
+    return StaticDataTask.Row.of(
+        manifest.content().id(),
+        manifest.path(),
+        manifest.length(),
+        manifest.partitionSpecId(),
+        manifest.snapshotId(),
+        manifest.content() == ManifestContent.DATA ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.deletedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.deletedFilesCount() : 0,
+        ManifestsTable.partitionSummariesToRows(spec, manifest.partitions()),
+        referencedSnapshotId
+    );
+  }
+
+  private static class SnapshotEvaluator extends BoundExpressionVisitor<Boolean> {
+
+    private Expression boundExpr;
+    private Long snapshotId;
+
+    private static final boolean ROWS_MIGHT_MATCH = true;
+    private static final boolean ROWS_CANNOT_MATCH = false;
+
+    private SnapshotEvaluator(Expression expr, Snapshot snap, Types.StructType structType, boolean caseSensitive) {
+      this.boundExpr = Binder.bind(structType, expr, caseSensitive);
+      this.snapshotId = snap.snapshotId();
+    }
+
+    private boolean eval() {
+      return ExpressionVisitors.visitEvaluator(boundExpr, this);
+    }
+
+    @Override
+    public Boolean alwaysTrue() {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public Boolean alwaysFalse() {
+      return ROWS_CANNOT_MATCH;
+    }
+
+    @Override
+    public Boolean not(Boolean result) {
+      return !result;
+    }
+
+    @Override
+    public Boolean and(Boolean leftResult, Boolean rightResult) {
+      return leftResult && rightResult;
+    }
+
+    @Override
+    public Boolean or(Boolean leftResult, Boolean rightResult) {
+      return leftResult || rightResult;
+    }
+
+    @Override
+    public <T> Boolean isNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean isNaN(BoundReference<T> ref) {
+      if (snapshotIdRef(ref)) {
+        return ROWS_CANNOT_MATCH; // reference_snapshot_id column is never nan
+      } else {
+        return ROWS_MIGHT_MATCH;
+      }
+    }
+
+    @Override
+    public <T> Boolean notNaN(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult < 0);
+    }
+
+    @Override
+    public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult <= 0);
+    }
+
+    @Override
+    public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult > 0);
+    }
+
+    @Override
+    public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult >= 0);
+    }
+
+    @Override
+    public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult == 0);
+    }
+
+    @Override
+    public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult != 0);
+    }
+
+    @Override
+    public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());
+      if (snapshotIdRef(ref)) {
+        boolean in = literalSet.stream().anyMatch(lit -> longComparator.compare(snapshotId, lit) == 0);
+        if (!in) {
+          return ROWS_CANNOT_MATCH;
+        }
+      }
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());

Review Comment:
   nit: Same here



##########
core/src/main/java/org/apache/iceberg/AllManifestsTable.java:
##########
@@ -204,4 +228,178 @@ public Iterable<FileScanTask> split(long splitSize) {
       return ImmutableList.of(this); // don't split
     }
   }
+
+  static StaticDataTask.Row manifestFileToRow(PartitionSpec spec, ManifestFile manifest, long referencedSnapshotId) {
+    return StaticDataTask.Row.of(
+        manifest.content().id(),
+        manifest.path(),
+        manifest.length(),
+        manifest.partitionSpecId(),
+        manifest.snapshotId(),
+        manifest.content() == ManifestContent.DATA ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.deletedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.deletedFilesCount() : 0,
+        ManifestsTable.partitionSummariesToRows(spec, manifest.partitions()),
+        referencedSnapshotId
+    );
+  }
+
+  private static class SnapshotEvaluator extends BoundExpressionVisitor<Boolean> {
+
+    private Expression boundExpr;
+    private Long snapshotId;
+
+    private static final boolean ROWS_MIGHT_MATCH = true;
+    private static final boolean ROWS_CANNOT_MATCH = false;
+
+    private SnapshotEvaluator(Expression expr, Snapshot snap, Types.StructType structType, boolean caseSensitive) {
+      this.boundExpr = Binder.bind(structType, expr, caseSensitive);
+      this.snapshotId = snap.snapshotId();
+    }
+
+    private boolean eval() {
+      return ExpressionVisitors.visitEvaluator(boundExpr, this);
+    }
+
+    @Override
+    public Boolean alwaysTrue() {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public Boolean alwaysFalse() {
+      return ROWS_CANNOT_MATCH;
+    }
+
+    @Override
+    public Boolean not(Boolean result) {
+      return !result;
+    }
+
+    @Override
+    public Boolean and(Boolean leftResult, Boolean rightResult) {
+      return leftResult && rightResult;
+    }
+
+    @Override
+    public Boolean or(Boolean leftResult, Boolean rightResult) {
+      return leftResult || rightResult;
+    }
+
+    @Override
+    public <T> Boolean isNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean isNaN(BoundReference<T> ref) {
+      if (snapshotIdRef(ref)) {
+        return ROWS_CANNOT_MATCH; // reference_snapshot_id column is never nan
+      } else {
+        return ROWS_MIGHT_MATCH;
+      }
+    }
+
+    @Override
+    public <T> Boolean notNaN(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult < 0);
+    }
+
+    @Override
+    public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult <= 0);
+    }
+
+    @Override
+    public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult > 0);
+    }
+
+    @Override
+    public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult >= 0);
+    }
+
+    @Override
+    public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult == 0);
+    }
+
+    @Override
+    public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult != 0);
+    }
+
+    @Override
+    public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());

Review Comment:
   `Comparator<Object>` looked suspicious at first but I think it should work.



##########
core/src/main/java/org/apache/iceberg/AllManifestsTable.java:
##########
@@ -204,4 +228,178 @@ public Iterable<FileScanTask> split(long splitSize) {
       return ImmutableList.of(this); // don't split
     }
   }
+
+  static StaticDataTask.Row manifestFileToRow(PartitionSpec spec, ManifestFile manifest, long referencedSnapshotId) {
+    return StaticDataTask.Row.of(
+        manifest.content().id(),
+        manifest.path(),
+        manifest.length(),
+        manifest.partitionSpecId(),
+        manifest.snapshotId(),
+        manifest.content() == ManifestContent.DATA ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.deletedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.deletedFilesCount() : 0,
+        ManifestsTable.partitionSummariesToRows(spec, manifest.partitions()),
+        referencedSnapshotId
+    );
+  }
+
+  private static class SnapshotEvaluator extends BoundExpressionVisitor<Boolean> {
+
+    private Expression boundExpr;
+    private Long snapshotId;
+
+    private static final boolean ROWS_MIGHT_MATCH = true;
+    private static final boolean ROWS_CANNOT_MATCH = false;
+
+    private SnapshotEvaluator(Expression expr, Snapshot snap, Types.StructType structType, boolean caseSensitive) {
+      this.boundExpr = Binder.bind(structType, expr, caseSensitive);
+      this.snapshotId = snap.snapshotId();
+    }
+
+    private boolean eval() {
+      return ExpressionVisitors.visitEvaluator(boundExpr, this);
+    }
+
+    @Override
+    public Boolean alwaysTrue() {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public Boolean alwaysFalse() {
+      return ROWS_CANNOT_MATCH;
+    }
+
+    @Override
+    public Boolean not(Boolean result) {
+      return !result;
+    }
+
+    @Override
+    public Boolean and(Boolean leftResult, Boolean rightResult) {
+      return leftResult && rightResult;
+    }
+
+    @Override
+    public Boolean or(Boolean leftResult, Boolean rightResult) {
+      return leftResult || rightResult;
+    }
+
+    @Override
+    public <T> Boolean isNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean isNaN(BoundReference<T> ref) {
+      if (snapshotIdRef(ref)) {
+        return ROWS_CANNOT_MATCH; // reference_snapshot_id column is never nan
+      } else {
+        return ROWS_MIGHT_MATCH;
+      }
+    }
+
+    @Override
+    public <T> Boolean notNaN(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult < 0);
+    }
+
+    @Override
+    public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult <= 0);
+    }
+
+    @Override
+    public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult > 0);
+    }
+
+    @Override
+    public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult >= 0);
+    }
+
+    @Override
+    public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult == 0);
+    }
+
+    @Override
+    public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult != 0);
+    }
+
+    @Override
+    public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());
+      if (snapshotIdRef(ref)) {
+        boolean in = literalSet.stream().anyMatch(lit -> longComparator.compare(snapshotId, lit) == 0);
+        if (!in) {
+          return ROWS_CANNOT_MATCH;
+        }
+      }
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());
+      if (snapshotIdRef(ref)) {
+        boolean notIn = literalSet.stream().noneMatch(lit -> longComparator.compare(snapshotId, lit) == 0);
+        if (!notIn) {
+          return ROWS_CANNOT_MATCH;
+        }
+      }
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notStartsWith(BoundReference<T> ref, Literal<T> lit) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    /**
+     * Comparison of snapshot reference and literal
+     * @param ref bound reference, comparison attempted only if reference is for reference_snapshot_id
+     * @param lit literal value to compare with snapshot id.
+     * @param compareResult function to apply to comparison result, which returns true if rows might match
+     * @return boolean true if rows might match, false if cannot match
+     */
+    private <T> Boolean compare(BoundReference<T> ref,
+                                Literal<T> lit,
+                                Function<Integer, Boolean> compareResult) {

Review Comment:
   nit: is there a better name than compareResult for this function?



##########
core/src/main/java/org/apache/iceberg/AllManifestsTable.java:
##########
@@ -204,4 +228,178 @@ public Iterable<FileScanTask> split(long splitSize) {
       return ImmutableList.of(this); // don't split
     }
   }
+
+  static StaticDataTask.Row manifestFileToRow(PartitionSpec spec, ManifestFile manifest, long referencedSnapshotId) {
+    return StaticDataTask.Row.of(
+        manifest.content().id(),
+        manifest.path(),
+        manifest.length(),
+        manifest.partitionSpecId(),
+        manifest.snapshotId(),
+        manifest.content() == ManifestContent.DATA ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.deletedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.deletedFilesCount() : 0,
+        ManifestsTable.partitionSummariesToRows(spec, manifest.partitions()),
+        referencedSnapshotId
+    );
+  }
+
+  private static class SnapshotEvaluator extends BoundExpressionVisitor<Boolean> {
+
+    private Expression boundExpr;
+    private Long snapshotId;
+
+    private static final boolean ROWS_MIGHT_MATCH = true;
+    private static final boolean ROWS_CANNOT_MATCH = false;
+
+    private SnapshotEvaluator(Expression expr, Snapshot snap, Types.StructType structType, boolean caseSensitive) {
+      this.boundExpr = Binder.bind(structType, expr, caseSensitive);
+      this.snapshotId = snap.snapshotId();
+    }
+
+    private boolean eval() {
+      return ExpressionVisitors.visitEvaluator(boundExpr, this);
+    }
+
+    @Override
+    public Boolean alwaysTrue() {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public Boolean alwaysFalse() {
+      return ROWS_CANNOT_MATCH;
+    }
+
+    @Override
+    public Boolean not(Boolean result) {
+      return !result;
+    }
+
+    @Override
+    public Boolean and(Boolean leftResult, Boolean rightResult) {
+      return leftResult && rightResult;
+    }
+
+    @Override
+    public Boolean or(Boolean leftResult, Boolean rightResult) {
+      return leftResult || rightResult;
+    }
+
+    @Override
+    public <T> Boolean isNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean isNaN(BoundReference<T> ref) {
+      if (snapshotIdRef(ref)) {
+        return ROWS_CANNOT_MATCH; // reference_snapshot_id column is never nan
+      } else {
+        return ROWS_MIGHT_MATCH;
+      }
+    }
+
+    @Override
+    public <T> Boolean notNaN(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult < 0);
+    }
+
+    @Override
+    public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult <= 0);
+    }
+
+    @Override
+    public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult > 0);
+    }
+
+    @Override
+    public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult >= 0);
+    }
+
+    @Override
+    public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult == 0);
+    }
+
+    @Override
+    public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult != 0);
+    }
+
+    @Override
+    public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());
+      if (snapshotIdRef(ref)) {
+        boolean in = literalSet.stream().anyMatch(lit -> longComparator.compare(snapshotId, lit) == 0);

Review Comment:
   nit: If you want to, you can use `noneMatch` to avoid the negation in `if`:
   
   ```
   boolean noneMatch = literalSet.stream().noneMatch(lit -> comparator.compare(snapshotId, lit) == 0);
   if (noneMatch) {
     return ROWS_CANNOT_MATCH;
   }
   ```



##########
core/src/main/java/org/apache/iceberg/AllManifestsTable.java:
##########
@@ -204,4 +228,178 @@ public Iterable<FileScanTask> split(long splitSize) {
       return ImmutableList.of(this); // don't split
     }
   }
+
+  static StaticDataTask.Row manifestFileToRow(PartitionSpec spec, ManifestFile manifest, long referencedSnapshotId) {
+    return StaticDataTask.Row.of(
+        manifest.content().id(),
+        manifest.path(),
+        manifest.length(),
+        manifest.partitionSpecId(),
+        manifest.snapshotId(),
+        manifest.content() == ManifestContent.DATA ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.deletedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.deletedFilesCount() : 0,
+        ManifestsTable.partitionSummariesToRows(spec, manifest.partitions()),
+        referencedSnapshotId
+    );
+  }
+
+  private static class SnapshotEvaluator extends BoundExpressionVisitor<Boolean> {
+
+    private Expression boundExpr;
+    private Long snapshotId;
+
+    private static final boolean ROWS_MIGHT_MATCH = true;
+    private static final boolean ROWS_CANNOT_MATCH = false;
+
+    private SnapshotEvaluator(Expression expr, Snapshot snap, Types.StructType structType, boolean caseSensitive) {
+      this.boundExpr = Binder.bind(structType, expr, caseSensitive);
+      this.snapshotId = snap.snapshotId();
+    }
+
+    private boolean eval() {
+      return ExpressionVisitors.visitEvaluator(boundExpr, this);
+    }
+
+    @Override
+    public Boolean alwaysTrue() {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public Boolean alwaysFalse() {
+      return ROWS_CANNOT_MATCH;
+    }
+
+    @Override
+    public Boolean not(Boolean result) {
+      return !result;
+    }
+
+    @Override
+    public Boolean and(Boolean leftResult, Boolean rightResult) {
+      return leftResult && rightResult;
+    }
+
+    @Override
+    public Boolean or(Boolean leftResult, Boolean rightResult) {
+      return leftResult || rightResult;
+    }
+
+    @Override
+    public <T> Boolean isNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean isNaN(BoundReference<T> ref) {
+      if (snapshotIdRef(ref)) {
+        return ROWS_CANNOT_MATCH; // reference_snapshot_id column is never nan
+      } else {
+        return ROWS_MIGHT_MATCH;
+      }
+    }
+
+    @Override
+    public <T> Boolean notNaN(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult < 0);
+    }
+
+    @Override
+    public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult <= 0);
+    }
+
+    @Override
+    public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult > 0);
+    }
+
+    @Override
+    public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult >= 0);
+    }
+
+    @Override
+    public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult == 0);
+    }
+
+    @Override
+    public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult != 0);
+    }
+
+    @Override
+    public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());
+      if (snapshotIdRef(ref)) {
+        boolean in = literalSet.stream().anyMatch(lit -> longComparator.compare(snapshotId, lit) == 0);
+        if (!in) {
+          return ROWS_CANNOT_MATCH;
+        }
+      }
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());
+      if (snapshotIdRef(ref)) {
+        boolean notIn = literalSet.stream().noneMatch(lit -> longComparator.compare(snapshotId, lit) == 0);
+        if (!notIn) {
+          return ROWS_CANNOT_MATCH;
+        }
+      }
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notStartsWith(BoundReference<T> ref, Literal<T> lit) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    /**
+     * Comparison of snapshot reference and literal
+     * @param ref bound reference, comparison attempted only if reference is for reference_snapshot_id
+     * @param lit literal value to compare with snapshot id.
+     * @param compareResult function to apply to comparison result, which returns true if rows might match
+     * @return boolean true if rows might match, false if cannot match
+     */
+    private <T> Boolean compare(BoundReference<T> ref,
+                                Literal<T> lit,
+                                Function<Integer, Boolean> compareResult) {
+
+      if (snapshotIdRef(ref)) {
+        Literal<Long> longLit = lit.to(Types.LongType.get());
+        int cmp = longLit.comparator().compare(snapshotId, longLit.value());
+        if (!compareResult.apply(cmp)) {
+          return ROWS_CANNOT_MATCH;
+        }
+      }
+      return ROWS_MIGHT_MATCH;
+    }
+
+    private <T> boolean snapshotIdRef(BoundReference<T> ref) {

Review Comment:
   nit: will it be a bit more readable if we added `is` to the method name? I know we don't always do that but I feel like it would make sense. What do you think?



##########
core/src/main/java/org/apache/iceberg/AllManifestsTable.java:
##########
@@ -204,4 +228,178 @@ public Iterable<FileScanTask> split(long splitSize) {
       return ImmutableList.of(this); // don't split
     }
   }
+
+  static StaticDataTask.Row manifestFileToRow(PartitionSpec spec, ManifestFile manifest, long referencedSnapshotId) {
+    return StaticDataTask.Row.of(
+        manifest.content().id(),
+        manifest.path(),
+        manifest.length(),
+        manifest.partitionSpecId(),
+        manifest.snapshotId(),
+        manifest.content() == ManifestContent.DATA ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.deletedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.deletedFilesCount() : 0,
+        ManifestsTable.partitionSummariesToRows(spec, manifest.partitions()),
+        referencedSnapshotId
+    );
+  }
+
+  private static class SnapshotEvaluator extends BoundExpressionVisitor<Boolean> {

Review Comment:
   What about following what we did in `Evaluator`? The main idea is to have an evaluator that you instantiate once. That evaluator would have `eval(snapshot)` method and an inner visitor class. That way, we can do the binding only once instead of per snapshot and reuse the same evaluator for all snapshots.
   
   Let me know what you think about the `Evaluator` class.



##########
core/src/main/java/org/apache/iceberg/AllManifestsTable.java:
##########
@@ -204,4 +228,178 @@ public Iterable<FileScanTask> split(long splitSize) {
       return ImmutableList.of(this); // don't split
     }
   }
+
+  static StaticDataTask.Row manifestFileToRow(PartitionSpec spec, ManifestFile manifest, long referencedSnapshotId) {
+    return StaticDataTask.Row.of(
+        manifest.content().id(),
+        manifest.path(),
+        manifest.length(),
+        manifest.partitionSpecId(),
+        manifest.snapshotId(),
+        manifest.content() == ManifestContent.DATA ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DATA ? manifest.deletedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.addedFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.existingFilesCount() : 0,
+        manifest.content() == ManifestContent.DELETES ? manifest.deletedFilesCount() : 0,
+        ManifestsTable.partitionSummariesToRows(spec, manifest.partitions()),
+        referencedSnapshotId
+    );
+  }
+
+  private static class SnapshotEvaluator extends BoundExpressionVisitor<Boolean> {
+
+    private Expression boundExpr;
+    private Long snapshotId;
+
+    private static final boolean ROWS_MIGHT_MATCH = true;
+    private static final boolean ROWS_CANNOT_MATCH = false;
+
+    private SnapshotEvaluator(Expression expr, Snapshot snap, Types.StructType structType, boolean caseSensitive) {
+      this.boundExpr = Binder.bind(structType, expr, caseSensitive);
+      this.snapshotId = snap.snapshotId();
+    }
+
+    private boolean eval() {
+      return ExpressionVisitors.visitEvaluator(boundExpr, this);
+    }
+
+    @Override
+    public Boolean alwaysTrue() {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public Boolean alwaysFalse() {
+      return ROWS_CANNOT_MATCH;
+    }
+
+    @Override
+    public Boolean not(Boolean result) {
+      return !result;
+    }
+
+    @Override
+    public Boolean and(Boolean leftResult, Boolean rightResult) {
+      return leftResult && rightResult;
+    }
+
+    @Override
+    public Boolean or(Boolean leftResult, Boolean rightResult) {
+      return leftResult || rightResult;
+    }
+
+    @Override
+    public <T> Boolean isNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notNull(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean isNaN(BoundReference<T> ref) {
+      if (snapshotIdRef(ref)) {
+        return ROWS_CANNOT_MATCH; // reference_snapshot_id column is never nan
+      } else {
+        return ROWS_MIGHT_MATCH;
+      }
+    }
+
+    @Override
+    public <T> Boolean notNaN(BoundReference<T> ref) {
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult < 0);
+    }
+
+    @Override
+    public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult <= 0);
+    }
+
+    @Override
+    public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult > 0);
+    }
+
+    @Override
+    public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult >= 0);
+    }
+
+    @Override
+    public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult == 0);
+    }
+
+    @Override
+    public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
+      return compare(ref, lit, compareResult -> compareResult != 0);
+    }
+
+    @Override
+    public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());
+      if (snapshotIdRef(ref)) {
+        boolean in = literalSet.stream().anyMatch(lit -> longComparator.compare(snapshotId, lit) == 0);
+        if (!in) {
+          return ROWS_CANNOT_MATCH;
+        }
+      }
+      return ROWS_MIGHT_MATCH;
+    }
+
+    @Override
+    public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
+      Comparator<Object> longComparator = Comparators.forType(Types.LongType.get());
+      if (snapshotIdRef(ref)) {
+        boolean notIn = literalSet.stream().noneMatch(lit -> longComparator.compare(snapshotId, lit) == 0);

Review Comment:
   nit: Same here, you can use `anyMatch` instead.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org