You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2019/08/12 18:59:22 UTC

[incubator-iceberg] branch master updated: Supports startsWith predicates (#327)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 5cfc119  Supports startsWith predicates (#327)
5cfc119 is described below

commit 5cfc119e589a39726d00371ed9d11d3a21b21ee4
Author: Sujith Jay Nair <su...@sujithjay.com>
AuthorDate: Mon Aug 12 20:59:17 2019 +0200

    Supports startsWith predicates (#327)
    
    Co-authored-by: Renato Marroquin <ma...@inf.ethz.ch>
    Co-authored-by: Lior Baber <li...@gmail.com>
    Co-authored-by: Sujith Jay Nair <su...@sujithjay.com>
---
 .../org/apache/iceberg/expressions/Evaluator.java  |  5 ++
 .../org/apache/iceberg/expressions/Expression.java |  3 +-
 .../iceberg/expressions/ExpressionVisitors.java    |  6 ++
 .../apache/iceberg/expressions/Expressions.java    |  4 +
 .../org/apache/iceberg/expressions/Predicate.java  |  2 +
 .../iceberg/expressions/ResidualEvaluator.java     |  6 ++
 .../java/org/apache/iceberg/transforms/Bucket.java |  1 +
 .../apache/iceberg/transforms/ProjectionUtil.java  |  2 +
 .../org/apache/iceberg/transforms/Truncate.java    | 31 +++++--
 .../iceberg/expressions/TestExpressionBinding.java | 13 +++
 .../apache/iceberg/transforms/TestStartsWith.java  | 96 ++++++++++++++++++++++
 .../iceberg/transforms/TestTruncatesResiduals.java |  6 ++
 12 files changed, 166 insertions(+), 9 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java b/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java
index e96a528..70dfbd1 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java
@@ -142,5 +142,15 @@ public class Evaluator implements Serializable {
     public <T> Boolean notIn(BoundReference<T> ref, Literal<T> lit) {
       return !in(ref, lit);
     }
+
+    /**
+     * Returns whether the referenced value in the current struct starts with the literal prefix.
+     * A null value never matches, which also avoids a NullPointerException on null fields.
+     */
+    @Override
+    public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
+      Object value = ref.get(struct);
+      return value != null && ((String) value).startsWith((String) lit.value());
+    }
   }
 }
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Expression.java b/api/src/main/java/org/apache/iceberg/expressions/Expression.java
index 124e612..20b1abc 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Expression.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Expression.java
@@ -40,7 +40,8 @@ public interface Expression extends Serializable {
     NOT_IN,
     NOT,
     AND,
-    OR;
+    OR,
+    STARTS_WITH;
 
     /**
      * @return the operation used when this is negated
diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java
index cc45531..fe2b742 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java
@@ -97,6 +97,10 @@ public class ExpressionVisitors {
       return null;
     }
 
+    public <T> R startsWith(BoundReference<T> ref, Literal<T> lit) {
+      throw new UnsupportedOperationException("Unsupported operation.");
+    }
+
     @Override
     public <T> R predicate(BoundPredicate<T> pred) {
       switch (pred.op()) {
@@ -120,6 +124,8 @@ public class ExpressionVisitors {
           return in(pred.ref(), pred.literal());
         case NOT_IN:
           return notIn(pred.ref(), pred.literal());
+        case STARTS_WITH:
+          return startsWith(pred.ref(),  pred.literal());
         default:
           throw new UnsupportedOperationException(
               "Unknown operation for predicate: " + pred.op());
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java
index d280f89..b99fffe 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java
@@ -105,6 +105,10 @@ public class Expressions {
     return new UnboundPredicate<>(Expression.Operation.NOT_EQ, ref(name), value);
   }
 
+  public static UnboundPredicate<String> startsWith(String name, String value) {
+    return new UnboundPredicate<>(Expression.Operation.STARTS_WITH, ref(name), value);
+  }
+
   public static <T> UnboundPredicate<T> predicate(Operation op, String name, T value) {
     Preconditions.checkArgument(op != Operation.IS_NULL && op != Operation.NOT_NULL,
         "Cannot create %s predicate inclusive a value", op);
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Predicate.java b/api/src/main/java/org/apache/iceberg/expressions/Predicate.java
index 12923ef..bc368d1 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Predicate.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Predicate.java
@@ -62,6 +62,8 @@ public abstract class Predicate<T, R extends Reference> implements Expression {
         return String.valueOf(ref()) + " == " + literal();
       case NOT_EQ:
         return String.valueOf(ref()) + " != " + literal();
+      case STARTS_WITH:
+        return ref() + " startsWith \"" + literal() + "\"";
 //      case IN:
 //        break;
 //      case NOT_IN:
diff --git a/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java
index dd3a0b8..f62c149 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java
@@ -195,6 +195,14 @@ public class ResidualEvaluator implements Serializable {
     }
 
     @Override
+    public <T> Expression startsWith(BoundReference<T> ref, Literal<T> lit) {
+      // read the value once; a null value cannot start with the prefix, so it yields alwaysFalse
+      Object value = ref.get(struct);
+      boolean matches = value != null && ((String) value).startsWith((String) lit.value());
+      return matches ? alwaysTrue() : alwaysFalse();
+    }
+
+    @Override
     @SuppressWarnings("unchecked")
     public <T> Expression predicate(BoundPredicate<T> pred) {
       /**
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java
index 9448d02..d088119 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java
@@ -113,6 +113,7 @@ abstract class Bucket<T> implements Transform<T, Integer> {
             predicate.op(), name, apply(predicate.literal().value()));
 //      case IN:
 //        return Expressions.predicate();
+      case STARTS_WITH:
       default:
         // comparison predicates can't be projected, notEq can't be projected
         // TODO: small ranges can be projected.
diff --git a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
index 84f604c..d7b4d39 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
@@ -189,6 +189,8 @@ class ProjectionUtil {
         return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
       case EQ:
         return predicate(Expression.Operation.EQ, name, transform.apply(boundary));
+      case STARTS_WITH:
+        return predicate(Expression.Operation.STARTS_WITH, name, transform.apply(boundary));
 //        case IN: // TODO
 //          return Expressions.predicate(Operation.IN, name, transform.apply(boundary));
       default:
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
index d7d55dd..148f205 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
@@ -213,20 +213,35 @@ abstract class Truncate<T> implements Transform<T, T> {
 
     @Override
     public UnboundPredicate<CharSequence> project(String name,
-                                                  BoundPredicate<CharSequence> pred) {
-      if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
-        return Expressions.predicate(pred.op(), name);
+                                                  BoundPredicate<CharSequence> predicate) {
+      switch (predicate.op()) {
+        case NOT_NULL:
+        case IS_NULL:
+          return Expressions.predicate(predicate.op(), name);
+        case STARTS_WITH:
+        default:
+          return ProjectionUtil.truncateArray(name, predicate, this);
       }
-      return ProjectionUtil.truncateArray(name, pred, this);
     }
 
     @Override
     public UnboundPredicate<CharSequence> projectStrict(String name,
-                                                        BoundPredicate<CharSequence> pred) {
-      if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
-        return Expressions.predicate(pred.op(), name);
+                                                        BoundPredicate<CharSequence> predicate) {
+      switch (predicate.op()) {
+        case IS_NULL:
+        case NOT_NULL:
+          return Expressions.predicate(predicate.op(), name);
+        case STARTS_WITH:
+          if (predicate.literal().value().length() < width()) {
+            return Expressions.predicate(predicate.op(), name, predicate.literal().value());
+          } else if (predicate.literal().value().length() == width()) {
+            return Expressions.equal(name, predicate.literal().value());
+          } else {
+            return null;
+          }
+        default:
+          return ProjectionUtil.truncateArrayStrict(name, predicate, this);
       }
-      return ProjectionUtil.truncateArrayStrict(name, pred, this);
     }
 
     @Override
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
index 3e84699..fc4fc39 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
@@ -34,6 +34,7 @@ import static org.apache.iceberg.expressions.Expressions.greaterThan;
 import static org.apache.iceberg.expressions.Expressions.lessThan;
 import static org.apache.iceberg.expressions.Expressions.not;
 import static org.apache.iceberg.expressions.Expressions.or;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
 import static org.apache.iceberg.types.Types.NestedField.required;
 
 public class TestExpressionBinding {
@@ -132,6 +133,18 @@ public class TestExpressionBinding {
   }
 
   @Test
+  public void testStartsWith() {
+    StructType struct = StructType.of(required(0, "s", Types.StringType.get()));
+    Expression expr = startsWith("s", "abc");
+    Expression boundExpr = Binder.bind(struct, expr);
+    TestHelpers.assertAllReferencesBound("StartsWith", boundExpr);
+    // make sure the expression is a StartsWith
+    BoundPredicate<?> pred = TestHelpers.assertAndUnwrap(boundExpr, BoundPredicate.class);
+    Assert.assertEquals("Should be right operation", Expression.Operation.STARTS_WITH, pred.op());
+    Assert.assertEquals("Should bind s correctly", 0, pred.ref().fieldId());
+  }
+
+  @Test
   public void testAlwaysTrue() {
     Assert.assertEquals("Should not change alwaysTrue",
         alwaysTrue(),
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java b/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java
new file mode 100644
index 0000000..38aa991
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.transforms;
+
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.TestHelpers;
+import org.apache.iceberg.expressions.Binder;
+import org.apache.iceberg.expressions.BoundPredicate;
+import org.apache.iceberg.expressions.Evaluator;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.False;
+import org.apache.iceberg.expressions.Literal;
+import org.apache.iceberg.expressions.Projections;
+import org.apache.iceberg.expressions.UnboundPredicate;
+import org.apache.iceberg.types.Types;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
+import static org.apache.iceberg.types.Types.NestedField.optional;
+
+/**
+ * Tests projection and evaluation of {@code startsWith} predicates on a string column
+ * that is partitioned with the truncate transform.
+ */
+public class TestStartsWith {
+
+  private static final String COLUMN = "someStringCol";
+  private static final Schema SCHEMA = new Schema(optional(1, COLUMN, Types.StringType.get()));
+
+  @Test
+  public void testTruncateProjections() {
+    PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).truncate(COLUMN, 4).build();
+
+    // inclusive projection: expected to stay startsWith, with the prefix cut to the truncate width
+    assertProjectionInclusive(spec, startsWith(COLUMN, "ab"), "ab", Expression.Operation.STARTS_WITH);
+    assertProjectionInclusive(spec, startsWith(COLUMN, "abab"), "abab", Expression.Operation.STARTS_WITH);
+    assertProjectionInclusive(spec, startsWith(COLUMN, "ababab"), "abab", Expression.Operation.STARTS_WITH);
+
+    // strict projection: a shorter prefix stays startsWith, an exact-width prefix becomes equality
+    assertProjectionStrict(spec, startsWith(COLUMN, "ab"), "ab", Expression.Operation.STARTS_WITH);
+    assertProjectionStrict(spec, startsWith(COLUMN, "abab"), "abab", Expression.Operation.EQ);
+
+    // strict projection of a prefix longer than the width is expected to be alwaysFalse
+    Expression projection = Projections.strict(spec).project(startsWith(COLUMN, "ababab"));
+    Assert.assertTrue("Strict projection of a prefix longer than the width should be false",
+        projection instanceof False);
+  }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testTruncateString() {
+    Truncate<String> trunc = Truncate.get(Types.StringType.get(), 2);
+    Expression expr = startsWith(COLUMN, "abcde");
+    // Binder.bind returns an Expression; this test expects it to be a BoundPredicate<String>
+    BoundPredicate<String> boundExpr = (BoundPredicate<String>) Binder.bind(SCHEMA.asStruct(), expr, false);
+
+    UnboundPredicate<String> projected = trunc.project(COLUMN, boundExpr);
+    Evaluator evaluator = new Evaluator(SCHEMA.asStruct(), projected);
+
+    Assert.assertTrue("startsWith(abcde, truncate(abcde,2))  => true",
+        evaluator.eval(TestHelpers.Row.of("abcde")));
+  }
+
+  /** Projects {@code filter} inclusively through {@code spec} and checks the resulting predicate. */
+  private void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate<?> filter,
+                                         String expectedLiteral, Expression.Operation expectedOp) {
+    Expression projection = Projections.inclusive(spec).project(filter);
+    assertProjection(spec, expectedLiteral, projection, expectedOp);
+  }
+
+  /** Projects {@code filter} strictly through {@code spec} and checks the resulting predicate. */
+  private void assertProjectionStrict(PartitionSpec spec, UnboundPredicate<?> filter,
+                                      String expectedLiteral, Expression.Operation expectedOp) {
+    Expression projection = Projections.strict(spec).project(filter);
+    assertProjection(spec, expectedLiteral, projection, expectedOp);
+  }
+
+  /** Checks that {@code projection} is an unbound predicate with the expected operation and literal. */
+  @SuppressWarnings("unchecked")
+  private void assertProjection(PartitionSpec spec, String expectedLiteral, Expression projection,
+                                Expression.Operation expectedOp) {
+    UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);
+    Literal<?> literal = predicate.literal();
+    Truncate<CharSequence> transform = (Truncate<CharSequence>) spec.getFieldsBySourceId(1).get(0).transform();
+    String output = transform.toHumanString((String) literal.value());
+
+    Assert.assertEquals("Projected operation should match", expectedOp, predicate.op());
+    Assert.assertEquals("Projected literal should match", expectedLiteral, output);
+  }
+}
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java b/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java
index 1ed1f4c..f2160d5 100644
--- a/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java
@@ -36,6 +36,7 @@ import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual;
 import static org.apache.iceberg.expressions.Expressions.lessThan;
 import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual;
 import static org.apache.iceberg.expressions.Expressions.notEqual;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
 
 public class TestTruncatesResiduals {
 
@@ -173,5 +174,10 @@ public class TestTruncatesResiduals {
     assertResidualValue(spec, notEqual("value", "bcd"), "ab", Expression.Operation.TRUE);
     assertResidualPredicate(spec, notEqual("value", "bcd"), "bc");
     assertResidualValue(spec, notEqual("value", "bcd"), "cd", Expression.Operation.TRUE);
+
+    // starts with
+    assertResidualValue(spec, startsWith("value", "bcd"), "ab", Expression.Operation.FALSE);
+    assertResidualPredicate(spec, startsWith("value", "bcd"), "bc");
+    assertResidualValue(spec, startsWith("value", "bcd"), "cd", Expression.Operation.FALSE);
   }
 }