You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2019/08/12 18:59:22 UTC
[incubator-iceberg] branch master updated: Supports startsWith predicates (#327)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 5cfc119 Supports startsWith predicates (#327)
5cfc119 is described below
commit 5cfc119e589a39726d00371ed9d11d3a21b21ee4
Author: Sujith Jay Nair <su...@sujithjay.com>
AuthorDate: Mon Aug 12 20:59:17 2019 +0200
Supports startsWith predicates (#327)
Co-authored-by: Renato Marroquin <ma...@inf.ethz.ch>
Co-authored-by: Lior Baber <li...@gmail.com>
Co-authored-by: Sujith Jay Nair <su...@sujithjay.com>
---
.../org/apache/iceberg/expressions/Evaluator.java | 5 ++
.../org/apache/iceberg/expressions/Expression.java | 3 +-
.../iceberg/expressions/ExpressionVisitors.java | 6 ++
.../apache/iceberg/expressions/Expressions.java | 4 +
.../org/apache/iceberg/expressions/Predicate.java | 2 +
.../iceberg/expressions/ResidualEvaluator.java | 6 ++
.../java/org/apache/iceberg/transforms/Bucket.java | 1 +
.../apache/iceberg/transforms/ProjectionUtil.java | 2 +
.../org/apache/iceberg/transforms/Truncate.java | 31 +++++--
.../iceberg/expressions/TestExpressionBinding.java | 13 +++
.../apache/iceberg/transforms/TestStartsWith.java | 96 ++++++++++++++++++++++
.../iceberg/transforms/TestTruncatesResiduals.java | 6 ++
12 files changed, 166 insertions(+), 9 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java b/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java
index e96a528..70dfbd1 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Evaluator.java
@@ -142,5 +142,10 @@ public class Evaluator implements Serializable {
public <T> Boolean notIn(BoundReference<T> ref, Literal<T> lit) {
return !in(ref, lit);
}
+
+ /**
+ * Tests whether the value read through the reference begins with the literal prefix.
+ *
+ * @param ref bound reference used to read the value from the struct under evaluation
+ * @param lit literal holding the prefix to look for
+ * @return true when the value is non-null and starts with the prefix, false otherwise
+ */
+ @Override
+ public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
+ Object value = ref.get(struct);
+ // A null value has no prefix: report "no match" instead of letting the
+ // dereference below fail with a NullPointerException.
+ return value != null && ((String) value).startsWith((String) lit.value());
+ }
}
}
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Expression.java b/api/src/main/java/org/apache/iceberg/expressions/Expression.java
index 124e612..20b1abc 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Expression.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Expression.java
@@ -40,7 +40,8 @@ public interface Expression extends Serializable {
NOT_IN,
NOT,
AND,
- OR;
+ OR,
+ STARTS_WITH;
/**
* @return the operation used when this is negated
diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java
index cc45531..fe2b742 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionVisitors.java
@@ -97,6 +97,10 @@ public class ExpressionVisitors {
return null;
}
+ /**
+ * Handles a bound STARTS_WITH predicate.
+ * <p>
+ * This default implementation always throws, so a visitor that needs to support
+ * startsWith has to override it.
+ *
+ * @param ref bound reference of the predicate
+ * @param lit literal of the predicate
+ * @return never returns normally in this default implementation
+ * @throws UnsupportedOperationException always
+ */
+ public <T> R startsWith(BoundReference<T> ref, Literal<T> lit) {
+ throw new UnsupportedOperationException("Unsupported operation.");
+ }
+
@Override
public <T> R predicate(BoundPredicate<T> pred) {
switch (pred.op()) {
@@ -120,6 +124,8 @@ public class ExpressionVisitors {
return in(pred.ref(), pred.literal());
case NOT_IN:
return notIn(pred.ref(), pred.literal());
+ case STARTS_WITH:
+ return startsWith(pred.ref(), pred.literal());
default:
throw new UnsupportedOperationException(
"Unknown operation for predicate: " + pred.op());
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java
index d280f89..b99fffe 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java
@@ -105,6 +105,10 @@ public class Expressions {
return new UnboundPredicate<>(Expression.Operation.NOT_EQ, ref(name), value);
}
+ /**
+ * Creates an unbound STARTS_WITH predicate.
+ *
+ * @param name name handed to {@code ref(name)} to build the predicate's reference
+ * @param value prefix used as the predicate's value
+ * @return a new {@code UnboundPredicate} with operation {@code STARTS_WITH}
+ */
+ public static UnboundPredicate<String> startsWith(String name, String value) {
+ return new UnboundPredicate<>(Expression.Operation.STARTS_WITH, ref(name), value);
+ }
+
public static <T> UnboundPredicate<T> predicate(Operation op, String name, T value) {
Preconditions.checkArgument(op != Operation.IS_NULL && op != Operation.NOT_NULL,
"Cannot create %s predicate inclusive a value", op);
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Predicate.java b/api/src/main/java/org/apache/iceberg/expressions/Predicate.java
index 12923ef..bc368d1 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Predicate.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Predicate.java
@@ -62,6 +62,8 @@ public abstract class Predicate<T, R extends Reference> implements Expression {
return String.valueOf(ref()) + " == " + literal();
case NOT_EQ:
return String.valueOf(ref()) + " != " + literal();
+ case STARTS_WITH:
+ return ref() + " startsWith \"" + literal() + "\"";
// case IN:
// break;
// case NOT_IN:
diff --git a/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java
index dd3a0b8..f62c149 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ResidualEvaluator.java
@@ -195,6 +195,12 @@ public class ResidualEvaluator implements Serializable {
}
@Override
+ public <T> Expression startsWith(BoundReference<T> ref, Literal<T> lit) {
+ // Resolves a STARTS_WITH predicate against the struct under evaluation:
+ // alwaysTrue() when the value begins with the literal prefix, alwaysFalse() otherwise.
+ Object value = ref.get(struct);
+ // A null value cannot match any prefix, so it resolves to alwaysFalse() rather
+ // than failing with a NullPointerException on the dereference.
+ boolean matches = value != null && ((String) value).startsWith((String) lit.value());
+ return matches ? alwaysTrue() : alwaysFalse();
+ }
+
+ @Override
@SuppressWarnings("unchecked")
public <T> Expression predicate(BoundPredicate<T> pred) {
/**
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java
index 9448d02..d088119 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java
@@ -113,6 +113,7 @@ abstract class Bucket<T> implements Transform<T, Integer> {
predicate.op(), name, apply(predicate.literal().value()));
// case IN:
// return Expressions.predicate();
+ case STARTS_WITH:
default:
// comparison predicates can't be projected, notEq can't be projected
// TODO: small ranges can be projected.
diff --git a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
index 84f604c..d7b4d39 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
@@ -189,6 +189,8 @@ class ProjectionUtil {
return predicate(Expression.Operation.GT_EQ, name, transform.apply(boundary));
case EQ:
return predicate(Expression.Operation.EQ, name, transform.apply(boundary));
+ case STARTS_WITH:
+ return predicate(Expression.Operation.STARTS_WITH, name, transform.apply(boundary));
// case IN: // TODO
// return Expressions.predicate(Operation.IN, name, transform.apply(boundary));
default:
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
index d7d55dd..148f205 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Truncate.java
@@ -213,20 +213,35 @@ abstract class Truncate<T> implements Transform<T, T> {
@Override
public UnboundPredicate<CharSequence> project(String name,
- BoundPredicate<CharSequence> pred) {
- if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
- return Expressions.predicate(pred.op(), name);
+ BoundPredicate<CharSequence> predicate) {
+ // NOTE(review): presumably the inclusive counterpart of projectStrict -- confirm against Transform
+ switch (predicate.op()) {
+ // null checks are re-issued with the same operation against the supplied name
+ case NOT_NULL:
+ case IS_NULL:
+ return Expressions.predicate(predicate.op(), name);
+ // STARTS_WITH is listed explicitly but shares the default path: every remaining
+ // operation is delegated to ProjectionUtil.truncateArray
+ case STARTS_WITH:
+ default:
+ return ProjectionUtil.truncateArray(name, predicate, this);
}
- return ProjectionUtil.truncateArray(name, pred, this);
}
@Override
public UnboundPredicate<CharSequence> projectStrict(String name,
- BoundPredicate<CharSequence> pred) {
- if (pred.op() == NOT_NULL || pred.op() == IS_NULL) {
- return Expressions.predicate(pred.op(), name);
+ BoundPredicate<CharSequence> predicate) {
+ switch (predicate.op()) {
+ // null checks are re-issued with the same operation against the supplied name
+ case IS_NULL:
+ case NOT_NULL:
+ return Expressions.predicate(predicate.op(), name);
+ case STARTS_WITH:
+ // The outcome depends on how the prefix length compares with width().
+ if (predicate.literal().value().length() < width()) {
+ // shorter prefix: the same operation and literal are re-issued against the supplied name
+ return Expressions.predicate(predicate.op(), name, predicate.literal().value());
+ } else if (predicate.literal().value().length() == width()) {
+ // prefix exactly as long as the width: emitted as an equality on the literal
+ return Expressions.equal(name, predicate.literal().value());
+ } else {
+ // longer prefix: no predicate is produced
+ // NOTE(review): TestStartsWith expects Projections.strict to yield False for this case -- confirm null is mapped there
+ return null;
+ }
+ default:
+ // all other operations are delegated to ProjectionUtil.truncateArrayStrict
+ return ProjectionUtil.truncateArrayStrict(name, predicate, this);
}
- return ProjectionUtil.truncateArrayStrict(name, pred, this);
}
@Override
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
index 3e84699..fc4fc39 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionBinding.java
@@ -34,6 +34,7 @@ import static org.apache.iceberg.expressions.Expressions.greaterThan;
import static org.apache.iceberg.expressions.Expressions.lessThan;
import static org.apache.iceberg.expressions.Expressions.not;
import static org.apache.iceberg.expressions.Expressions.or;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
import static org.apache.iceberg.types.Types.NestedField.required;
public class TestExpressionBinding {
@@ -132,6 +133,18 @@ public class TestExpressionBinding {
}
@Test
+ public void testStartsWith() {
+ // one required string column "s" carrying field id 0
+ StructType schema = StructType.of(required(0, "s", Types.StringType.get()));
+ Expression bound = Binder.bind(schema, startsWith("s", "abc"));
+ TestHelpers.assertAllReferencesBound("StartsWith", bound);
+ // binding must keep the STARTS_WITH operation and resolve "s" to field id 0
+ BoundPredicate<?> boundPredicate = TestHelpers.assertAndUnwrap(bound, BoundPredicate.class);
+ Assert.assertEquals("Should be right operation", Expression.Operation.STARTS_WITH, boundPredicate.op());
+ Assert.assertEquals("Should bind s correctly", 0, boundPredicate.ref().fieldId());
+ }
+
+ @Test
public void testAlwaysTrue() {
Assert.assertEquals("Should not change alwaysTrue",
alwaysTrue(),
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java b/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java
new file mode 100644
index 0000000..38aa991
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestStartsWith.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.transforms;
+
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.TestHelpers;
+import org.apache.iceberg.expressions.Binder;
+import org.apache.iceberg.expressions.BoundPredicate;
+import org.apache.iceberg.expressions.Evaluator;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.False;
+import org.apache.iceberg.expressions.Literal;
+import org.apache.iceberg.expressions.Projections;
+import org.apache.iceberg.expressions.UnboundPredicate;
+import org.apache.iceberg.types.Types;
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.apache.iceberg.TestHelpers.assertAndUnwrapUnbound;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
+import static org.apache.iceberg.types.Types.NestedField.optional;
+
+/**
+ * Tests for STARTS_WITH predicates projected through a string truncate transform.
+ */
+public class TestStartsWith {
+
+ private static final String COLUMN = "someStringCol";
+ private static final Schema SCHEMA = new Schema(optional(1, COLUMN, Types.StringType.get()));
+
+ /**
+ * Checks inclusive and strict projections of startsWith through truncate(4) for prefixes
+ * shorter than, equal to and longer than the truncation width.
+ */
+ @Test
+ public void testTruncateProjections() {
+ PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).truncate(COLUMN, 4).build();
+
+ assertProjectionInclusive(spec, startsWith(COLUMN, "ab"), "ab", Expression.Operation.STARTS_WITH);
+ assertProjectionInclusive(spec, startsWith(COLUMN, "abab"), "abab", Expression.Operation.STARTS_WITH);
+ assertProjectionInclusive(spec, startsWith(COLUMN, "ababab"), "abab", Expression.Operation.STARTS_WITH);
+
+ assertProjectionStrict(spec, startsWith(COLUMN, "ab"), "ab", Expression.Operation.STARTS_WITH);
+ assertProjectionStrict(spec, startsWith(COLUMN, "abab"), "abab", Expression.Operation.EQ);
+
+ // a prefix longer than the truncation width has no strict projection
+ Expression projection = Projections.strict(spec).project(startsWith(COLUMN, "ababab"));
+ Assert.assertTrue("Strict projection of a prefix longer than the width should be False",
+ projection instanceof False);
+ }
+
+ /**
+ * Projects startsWith("abcde") through truncate(2) and evaluates the projected predicate
+ * against a row holding "abcde".
+ */
+ @Test
+ public void testTruncateString() {
+ Truncate<String> trunc = Truncate.get(Types.StringType.get(), 2);
+ Expression expr = startsWith(COLUMN, "abcde");
+ // safe: expr is a startsWith predicate built from String arguments just above
+ @SuppressWarnings("unchecked")
+ BoundPredicate<String> boundExpr = (BoundPredicate<String>) Binder.bind(SCHEMA.asStruct(), expr, false);
+
+ UnboundPredicate<String> projected = trunc.project(COLUMN, boundExpr);
+ Evaluator evaluator = new Evaluator(SCHEMA.asStruct(), projected);
+
+ Assert.assertTrue("startsWith(abcde, truncate(abcde,2)) => true",
+ evaluator.eval(TestHelpers.Row.of("abcde")));
+ }
+
+ /**
+ * Projects the filter inclusively and checks the resulting literal and operation.
+ */
+ private void assertProjectionInclusive(PartitionSpec spec, UnboundPredicate<?> filter,
+ String expectedLiteral, Expression.Operation expectedOp) {
+ Expression projection = Projections.inclusive(spec).project(filter);
+ assertProjection(spec, expectedLiteral, projection, expectedOp);
+ }
+
+ /**
+ * Projects the filter strictly and checks the resulting literal and operation.
+ */
+ private void assertProjectionStrict(PartitionSpec spec, UnboundPredicate<?> filter,
+ String expectedLiteral, Expression.Operation expectedOp) {
+ Expression projection = Projections.strict(spec).project(filter);
+ assertProjection(spec, expectedLiteral, projection, expectedOp);
+ }
+
+ /**
+ * Unwraps the projection as an unbound predicate, renders its literal with the transform of
+ * the first partition field for source id 1, and compares operation and rendered literal.
+ */
+ private void assertProjection(PartitionSpec spec, String expectedLiteral, Expression projection,
+ Expression.Operation expectedOp) {
+ UnboundPredicate<?> predicate = assertAndUnwrapUnbound(projection);
+ Literal<?> literal = predicate.literal();
+ // safe: every spec built in this class applies truncate to the string column
+ @SuppressWarnings("unchecked")
+ Truncate<CharSequence> transform = (Truncate<CharSequence>) spec.getFieldsBySourceId(1).get(0).transform();
+ String output = transform.toHumanString((String) literal.value());
+
+ Assert.assertEquals("Projected operation should match", expectedOp, predicate.op());
+ Assert.assertEquals("Projected literal should match", expectedLiteral, output);
+ }
+}
diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java b/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java
index 1ed1f4c..f2160d5 100644
--- a/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java
+++ b/api/src/test/java/org/apache/iceberg/transforms/TestTruncatesResiduals.java
@@ -36,6 +36,7 @@ import static org.apache.iceberg.expressions.Expressions.greaterThanOrEqual;
import static org.apache.iceberg.expressions.Expressions.lessThan;
import static org.apache.iceberg.expressions.Expressions.lessThanOrEqual;
import static org.apache.iceberg.expressions.Expressions.notEqual;
+import static org.apache.iceberg.expressions.Expressions.startsWith;
public class TestTruncatesResiduals {
@@ -173,5 +174,10 @@ public class TestTruncatesResiduals {
assertResidualValue(spec, notEqual("value", "bcd"), "ab", Expression.Operation.TRUE);
assertResidualPredicate(spec, notEqual("value", "bcd"), "bc");
assertResidualValue(spec, notEqual("value", "bcd"), "cd", Expression.Operation.TRUE);
+
+ // starts with
+ assertResidualValue(spec, startsWith("value", "bcd"), "ab", Expression.Operation.FALSE);
+ assertResidualPredicate(spec, startsWith("value", "bcd"), "bc");
+ assertResidualValue(spec, startsWith("value", "bcd"), "cd", Expression.Operation.FALSE);
}
}