You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2020/04/15 23:45:51 UTC

[incubator-iceberg] branch master updated: Add void transform that always produces null (#924)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new e0a3304  Add void transform that always produces null (#924)
e0a3304 is described below

commit e0a3304f265c6be4e1d5873f5931a5b54d993534
Author: Ryan Blue <rd...@users.noreply.github.com>
AuthorDate: Wed Apr 15 16:45:44 2020 -0700

    Add void transform that always produces null (#924)
---
 .../java/org/apache/iceberg/PartitionSpec.java     | 11 ++++
 .../iceberg/transforms/SerializationProxies.java   | 51 +++++++++++++++
 .../org/apache/iceberg/transforms/Transforms.java  | 14 ++++
 .../apache/iceberg/transforms/VoidTransform.java   | 76 ++++++++++++++++++++++
 .../org/apache/iceberg/PartitionSpecTestBase.java  | 76 ++++++++++++++++++++++
 .../apache/iceberg/TestTransformSerialization.java | 55 +---------------
 .../apache/iceberg/TestPartitionSpecParser.java    | 13 +++-
 7 files changed, 242 insertions(+), 54 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/PartitionSpec.java b/api/src/main/java/org/apache/iceberg/PartitionSpec.java
index 3742f7d..9ccce0d 100644
--- a/api/src/main/java/org/apache/iceberg/PartitionSpec.java
+++ b/api/src/main/java/org/apache/iceberg/PartitionSpec.java
@@ -456,6 +456,17 @@ public class PartitionSpec implements Serializable {
       return truncate(sourceName, width, sourceName + "_trunc");
     }
 
+    public Builder alwaysNull(String sourceName, String targetName) {
+      checkAndAddPartitionName(targetName);
+      Types.NestedField sourceColumn = findSourceColumn(sourceName);
+      fields.add(new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.alwaysNull()));
+      return this;
+    }
+
+    public Builder alwaysNull(String sourceName) {
+      return alwaysNull(sourceName, sourceName + "_null");
+    }
+
     // add a partition field with an auto-increment partition field id starting from PARTITION_DATA_ID_START
     Builder add(int sourceId, String name, String transform) {
       return add(sourceId, nextFieldId(), name, transform);
diff --git a/api/src/main/java/org/apache/iceberg/transforms/SerializationProxies.java b/api/src/main/java/org/apache/iceberg/transforms/SerializationProxies.java
new file mode 100644
index 0000000..db12159
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/transforms/SerializationProxies.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.transforms;
+
+import java.io.ObjectStreamException;
+import java.io.Serializable;
+
+/**
+ * Stand-in classes for transform classes in Java Serialization.
+ * <p>
+ * These are used so that transform classes can be singletons and use identity equality.
+ */
+class SerializationProxies {
+  private SerializationProxies() {
+  }
+
+  static class VoidTransformProxy implements Serializable {
+    private static final VoidTransformProxy INSTANCE = new VoidTransformProxy();
+
+    static VoidTransformProxy get() {
+      return INSTANCE;
+    }
+
+    /**
+     * Constructor for Java serialization.
+     */
+    VoidTransformProxy() {
+    }
+
+    Object readResolve() throws ObjectStreamException {
+      return VoidTransform.get();
+    }
+  }
+}
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java
index ee92c29..cd370d4 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java
@@ -67,6 +67,10 @@ public class Transforms {
       // fall through to return unknown transform
     }
 
+    if (transform.equalsIgnoreCase("void")) {
+      return VoidTransform.get();
+    }
+
     return new UnknownTransform<>(type, transform);
   }
 
@@ -178,4 +182,14 @@ public class Transforms {
   public static <T> Transform<T, T> truncate(Type type, int width) {
     return Truncate.get(type, width);
   }
+
+  /**
+   * Returns a {@link Transform} that always produces null.
+   *
+   * @param <T> Java type accepted by the transform.
+   * @return a transform that always produces null (the void transform).
+   */
+  public static <T> Transform<T, Void> alwaysNull() {
+    return VoidTransform.get();
+  }
 }
diff --git a/api/src/main/java/org/apache/iceberg/transforms/VoidTransform.java b/api/src/main/java/org/apache/iceberg/transforms/VoidTransform.java
new file mode 100644
index 0000000..d2ecbda
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/transforms/VoidTransform.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg.transforms;
+
+import java.io.ObjectStreamException;
+import org.apache.iceberg.expressions.BoundPredicate;
+import org.apache.iceberg.expressions.UnboundPredicate;
+import org.apache.iceberg.types.Type;
+
+class VoidTransform<S> implements Transform<S, Void> {
+  private static final VoidTransform<Object> INSTANCE = new VoidTransform<>();
+
+  @SuppressWarnings("unchecked")
+  static <T> VoidTransform<T> get() {
+    return (VoidTransform<T>) INSTANCE;
+  }
+
+  private VoidTransform() {
+  }
+
+  @Override
+  public Void apply(Object value) {
+    return null;
+  }
+
+  @Override
+  public boolean canTransform(Type type) {
+    return true;
+  }
+
+  @Override
+  public Type getResultType(Type sourceType) {
+    return sourceType;
+  }
+
+  @Override
+  public UnboundPredicate<Void> projectStrict(String name, BoundPredicate<S> predicate) {
+    return null;
+  }
+
+  @Override
+  public UnboundPredicate<Void> project(String name, BoundPredicate<S> predicate) {
+    return null;
+  }
+
+  @Override
+  public String toHumanString(Void value) {
+    return "null";
+  }
+
+  @Override
+  public String toString() {
+    return "void";
+  }
+
+  Object writeReplace() throws ObjectStreamException {
+    return SerializationProxies.VoidTransformProxy.get();
+  }
+}
diff --git a/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java
new file mode 100644
index 0000000..d8d6483
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+import org.apache.iceberg.types.Types;
+
+@SuppressWarnings("checkstyle:HideUtilityClassConstructor")
+public class PartitionSpecTestBase {
+  public static final Schema SCHEMA = new Schema(
+      Types.NestedField.required(1, "i", Types.IntegerType.get()),
+      Types.NestedField.required(2, "l", Types.LongType.get()),
+      Types.NestedField.required(3, "d", Types.DateType.get()),
+      Types.NestedField.required(4, "t", Types.TimeType.get()),
+      Types.NestedField.required(5, "ts", Types.TimestampType.withoutZone()),
+      Types.NestedField.required(6, "dec", Types.DecimalType.of(9, 2)),
+      Types.NestedField.required(7, "s", Types.StringType.get()),
+      Types.NestedField.required(8, "u", Types.UUIDType.get()),
+      Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)),
+      Types.NestedField.required(10, "b", Types.BinaryType.get())
+  );
+
+  // one spec for each of the allowed transform/type pairs
+  public static final PartitionSpec[] SPECS = new PartitionSpec[] {
+      PartitionSpec.builderFor(SCHEMA).identity("i").build(),
+      PartitionSpec.builderFor(SCHEMA).identity("l").build(),
+      PartitionSpec.builderFor(SCHEMA).identity("d").build(),
+      PartitionSpec.builderFor(SCHEMA).identity("t").build(),
+      PartitionSpec.builderFor(SCHEMA).identity("ts").build(),
+      PartitionSpec.builderFor(SCHEMA).identity("dec").build(),
+      PartitionSpec.builderFor(SCHEMA).identity("s").build(),
+      PartitionSpec.builderFor(SCHEMA).identity("u").build(),
+      PartitionSpec.builderFor(SCHEMA).identity("f").build(),
+      PartitionSpec.builderFor(SCHEMA).identity("b").build(),
+      PartitionSpec.builderFor(SCHEMA).bucket("i", 128).build(),
+      PartitionSpec.builderFor(SCHEMA).bucket("l", 128).build(),
+      PartitionSpec.builderFor(SCHEMA).bucket("d", 128).build(),
+      PartitionSpec.builderFor(SCHEMA).bucket("t", 128).build(),
+      PartitionSpec.builderFor(SCHEMA).bucket("ts", 128).build(),
+      PartitionSpec.builderFor(SCHEMA).bucket("dec", 128).build(),
+      PartitionSpec.builderFor(SCHEMA).bucket("s", 128).build(),
+      PartitionSpec.builderFor(SCHEMA).bucket("u", 128).build(),
+      PartitionSpec.builderFor(SCHEMA).bucket("f", 128).build(),
+      PartitionSpec.builderFor(SCHEMA).bucket("b", 128).build(),
+      PartitionSpec.builderFor(SCHEMA).year("d").build(),
+      PartitionSpec.builderFor(SCHEMA).month("d").build(),
+      PartitionSpec.builderFor(SCHEMA).day("d").build(),
+      PartitionSpec.builderFor(SCHEMA).year("ts").build(),
+      PartitionSpec.builderFor(SCHEMA).month("ts").build(),
+      PartitionSpec.builderFor(SCHEMA).day("ts").build(),
+      PartitionSpec.builderFor(SCHEMA).hour("ts").build(),
+      PartitionSpec.builderFor(SCHEMA).truncate("i", 10).build(),
+      PartitionSpec.builderFor(SCHEMA).truncate("l", 10).build(),
+      PartitionSpec.builderFor(SCHEMA).truncate("dec", 10).build(),
+      PartitionSpec.builderFor(SCHEMA).truncate("s", 10).build(),
+      PartitionSpec.builderFor(SCHEMA).add(6, "dec_unsupported", "unsupported").build(),
+      PartitionSpec.builderFor(SCHEMA).add(6, 1111, "dec_unsupported", "unsupported").build(),
+      PartitionSpec.builderFor(SCHEMA).alwaysNull("ts").build(),
+  };
+}
diff --git a/api/src/test/java/org/apache/iceberg/TestTransformSerialization.java b/api/src/test/java/org/apache/iceberg/TestTransformSerialization.java
index cb4616c..74f4ea8 100644
--- a/api/src/test/java/org/apache/iceberg/TestTransformSerialization.java
+++ b/api/src/test/java/org/apache/iceberg/TestTransformSerialization.java
@@ -19,64 +19,13 @@
 
 package org.apache.iceberg;
 
-import org.apache.iceberg.types.Types;
 import org.junit.Assert;
 import org.junit.Test;
 
-public class TestTransformSerialization {
+public class TestTransformSerialization extends PartitionSpecTestBase {
   @Test
   public void testTransforms() throws Exception {
-    Schema schema = new Schema(
-        Types.NestedField.required(1, "i", Types.IntegerType.get()),
-        Types.NestedField.required(2, "l", Types.LongType.get()),
-        Types.NestedField.required(3, "d", Types.DateType.get()),
-        Types.NestedField.required(4, "t", Types.TimeType.get()),
-        Types.NestedField.required(5, "ts", Types.TimestampType.withoutZone()),
-        Types.NestedField.required(6, "dec", Types.DecimalType.of(9, 2)),
-        Types.NestedField.required(7, "s", Types.StringType.get()),
-        Types.NestedField.required(8, "u", Types.UUIDType.get()),
-        Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)),
-        Types.NestedField.required(10, "b", Types.BinaryType.get())
-    );
-
-    // a spec with all of the allowed transform/type pairs
-    PartitionSpec[] specs = new PartitionSpec[] {
-        PartitionSpec.builderFor(schema).identity("i").build(),
-        PartitionSpec.builderFor(schema).identity("l").build(),
-        PartitionSpec.builderFor(schema).identity("d").build(),
-        PartitionSpec.builderFor(schema).identity("t").build(),
-        PartitionSpec.builderFor(schema).identity("ts").build(),
-        PartitionSpec.builderFor(schema).identity("dec").build(),
-        PartitionSpec.builderFor(schema).identity("s").build(),
-        PartitionSpec.builderFor(schema).identity("u").build(),
-        PartitionSpec.builderFor(schema).identity("f").build(),
-        PartitionSpec.builderFor(schema).identity("b").build(),
-        PartitionSpec.builderFor(schema).bucket("i", 128).build(),
-        PartitionSpec.builderFor(schema).bucket("l", 128).build(),
-        PartitionSpec.builderFor(schema).bucket("d", 128).build(),
-        PartitionSpec.builderFor(schema).bucket("t", 128).build(),
-        PartitionSpec.builderFor(schema).bucket("ts", 128).build(),
-        PartitionSpec.builderFor(schema).bucket("dec", 128).build(),
-        PartitionSpec.builderFor(schema).bucket("s", 128).build(),
-        PartitionSpec.builderFor(schema).bucket("u", 128).build(),
-        PartitionSpec.builderFor(schema).bucket("f", 128).build(),
-        PartitionSpec.builderFor(schema).bucket("b", 128).build(),
-        PartitionSpec.builderFor(schema).year("d").build(),
-        PartitionSpec.builderFor(schema).month("d").build(),
-        PartitionSpec.builderFor(schema).day("d").build(),
-        PartitionSpec.builderFor(schema).year("ts").build(),
-        PartitionSpec.builderFor(schema).month("ts").build(),
-        PartitionSpec.builderFor(schema).day("ts").build(),
-        PartitionSpec.builderFor(schema).hour("ts").build(),
-        PartitionSpec.builderFor(schema).truncate("i", 10).build(),
-        PartitionSpec.builderFor(schema).truncate("l", 10).build(),
-        PartitionSpec.builderFor(schema).truncate("dec", 10).build(),
-        PartitionSpec.builderFor(schema).truncate("s", 10).build(),
-        PartitionSpec.builderFor(schema).add(6, "dec_unsupported", "unsupported").build(),
-        PartitionSpec.builderFor(schema).add(6, 1111, "dec_unsupported", "unsupported").build(),
-    };
-
-    for (PartitionSpec spec : specs) {
+    for (PartitionSpec spec : SPECS) {
       Assert.assertEquals("Deserialization should produce equal partition spec",
           spec, TestHelpers.roundTripSerialize(spec));
     }
diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java b/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java
index dea0d37..c31f894 100644
--- a/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java
+++ b/core/src/test/java/org/apache/iceberg/TestPartitionSpecParser.java
@@ -23,7 +23,6 @@ import org.junit.Assert;
 import org.junit.Test;
 
 public class TestPartitionSpecParser extends TableTestBase {
-
   @Test
   public void testToJsonForV1Table() {
     String expected = "{\n" +
@@ -108,4 +107,16 @@ public class TestPartitionSpecParser extends TableTestBase {
     Assert.assertEquals(1000, spec.fields().get(0).fieldId());
     Assert.assertEquals(1001, spec.fields().get(1).fieldId());
   }
+
+  @Test
+  public void testTransforms() {
+    for (PartitionSpec spec : PartitionSpecTestBase.SPECS) {
+      Assert.assertEquals("To/from JSON should produce equal partition spec",
+          spec, roundTripJSON(spec));
+    }
+  }
+
+  private static PartitionSpec roundTripJSON(PartitionSpec spec) {
+    return PartitionSpecParser.fromJson(PartitionSpecTestBase.SCHEMA, PartitionSpecParser.toJson(spec));
+  }
 }