You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2019/09/27 16:54:04 UTC

[incubator-iceberg] branch master updated: Add custom field names to PartitionSpec builder (#498)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 4bb65c1  Add custom field names to PartitionSpec builder (#498)
4bb65c1 is described below

commit 4bb65c18e9dd89698891be99cd9ac0efab85cf73
Author: Gautam <ga...@gmail.com>
AuthorDate: Fri Sep 27 22:23:59 2019 +0530

    Add custom field names to PartitionSpec builder (#498)
---
 .../java/org/apache/iceberg/PartitionSpec.java     | 93 +++++++++++++++-------
 .../iceberg/TestPartitionSpecValidation.java       | 62 ++++++++++++++-
 2 files changed, 126 insertions(+), 29 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/PartitionSpec.java b/api/src/main/java/org/apache/iceberg/PartitionSpec.java
index 1e4c727..9f1bfc0 100644
--- a/api/src/main/java/org/apache/iceberg/PartitionSpec.java
+++ b/api/src/main/java/org/apache/iceberg/PartitionSpec.java
@@ -332,6 +332,21 @@ public class PartitionSpec implements Serializable {
     }
 
     private void checkAndAddPartitionName(String name) {
+      checkAndAddPartitionName(name, null);
+    }
+
+    private void checkAndAddPartitionName(String name, Integer identitySourceColumnId) {
+      Types.NestedField schemaField = schema.findField(name);
+      if (identitySourceColumnId != null) {
+        // for identity transform case we allow  conflicts between partition and schema field name as
+        //   long as they are sourced from the same schema field
+        Preconditions.checkArgument(schemaField == null || schemaField.fieldId() == identitySourceColumnId,
+            "Cannot create identity partition sourced from different field in schema: %s", name);
+      } else {
+        // for all other transforms we don't allow conflicts between partition name and schema field name
+        Preconditions.checkArgument(schemaField == null,
+            "Cannot create partition from name that exists in schema: %s", name);
+      }
       Preconditions.checkArgument(name != null && !name.isEmpty(),
           "Cannot use empty or null partition name: %s", name);
       Preconditions.checkArgument(!partitionNames.contains(name),
@@ -357,79 +372,101 @@ public class PartitionSpec implements Serializable {
       return sourceColumn;
     }
 
-    public Builder identity(String sourceName) {
-      checkAndAddPartitionName(sourceName);
+    Builder identity(String sourceName, String targetName) {
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
+      checkAndAddPartitionName(targetName, sourceColumn.fieldId());
       fields.add(new PartitionField(
-          sourceColumn.fieldId(), sourceName, Transforms.identity(sourceColumn.type())));
+          sourceColumn.fieldId(), targetName, Transforms.identity(sourceColumn.type())));
       return this;
     }
 
-    public Builder year(String sourceName) {
-      String name = sourceName + "_year";
-      checkAndAddPartitionName(name);
+    public Builder identity(String sourceName) {
+      return identity(sourceName, sourceName);
+    }
+
+    public Builder year(String sourceName, String targetName) {
+      checkAndAddPartitionName(targetName);
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
       PartitionField field = new PartitionField(
-          sourceColumn.fieldId(), name, Transforms.year(sourceColumn.type()));
+          sourceColumn.fieldId(), targetName, Transforms.year(sourceColumn.type()));
       checkForRedundantPartitions(field);
       fields.add(field);
       return this;
     }
 
-    public Builder month(String sourceName) {
-      String name = sourceName + "_month";
-      checkAndAddPartitionName(name);
+    public Builder year(String sourceName) {
+      return year(sourceName, sourceName + "_year");
+    }
+
+    public Builder month(String sourceName, String targetName) {
+      checkAndAddPartitionName(targetName);
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
       PartitionField field = new PartitionField(
-          sourceColumn.fieldId(), name, Transforms.month(sourceColumn.type()));
+          sourceColumn.fieldId(), targetName, Transforms.month(sourceColumn.type()));
       checkForRedundantPartitions(field);
       fields.add(field);
       return this;
     }
 
-    public Builder day(String sourceName) {
-      String name = sourceName + "_day";
-      checkAndAddPartitionName(name);
+    public Builder month(String sourceName) {
+      return month(sourceName, sourceName + "_month");
+    }
+
+    public Builder day(String sourceName, String targetName) {
+      checkAndAddPartitionName(targetName);
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
       PartitionField field = new PartitionField(
-          sourceColumn.fieldId(), name, Transforms.day(sourceColumn.type()));
+          sourceColumn.fieldId(), targetName, Transforms.day(sourceColumn.type()));
       checkForRedundantPartitions(field);
       fields.add(field);
       return this;
     }
 
-    public Builder hour(String sourceName) {
-      String name = sourceName + "_hour";
-      checkAndAddPartitionName(name);
+    public Builder day(String sourceName) {
+      return day(sourceName, sourceName + "_day");
+    }
+
+    public Builder hour(String sourceName, String targetName) {
+      checkAndAddPartitionName(targetName);
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
       PartitionField field = new PartitionField(
-          sourceColumn.fieldId(), name, Transforms.hour(sourceColumn.type()));
+          sourceColumn.fieldId(), targetName, Transforms.hour(sourceColumn.type()));
       checkForRedundantPartitions(field);
       fields.add(field);
       return this;
     }
 
-    public Builder bucket(String sourceName, int numBuckets) {
-      String name = sourceName + "_bucket";
-      checkAndAddPartitionName(name);
+    public Builder hour(String sourceName) {
+      return hour(sourceName, sourceName + "_hour");
+    }
+
+    public Builder bucket(String sourceName, int numBuckets, String targetName) {
+      checkAndAddPartitionName(targetName);
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
       fields.add(new PartitionField(
-          sourceColumn.fieldId(), name, Transforms.bucket(sourceColumn.type(), numBuckets)));
+          sourceColumn.fieldId(), targetName, Transforms.bucket(sourceColumn.type(), numBuckets)));
       return this;
     }
 
-    public Builder truncate(String sourceName, int width) {
-      String name = sourceName + "_trunc";
-      checkAndAddPartitionName(name);
+    public Builder bucket(String sourceName, int numBuckets) {
+      return bucket(sourceName, numBuckets, sourceName + "_bucket");
+    }
+
+    public Builder truncate(String sourceName, int width, String targetName) {
+      checkAndAddPartitionName(targetName);
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
       fields.add(new PartitionField(
-          sourceColumn.fieldId(), name, Transforms.truncate(sourceColumn.type(), width)));
+          sourceColumn.fieldId(), targetName, Transforms.truncate(sourceColumn.type(), width)));
       return this;
     }
 
+    public Builder truncate(String sourceName, int width) {
+      return truncate(sourceName, width, sourceName + "_trunc");
+    }
+
     Builder add(int sourceId, String name, String transform) {
-      checkAndAddPartitionName(name);
       Types.NestedField column = schema.findField(sourceId);
+      checkAndAddPartitionName(name, column.fieldId());
       Preconditions.checkNotNull(column, "Cannot find source column: %d", sourceId);
       fields.add(new PartitionField(sourceId, name, Transforms.fromString(column.type(), transform)));
       return this;
diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java
index 94644b8..48e2d75 100644
--- a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java
+++ b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java
@@ -21,6 +21,7 @@ package org.apache.iceberg;
 
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.types.Types.NestedField;
+import org.junit.Assert;
 import org.junit.Test;
 
 public class TestPartitionSpecValidation {
@@ -29,7 +30,8 @@ public class TestPartitionSpecValidation {
       NestedField.required(2, "ts", Types.TimestampType.withZone()),
       NestedField.required(3, "another_ts", Types.TimestampType.withZone()),
       NestedField.required(4, "d", Types.TimestampType.withZone()),
-      NestedField.required(5, "another_d", Types.TimestampType.withZone())
+      NestedField.required(5, "another_d", Types.TimestampType.withZone()),
+      NestedField.required(6, "s", Types.StringType.get())
   );
 
   @Test
@@ -121,6 +123,64 @@ public class TestPartitionSpecValidation {
     PartitionSpec.builderFor(SCHEMA).hour("d").hour("another_d").build();
   }
 
+
+  @Test
+  public void testSettingPartitionTransformsWithCustomTargetNames() {
+    Assert.assertEquals(PartitionSpec.builderFor(SCHEMA).year("ts", "custom_year")
+        .build().fields().get(0).name(), "custom_year");
+    Assert.assertEquals(PartitionSpec.builderFor(SCHEMA).month("ts", "custom_month")
+        .build().fields().get(0).name(), "custom_month");
+    Assert.assertEquals(PartitionSpec.builderFor(SCHEMA).day("ts", "custom_day")
+        .build().fields().get(0).name(), "custom_day");
+    Assert.assertEquals(PartitionSpec.builderFor(SCHEMA).hour("ts", "custom_hour")
+        .build().fields().get(0).name(), "custom_hour");
+    Assert.assertEquals(PartitionSpec.builderFor(SCHEMA)
+        .bucket("ts", 4, "custom_bucket")
+        .build().fields().get(0).name(), "custom_bucket");
+    Assert.assertEquals(PartitionSpec.builderFor(SCHEMA)
+        .truncate("s", 1, "custom_truncate")
+        .build().fields().get(0).name(), "custom_truncate");
+  }
+
+  @Test
+  public void testSettingPartitionTransformsWithCustomTargetNamesThatAlreadyExist() {
+
+    AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+        IllegalArgumentException.class,
+        "Cannot create partition from name that exists in schema: another_ts",
+        () -> PartitionSpec.builderFor(SCHEMA).year("ts", "another_ts"));
+
+    AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+        IllegalArgumentException.class,
+        "Cannot create partition from name that exists in schema: another_ts",
+        () -> PartitionSpec.builderFor(SCHEMA).month("ts", "another_ts"));
+
+    AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+        IllegalArgumentException.class,
+        "Cannot create partition from name that exists in schema: another_ts",
+        () -> PartitionSpec.builderFor(SCHEMA).day("ts", "another_ts"));
+
+    AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+        IllegalArgumentException.class,
+        "Cannot create partition from name that exists in schema: another_ts",
+        () -> PartitionSpec.builderFor(SCHEMA).hour("ts", "another_ts"));
+
+    AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+        IllegalArgumentException.class,
+        "Cannot create partition from name that exists in schema: another_ts",
+        () -> PartitionSpec.builderFor(SCHEMA).truncate("ts", 2, "another_ts"));
+
+    AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+        IllegalArgumentException.class,
+        "Cannot create partition from name that exists in schema: another_ts",
+        () -> PartitionSpec.builderFor(SCHEMA).bucket("ts", 4, "another_ts"));
+
+    AssertHelpers.assertThrows("Should not allow target column name sourced from a different column",
+        IllegalArgumentException.class,
+        "Cannot create identity partition sourced from different field in schema: another_ts",
+        () -> PartitionSpec.builderFor(SCHEMA).identity("ts", "another_ts"));
+  }
+
   @Test
   public void testMissingSourceColumn() {
     AssertHelpers.assertThrows("Should detect missing source column",