You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2019/09/27 16:54:04 UTC
[incubator-iceberg] branch master updated: Add custom field names
to PartitionSpec builder (#498)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 4bb65c1 Add custom field names to PartitionSpec builder (#498)
4bb65c1 is described below
commit 4bb65c18e9dd89698891be99cd9ac0efab85cf73
Author: Gautam <ga...@gmail.com>
AuthorDate: Fri Sep 27 22:23:59 2019 +0530
Add custom field names to PartitionSpec builder (#498)
---
.../java/org/apache/iceberg/PartitionSpec.java | 93 +++++++++++++++-------
.../iceberg/TestPartitionSpecValidation.java | 62 ++++++++++++++-
2 files changed, 126 insertions(+), 29 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/PartitionSpec.java b/api/src/main/java/org/apache/iceberg/PartitionSpec.java
index 1e4c727..9f1bfc0 100644
--- a/api/src/main/java/org/apache/iceberg/PartitionSpec.java
+++ b/api/src/main/java/org/apache/iceberg/PartitionSpec.java
@@ -332,6 +332,21 @@ public class PartitionSpec implements Serializable {
}
private void checkAndAddPartitionName(String name) {
+ checkAndAddPartitionName(name, null);
+ }
+
+ private void checkAndAddPartitionName(String name, Integer identitySourceColumnId) {
+ Types.NestedField schemaField = schema.findField(name);
+ if (identitySourceColumnId != null) {
+ // for identity transform case we allow conflicts between partition and schema field name as
+ // long as they are sourced from the same schema field
+ Preconditions.checkArgument(schemaField == null || schemaField.fieldId() == identitySourceColumnId,
+ "Cannot create identity partition sourced from different field in schema: %s", name);
+ } else {
+ // for all other transforms we don't allow conflicts between partition name and schema field name
+ Preconditions.checkArgument(schemaField == null,
+ "Cannot create partition from name that exists in schema: %s", name);
+ }
Preconditions.checkArgument(name != null && !name.isEmpty(),
"Cannot use empty or null partition name: %s", name);
Preconditions.checkArgument(!partitionNames.contains(name),
@@ -357,79 +372,101 @@ public class PartitionSpec implements Serializable {
return sourceColumn;
}
- public Builder identity(String sourceName) {
- checkAndAddPartitionName(sourceName);
+ Builder identity(String sourceName, String targetName) {
Types.NestedField sourceColumn = findSourceColumn(sourceName);
+ checkAndAddPartitionName(targetName, sourceColumn.fieldId());
fields.add(new PartitionField(
- sourceColumn.fieldId(), sourceName, Transforms.identity(sourceColumn.type())));
+ sourceColumn.fieldId(), targetName, Transforms.identity(sourceColumn.type())));
return this;
}
- public Builder year(String sourceName) {
- String name = sourceName + "_year";
- checkAndAddPartitionName(name);
+ public Builder identity(String sourceName) {
+ return identity(sourceName, sourceName);
+ }
+
+ public Builder year(String sourceName, String targetName) {
+ checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
PartitionField field = new PartitionField(
- sourceColumn.fieldId(), name, Transforms.year(sourceColumn.type()));
+ sourceColumn.fieldId(), targetName, Transforms.year(sourceColumn.type()));
checkForRedundantPartitions(field);
fields.add(field);
return this;
}
- public Builder month(String sourceName) {
- String name = sourceName + "_month";
- checkAndAddPartitionName(name);
+ public Builder year(String sourceName) {
+ return year(sourceName, sourceName + "_year");
+ }
+
+ public Builder month(String sourceName, String targetName) {
+ checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
PartitionField field = new PartitionField(
- sourceColumn.fieldId(), name, Transforms.month(sourceColumn.type()));
+ sourceColumn.fieldId(), targetName, Transforms.month(sourceColumn.type()));
checkForRedundantPartitions(field);
fields.add(field);
return this;
}
- public Builder day(String sourceName) {
- String name = sourceName + "_day";
- checkAndAddPartitionName(name);
+ public Builder month(String sourceName) {
+ return month(sourceName, sourceName + "_month");
+ }
+
+ public Builder day(String sourceName, String targetName) {
+ checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
PartitionField field = new PartitionField(
- sourceColumn.fieldId(), name, Transforms.day(sourceColumn.type()));
+ sourceColumn.fieldId(), targetName, Transforms.day(sourceColumn.type()));
checkForRedundantPartitions(field);
fields.add(field);
return this;
}
- public Builder hour(String sourceName) {
- String name = sourceName + "_hour";
- checkAndAddPartitionName(name);
+ public Builder day(String sourceName) {
+ return day(sourceName, sourceName + "_day");
+ }
+
+ public Builder hour(String sourceName, String targetName) {
+ checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
PartitionField field = new PartitionField(
- sourceColumn.fieldId(), name, Transforms.hour(sourceColumn.type()));
+ sourceColumn.fieldId(), targetName, Transforms.hour(sourceColumn.type()));
checkForRedundantPartitions(field);
fields.add(field);
return this;
}
- public Builder bucket(String sourceName, int numBuckets) {
- String name = sourceName + "_bucket";
- checkAndAddPartitionName(name);
+ public Builder hour(String sourceName) {
+ return hour(sourceName, sourceName + "_hour");
+ }
+
+ public Builder bucket(String sourceName, int numBuckets, String targetName) {
+ checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
fields.add(new PartitionField(
- sourceColumn.fieldId(), name, Transforms.bucket(sourceColumn.type(), numBuckets)));
+ sourceColumn.fieldId(), targetName, Transforms.bucket(sourceColumn.type(), numBuckets)));
return this;
}
- public Builder truncate(String sourceName, int width) {
- String name = sourceName + "_trunc";
- checkAndAddPartitionName(name);
+ public Builder bucket(String sourceName, int numBuckets) {
+ return bucket(sourceName, numBuckets, sourceName + "_bucket");
+ }
+
+ public Builder truncate(String sourceName, int width, String targetName) {
+ checkAndAddPartitionName(targetName);
Types.NestedField sourceColumn = findSourceColumn(sourceName);
fields.add(new PartitionField(
- sourceColumn.fieldId(), name, Transforms.truncate(sourceColumn.type(), width)));
+ sourceColumn.fieldId(), targetName, Transforms.truncate(sourceColumn.type(), width)));
return this;
}
+ public Builder truncate(String sourceName, int width) {
+ return truncate(sourceName, width, sourceName + "_trunc");
+ }
+
Builder add(int sourceId, String name, String transform) {
- checkAndAddPartitionName(name);
Types.NestedField column = schema.findField(sourceId);
+ checkAndAddPartitionName(name, column.fieldId());
Preconditions.checkNotNull(column, "Cannot find source column: %d", sourceId);
fields.add(new PartitionField(sourceId, name, Transforms.fromString(column.type(), transform)));
return this;
diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java
index 94644b8..48e2d75 100644
--- a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java
+++ b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java
@@ -21,6 +21,7 @@ package org.apache.iceberg;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.NestedField;
+import org.junit.Assert;
import org.junit.Test;
public class TestPartitionSpecValidation {
@@ -29,7 +30,8 @@ public class TestPartitionSpecValidation {
NestedField.required(2, "ts", Types.TimestampType.withZone()),
NestedField.required(3, "another_ts", Types.TimestampType.withZone()),
NestedField.required(4, "d", Types.TimestampType.withZone()),
- NestedField.required(5, "another_d", Types.TimestampType.withZone())
+ NestedField.required(5, "another_d", Types.TimestampType.withZone()),
+ NestedField.required(6, "s", Types.StringType.get())
);
@Test
@@ -121,6 +123,64 @@ public class TestPartitionSpecValidation {
PartitionSpec.builderFor(SCHEMA).hour("d").hour("another_d").build();
}
+
+ @Test
+ public void testSettingPartitionTransformsWithCustomTargetNames() {
+ Assert.assertEquals(PartitionSpec.builderFor(SCHEMA).year("ts", "custom_year")
+ .build().fields().get(0).name(), "custom_year");
+ Assert.assertEquals(PartitionSpec.builderFor(SCHEMA).month("ts", "custom_month")
+ .build().fields().get(0).name(), "custom_month");
+ Assert.assertEquals(PartitionSpec.builderFor(SCHEMA).day("ts", "custom_day")
+ .build().fields().get(0).name(), "custom_day");
+ Assert.assertEquals(PartitionSpec.builderFor(SCHEMA).hour("ts", "custom_hour")
+ .build().fields().get(0).name(), "custom_hour");
+ Assert.assertEquals(PartitionSpec.builderFor(SCHEMA)
+ .bucket("ts", 4, "custom_bucket")
+ .build().fields().get(0).name(), "custom_bucket");
+ Assert.assertEquals(PartitionSpec.builderFor(SCHEMA)
+ .truncate("s", 1, "custom_truncate")
+ .build().fields().get(0).name(), "custom_truncate");
+ }
+
+ @Test
+ public void testSettingPartitionTransformsWithCustomTargetNamesThatAlreadyExist() {
+
+ AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+ IllegalArgumentException.class,
+ "Cannot create partition from name that exists in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).year("ts", "another_ts"));
+
+ AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+ IllegalArgumentException.class,
+ "Cannot create partition from name that exists in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).month("ts", "another_ts"));
+
+ AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+ IllegalArgumentException.class,
+ "Cannot create partition from name that exists in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).day("ts", "another_ts"));
+
+ AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+ IllegalArgumentException.class,
+ "Cannot create partition from name that exists in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).hour("ts", "another_ts"));
+
+ AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+ IllegalArgumentException.class,
+ "Cannot create partition from name that exists in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).truncate("ts", 2, "another_ts"));
+
+ AssertHelpers.assertThrows("Should not allow target column name that exists in schema",
+ IllegalArgumentException.class,
+ "Cannot create partition from name that exists in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).bucket("ts", 4, "another_ts"));
+
+ AssertHelpers.assertThrows("Should not allow target column name sourced from a different column",
+ IllegalArgumentException.class,
+ "Cannot create identity partition sourced from different field in schema: another_ts",
+ () -> PartitionSpec.builderFor(SCHEMA).identity("ts", "another_ts"));
+ }
+
@Test
public void testMissingSourceColumn() {
AssertHelpers.assertThrows("Should detect missing source column",