You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2022/11/20 20:02:45 UTC

[iceberg] branch master updated: API: Fix Transform backward compatibility in PartitionSpec (#6220)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 29187353e1 API: Fix Transform backward compatibility in PartitionSpec (#6220)
29187353e1 is described below

commit 29187353e18a0ec477f638a00353340f24fb704e
Author: Fokko Driesprong <fo...@apache.org>
AuthorDate: Sun Nov 20 21:02:40 2022 +0100

    API: Fix Transform backward compatibility in PartitionSpec (#6220)
---
 .../java/org/apache/iceberg/PartitionField.java    |  2 +-
 .../java/org/apache/iceberg/PartitionSpec.java     | 39 ++++++++++++++++++----
 .../main/java/org/apache/iceberg/SortField.java    |  2 +-
 .../org/apache/iceberg/UnboundPartitionSpec.java   | 12 +++++--
 .../apache/iceberg/transforms/ProjectionUtil.java  |  4 ++-
 .../org/apache/iceberg/util/SortOrderUtil.java     | 18 +++++-----
 6 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/PartitionField.java b/api/src/main/java/org/apache/iceberg/PartitionField.java
index 5956e01d7b..3ed765a898 100644
--- a/api/src/main/java/org/apache/iceberg/PartitionField.java
+++ b/api/src/main/java/org/apache/iceberg/PartitionField.java
@@ -73,7 +73,7 @@ public class PartitionField implements Serializable {
     return sourceId == that.sourceId
         && fieldId == that.fieldId
         && name.equals(that.name)
-        && transform.equals(that.transform);
+        && transform.toString().equals(that.transform.toString());
   }
 
   @Override
diff --git a/api/src/main/java/org/apache/iceberg/PartitionSpec.java b/api/src/main/java/org/apache/iceberg/PartitionSpec.java
index 8474ca154a..aa6a6051e5 100644
--- a/api/src/main/java/org/apache/iceberg/PartitionSpec.java
+++ b/api/src/main/java/org/apache/iceberg/PartitionSpec.java
@@ -419,7 +419,10 @@ public class PartitionSpec implements Serializable {
       checkAndAddPartitionName(targetName, sourceColumn.fieldId());
       PartitionField field =
           new PartitionField(
-              sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.identity());
+              sourceColumn.fieldId(),
+              nextFieldId(),
+              targetName,
+              Transforms.identity(sourceColumn.type()));
       checkForRedundantPartitions(field);
       fields.add(field);
       return this;
@@ -433,7 +436,11 @@ public class PartitionSpec implements Serializable {
       checkAndAddPartitionName(targetName);
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
       PartitionField field =
-          new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.year());
+          new PartitionField(
+              sourceColumn.fieldId(),
+              nextFieldId(),
+              targetName,
+              Transforms.year(sourceColumn.type()));
       checkForRedundantPartitions(field);
       fields.add(field);
       return this;
@@ -447,7 +454,11 @@ public class PartitionSpec implements Serializable {
       checkAndAddPartitionName(targetName);
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
       PartitionField field =
-          new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.month());
+          new PartitionField(
+              sourceColumn.fieldId(),
+              nextFieldId(),
+              targetName,
+              Transforms.month(sourceColumn.type()));
       checkForRedundantPartitions(field);
       fields.add(field);
       return this;
@@ -461,7 +472,11 @@ public class PartitionSpec implements Serializable {
       checkAndAddPartitionName(targetName);
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
       PartitionField field =
-          new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.day());
+          new PartitionField(
+              sourceColumn.fieldId(),
+              nextFieldId(),
+              targetName,
+              Transforms.day(sourceColumn.type()));
       checkForRedundantPartitions(field);
       fields.add(field);
       return this;
@@ -475,7 +490,11 @@ public class PartitionSpec implements Serializable {
       checkAndAddPartitionName(targetName);
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
       PartitionField field =
-          new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.hour());
+          new PartitionField(
+              sourceColumn.fieldId(),
+              nextFieldId(),
+              targetName,
+              Transforms.hour(sourceColumn.type()));
       checkForRedundantPartitions(field);
       fields.add(field);
       return this;
@@ -490,7 +509,10 @@ public class PartitionSpec implements Serializable {
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
       fields.add(
           new PartitionField(
-              sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.bucket(numBuckets)));
+              sourceColumn.fieldId(),
+              nextFieldId(),
+              targetName,
+              Transforms.bucket(sourceColumn.type(), numBuckets)));
       return this;
     }
 
@@ -503,7 +525,10 @@ public class PartitionSpec implements Serializable {
       Types.NestedField sourceColumn = findSourceColumn(sourceName);
       fields.add(
           new PartitionField(
-              sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.truncate(width)));
+              sourceColumn.fieldId(),
+              nextFieldId(),
+              targetName,
+              Transforms.truncate(sourceColumn.type(), width)));
       return this;
     }
 
diff --git a/api/src/main/java/org/apache/iceberg/SortField.java b/api/src/main/java/org/apache/iceberg/SortField.java
index 91a82b2bd6..d7f110a26e 100644
--- a/api/src/main/java/org/apache/iceberg/SortField.java
+++ b/api/src/main/java/org/apache/iceberg/SortField.java
@@ -96,7 +96,7 @@ public class SortField implements Serializable {
     }
 
     SortField that = (SortField) other;
-    return transform.equals(that.transform)
+    return transform.toString().equals(that.transform.toString())
         && sourceId == that.sourceId
         && direction == that.direction
         && nullOrder == that.nullOrder;
diff --git a/api/src/main/java/org/apache/iceberg/UnboundPartitionSpec.java b/api/src/main/java/org/apache/iceberg/UnboundPartitionSpec.java
index 27ed738882..cc8526f907 100644
--- a/api/src/main/java/org/apache/iceberg/UnboundPartitionSpec.java
+++ b/api/src/main/java/org/apache/iceberg/UnboundPartitionSpec.java
@@ -22,6 +22,7 @@ import java.util.List;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.transforms.Transform;
 import org.apache.iceberg.transforms.Transforms;
+import org.apache.iceberg.types.Type;
 
 public class UnboundPartitionSpec {
 
@@ -53,10 +54,17 @@ public class UnboundPartitionSpec {
     PartitionSpec.Builder builder = PartitionSpec.builderFor(schema).withSpecId(specId);
 
     for (UnboundPartitionField field : fields) {
+      Type fieldType = schema.findType(field.sourceId);
+      Transform<?, ?> transform;
+      if (fieldType != null) {
+        transform = Transforms.fromString(fieldType, field.transform.toString());
+      } else {
+        transform = Transforms.fromString(field.transform.toString());
+      }
       if (field.partitionId != null) {
-        builder.add(field.sourceId, field.partitionId, field.name, field.transform);
+        builder.add(field.sourceId, field.partitionId, field.name, transform);
       } else {
-        builder.add(field.sourceId, field.name, field.transform);
+        builder.add(field.sourceId, field.name, transform);
       }
     }
 
diff --git a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
index 732336d510..679f80a6f2 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/ProjectionUtil.java
@@ -231,7 +231,9 @@ class ProjectionUtil {
   static <T> UnboundPredicate<T> projectTransformPredicate(
       Transform<?, T> transform, String partitionName, BoundPredicate<?> pred) {
     if (pred.term() instanceof BoundTransform
-        && transform.equals(((BoundTransform<?, ?>) pred.term()).transform())) {
+        && transform
+            .toString()
+            .equals(((BoundTransform<?, ?>) pred.term()).transform().toString())) {
       // the bound value must be a T because the transform matches
       return (UnboundPredicate<T>) removeTransform(partitionName, pred);
     }
diff --git a/core/src/main/java/org/apache/iceberg/util/SortOrderUtil.java b/core/src/main/java/org/apache/iceberg/util/SortOrderUtil.java
index 2edbe7fb28..37e0c1fffa 100644
--- a/core/src/main/java/org/apache/iceberg/util/SortOrderUtil.java
+++ b/core/src/main/java/org/apache/iceberg/util/SortOrderUtil.java
@@ -32,7 +32,6 @@ import org.apache.iceberg.Table;
 import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.apache.iceberg.transforms.SortOrderVisitor;
-import org.apache.iceberg.transforms.Transform;
 
 public class SortOrderUtil {
 
@@ -67,7 +66,7 @@ public class SortOrderUtil {
 
     // make a map of the partition fields that need to be included in the clustering produced by the
     // sort order
-    Map<Pair<Transform<?, ?>, Integer>, PartitionField> requiredClusteringFields =
+    Map<Pair<String, Integer>, PartitionField> requiredClusteringFields =
         requiredClusteringFields(spec);
 
     // remove any partition fields that are clustered by the sort order by iterating over a prefix
@@ -75,8 +74,8 @@ public class SortOrderUtil {
     // this will stop when a non-partition field is found, or when the sort field only satisfies the
     // partition field.
     for (SortField sortField : sortOrder.fields()) {
-      Pair<Transform<?, ?>, Integer> sourceAndTransform =
-          Pair.of(sortField.transform(), sortField.sourceId());
+      Pair<String, Integer> sourceAndTransform =
+          Pair.of(sortField.transform().toString(), sortField.sourceId());
       if (requiredClusteringFields.containsKey(sourceAndTransform)) {
         requiredClusteringFields.remove(sourceAndTransform);
         continue; // keep processing the prefix
@@ -87,7 +86,7 @@ public class SortOrderUtil {
       for (PartitionField field : spec.fields()) {
         if (sortField.sourceId() == field.sourceId()
             && sortField.transform().satisfiesOrderOf(field.transform())) {
-          requiredClusteringFields.remove(Pair.of(field.transform(), field.sourceId()));
+          requiredClusteringFields.remove(Pair.of(field.transform().toString(), field.sourceId()));
         }
       }
 
@@ -107,14 +106,13 @@ public class SortOrderUtil {
     return builder.build();
   }
 
-  private static Map<Pair<Transform<?, ?>, Integer>, PartitionField> requiredClusteringFields(
+  private static Map<Pair<String, Integer>, PartitionField> requiredClusteringFields(
       PartitionSpec spec) {
-    Map<Pair<Transform<?, ?>, Integer>, PartitionField> requiredClusteringFields =
-        Maps.newLinkedHashMap();
+    Map<Pair<String, Integer>, PartitionField> requiredClusteringFields = Maps.newLinkedHashMap();
     for (PartitionField partField : spec.fields()) {
       if (!partField.transform().toString().equals("void")) {
         requiredClusteringFields.put(
-            Pair.of(partField.transform(), partField.sourceId()), partField);
+            Pair.of(partField.transform().toString(), partField.sourceId()), partField);
       }
     }
 
@@ -125,7 +123,7 @@ public class SortOrderUtil {
         if (!partField.equals(field)
             && partField.sourceId() == field.sourceId()
             && partField.transform().satisfiesOrderOf(field.transform())) {
-          requiredClusteringFields.remove(Pair.of(field.transform(), field.sourceId()));
+          requiredClusteringFields.remove(Pair.of(field.transform().toString(), field.sourceId()));
         }
       }
     }