You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pv...@apache.org on 2022/02/17 06:28:28 UTC
[hive] branch master updated: HIVE-25961: Altering partition specification parameters for Iceberg tables are not working (Peter Vary reviewed by Laszlo Pinter) (#3035)

This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 970c41c  HIVE-25961: Altering partition specification parameters for Iceberg tables are not working (Peter Vary reviewed by Laszlo Pinter) (#3035)
970c41c is described below

commit 970c41c302719cdca9bb9d004c6beca19451188b
Author: pvary <pv...@cloudera.com>
AuthorDate: Thu Feb 17 07:28:08 2022 +0100

    HIVE-25961: Altering partition specification parameters for Iceberg tables are not working (Peter Vary reviewed by Laszlo Pinter) (#3035)
---
 .../apache/iceberg/mr/hive/IcebergTableUtil.java   |  73 ++++++--------
 .../hive/TestHiveIcebergStorageHandlerNoScan.java  | 111 +++++++++++++++++++++
 .../hadoop/hive/ql/parse/PartitionTransform.java   |   5 +-
 3 files changed, 143 insertions(+), 46 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
index 9a1f316..63ddfc3 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
@@ -21,14 +21,11 @@ package org.apache.iceberg.mr.hive;
 
 import java.util.List;
 import java.util.Properties;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.QueryState;
 import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec;
 import org.apache.hadoop.hive.ql.session.SessionStateUtil;
-import org.apache.iceberg.PartitionField;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.Table;
@@ -146,51 +143,39 @@ public class IcebergTableUtil {
       return;
     }
 
-    List<String> newPartitionNames =
-        newPartitionSpec.fields().stream().map(PartitionField::name).collect(Collectors.toList());
-    List<String> currentPartitionNames = table.spec().fields().stream().map(PartitionField::name)
-        .collect(Collectors.toList());
-    List<String> intersectingPartitionNames =
-        currentPartitionNames.stream().filter(newPartitionNames::contains).collect(Collectors.toList());
+    // delete every field from the old partition spec
+    UpdatePartitionSpec updatePartitionSpec = table.updateSpec().caseSensitive(false);
+    table.spec().fields().forEach(field -> updatePartitionSpec.removeField(field.name()));
 
-    // delete those partitions which are not present among the new partion spec
-    UpdatePartitionSpec updatePartitionSpec = table.updateSpec();
-    currentPartitionNames.stream().filter(p -> !intersectingPartitionNames.contains(p))
-        .forEach(updatePartitionSpec::removeField);
-    updatePartitionSpec.apply();
-
-    // add new partitions which are not yet present
     List<PartitionTransformSpec> partitionTransformSpecList = SessionStateUtil
         .getResource(configuration, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC)
         .map(o -> (List<PartitionTransformSpec>) o).orElseGet(() -> null);
-    IntStream.range(0, partitionTransformSpecList.size())
-        .filter(i -> !intersectingPartitionNames.contains(newPartitionSpec.fields().get(i).name()))
-        .forEach(i -> {
-          PartitionTransformSpec spec = partitionTransformSpecList.get(i);
-          switch (spec.getTransformType()) {
-            case IDENTITY:
-              updatePartitionSpec.addField(spec.getColumnName());
-              break;
-            case YEAR:
-              updatePartitionSpec.addField(Expressions.year(spec.getColumnName()));
-              break;
-            case MONTH:
-              updatePartitionSpec.addField(Expressions.month(spec.getColumnName()));
-              break;
-            case DAY:
-              updatePartitionSpec.addField(Expressions.day(spec.getColumnName()));
-              break;
-            case HOUR:
-              updatePartitionSpec.addField(Expressions.hour(spec.getColumnName()));
-              break;
-            case TRUNCATE:
-              updatePartitionSpec.addField(Expressions.truncate(spec.getColumnName(), spec.getTransformParam().get()));
-              break;
-            case BUCKET:
-              updatePartitionSpec.addField(Expressions.bucket(spec.getColumnName(), spec.getTransformParam().get()));
-              break;
-          }
-        });
+
+    partitionTransformSpecList.forEach(spec -> {
+      switch (spec.getTransformType()) {
+        case IDENTITY:
+          updatePartitionSpec.addField(spec.getColumnName());
+          break;
+        case YEAR:
+          updatePartitionSpec.addField(Expressions.year(spec.getColumnName()));
+          break;
+        case MONTH:
+          updatePartitionSpec.addField(Expressions.month(spec.getColumnName()));
+          break;
+        case DAY:
+          updatePartitionSpec.addField(Expressions.day(spec.getColumnName()));
+          break;
+        case HOUR:
+          updatePartitionSpec.addField(Expressions.hour(spec.getColumnName()));
+          break;
+        case TRUNCATE:
+          updatePartitionSpec.addField(Expressions.truncate(spec.getColumnName(), spec.getTransformParam().get()));
+          break;
+        case BUCKET:
+          updatePartitionSpec.addField(Expressions.bucket(spec.getColumnName(), spec.getTransformParam().get()));
+          break;
+      }
+    });
 
     updatePartitionSpec.commit();
   }
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index fc90722..136ad4f 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -286,6 +286,117 @@ public class TestHiveIcebergStorageHandlerNoScan {
   }
 
   @Test
+  public void testSetPartitionTransformSameField() {
+    Schema schema = new Schema(
+        optional(1, "id", Types.LongType.get()),
+        optional(2, "truncate_field", Types.StringType.get()),
+        optional(3, "bucket_field", Types.StringType.get())
+    );
+
+    TableIdentifier identifier = TableIdentifier.of("default", "part_test");
+    shell.executeStatement("CREATE EXTERNAL TABLE " + identifier +
+        " PARTITIONED BY SPEC (truncate(2, truncate_field), bucket(2, bucket_field))" +
+        " STORED BY ICEBERG " +
+        testTables.locationForCreateTableSQL(identifier) +
+        "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" +
+        SchemaParser.toJson(schema) + "', " +
+        "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");
+
+    PartitionSpec spec = PartitionSpec.builderFor(schema)
+        .truncate("truncate_field", 2)
+        .bucket("bucket_field", 2)
+        .build();
+
+    Table table = testTables.loadTable(identifier);
+    Assert.assertEquals(spec, table.spec());
+
+    // Change one, keep one
+    shell.executeStatement("ALTER TABLE default.part_test " +
+        "SET PARTITION SPEC (truncate(3, truncate_field), bucket(2, bucket_field) )");
+
+    spec = PartitionSpec.builderFor(schema)
+        .withSpecId(1)
+        .alwaysNull("truncate_field", "truncate_field_trunc")
+        .bucket("bucket_field", 2)
+        .truncate("truncate_field", 3, "truncate_field_trunc_3")
+        .build();
+
+    table.refresh();
+    Assert.assertEquals(spec, table.spec());
+
+    // Change one again, keep the other one
+    shell.executeStatement("ALTER TABLE default.part_test " +
+        "SET PARTITION SPEC (truncate(4, truncate_field), bucket(2, bucket_field) )");
+
+    spec = PartitionSpec.builderFor(schema)
+        .withSpecId(2)
+        .alwaysNull("truncate_field", "truncate_field_trunc")
+        .bucket("bucket_field", 2)
+        .alwaysNull("truncate_field", "truncate_field_trunc_3")
+        .truncate("truncate_field", 4, "truncate_field_trunc_4")
+        .build();
+
+    table.refresh();
+    Assert.assertEquals(spec, table.spec());
+
+    // Keep the already changed, change the other one (change the order of clauses in the spec)
+    shell.executeStatement("ALTER TABLE default.part_test " +
+        "SET PARTITION SPEC (bucket(3, bucket_field), truncate(4, truncate_field))");
+
+    spec = PartitionSpec.builderFor(schema)
+        .withSpecId(3)
+        .alwaysNull("truncate_field", "truncate_field_trunc")
+        .alwaysNull("bucket_field", "bucket_field_bucket")
+        .alwaysNull("truncate_field", "truncate_field_trunc_3")
+        .truncate("truncate_field", 4, "truncate_field_trunc_4")
+        .bucket("bucket_field", 3, "bucket_field_bucket_3")
+        .build();
+
+    table.refresh();
+    Assert.assertEquals(spec, table.spec());
+  }
+
+  @Test
+  public void testSetPartitionTransformCaseSensitive() {
+    Schema schema = new Schema(
+        optional(1, "id", Types.LongType.get()),
+        optional(2, "truncate_field", Types.StringType.get()),
+        optional(3, "bucket_field", Types.StringType.get())
+    );
+
+    TableIdentifier identifier = TableIdentifier.of("default", "part_test");
+    shell.executeStatement("CREATE EXTERNAL TABLE " + identifier +
+        " PARTITIONED BY SPEC (truncate(2, truncate_field), bucket(2, bucket_field))" +
+        " STORED BY ICEBERG " +
+        testTables.locationForCreateTableSQL(identifier) +
+        "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" +
+        SchemaParser.toJson(schema) + "', " +
+        "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");
+
+    PartitionSpec spec = PartitionSpec.builderFor(schema)
+        .truncate("truncate_field", 2)
+        .bucket("bucket_field", 2)
+        .build();
+
+    Table table = testTables.loadTable(identifier);
+    Assert.assertEquals(spec, table.spec());
+
+    shell.executeStatement("ALTER TABLE default.part_test " +
+        "SET PARTITION SPEC (truncaTe(3, truncate_Field), buCket(3, bUckeT_field))");
+
+    spec = PartitionSpec.builderFor(schema)
+        .withSpecId(1)
+        .alwaysNull("truncate_field", "truncate_field_trunc")
+        .alwaysNull("bucket_field", "bucket_field_bucket")
+        .truncate("truncate_field", 3, "truncate_field_trunc_3")
+        .bucket("bucket_field", 3, "bucket_field_bucket_3")
+        .build();
+
+    table.refresh();
+    Assert.assertEquals(spec, table.spec());
+  }
+
+  @Test
   public void testCreateDropTable() throws TException, IOException, InterruptedException {
     TableIdentifier identifier = TableIdentifier.of("default", "customers");
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
index 8013ca0..50a6371 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec.TransformType;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
 import java.util.stream.Collectors;
@@ -68,14 +69,14 @@ public class PartitionTransform {
           case HiveParser.TOK_MONTH:
           case HiveParser.TOK_DAY:
           case HiveParser.TOK_HOUR:
-            spec.setColumnName(grandChild.getChild(0).getText());
+            spec.setColumnName(grandChild.getChild(0).getText().toLowerCase());
             spec.setTransformType(TRANSFORMS.get(grandChild.getToken().getType()));
             break;
           case HiveParser.TOK_TRUNCATE:
           case HiveParser.TOK_BUCKET:
             spec.setTransformType(TRANSFORMS.get(grandChild.getToken().getType()));
             spec.setTransformParam(Optional.ofNullable(Integer.valueOf(grandChild.getChild(0).getText())));
-            spec.setColumnName(grandChild.getChild(1).getText());
+            spec.setColumnName(grandChild.getChild(1).getText().toLowerCase());
             break;
         }
       }