You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pv...@apache.org on 2022/02/17 06:28:28 UTC
[hive] branch master updated: HIVE-25961: Altering partition specification parameters for Iceberg tables are not working (Peter Vary reviewed by Laszlo Pinter) (#3035)
This is an automated email from the ASF dual-hosted git repository.
pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 970c41c HIVE-25961: Altering partition specification parameters for Iceberg tables are not working (Peter Vary reviewed by Laszlo Pinter) (#3035)
970c41c is described below
commit 970c41c302719cdca9bb9d004c6beca19451188b
Author: pvary <pv...@cloudera.com>
AuthorDate: Thu Feb 17 07:28:08 2022 +0100
HIVE-25961: Altering partition specification parameters for Iceberg tables are not working (Peter Vary reviewed by Laszlo Pinter) (#3035)
---
.../apache/iceberg/mr/hive/IcebergTableUtil.java | 73 ++++++--------
.../hive/TestHiveIcebergStorageHandlerNoScan.java | 111 +++++++++++++++++++++
.../hadoop/hive/ql/parse/PartitionTransform.java | 5 +-
3 files changed, 143 insertions(+), 46 deletions(-)
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
index 9a1f316..63ddfc3 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
@@ -21,14 +21,11 @@ package org.apache.iceberg.mr.hive;
import java.util.List;
import java.util.Properties;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec;
import org.apache.hadoop.hive.ql.session.SessionStateUtil;
-import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
@@ -146,51 +143,39 @@ public class IcebergTableUtil {
return;
}
- List<String> newPartitionNames =
- newPartitionSpec.fields().stream().map(PartitionField::name).collect(Collectors.toList());
- List<String> currentPartitionNames = table.spec().fields().stream().map(PartitionField::name)
- .collect(Collectors.toList());
- List<String> intersectingPartitionNames =
- currentPartitionNames.stream().filter(newPartitionNames::contains).collect(Collectors.toList());
+ // delete every field from the old partition spec
+ UpdatePartitionSpec updatePartitionSpec = table.updateSpec().caseSensitive(false);
+ table.spec().fields().forEach(field -> updatePartitionSpec.removeField(field.name()));
- // delete those partitions which are not present among the new partion spec
- UpdatePartitionSpec updatePartitionSpec = table.updateSpec();
- currentPartitionNames.stream().filter(p -> !intersectingPartitionNames.contains(p))
- .forEach(updatePartitionSpec::removeField);
- updatePartitionSpec.apply();
-
- // add new partitions which are not yet present
List<PartitionTransformSpec> partitionTransformSpecList = SessionStateUtil
.getResource(configuration, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC)
.map(o -> (List<PartitionTransformSpec>) o).orElseGet(() -> null);
- IntStream.range(0, partitionTransformSpecList.size())
- .filter(i -> !intersectingPartitionNames.contains(newPartitionSpec.fields().get(i).name()))
- .forEach(i -> {
- PartitionTransformSpec spec = partitionTransformSpecList.get(i);
- switch (spec.getTransformType()) {
- case IDENTITY:
- updatePartitionSpec.addField(spec.getColumnName());
- break;
- case YEAR:
- updatePartitionSpec.addField(Expressions.year(spec.getColumnName()));
- break;
- case MONTH:
- updatePartitionSpec.addField(Expressions.month(spec.getColumnName()));
- break;
- case DAY:
- updatePartitionSpec.addField(Expressions.day(spec.getColumnName()));
- break;
- case HOUR:
- updatePartitionSpec.addField(Expressions.hour(spec.getColumnName()));
- break;
- case TRUNCATE:
- updatePartitionSpec.addField(Expressions.truncate(spec.getColumnName(), spec.getTransformParam().get()));
- break;
- case BUCKET:
- updatePartitionSpec.addField(Expressions.bucket(spec.getColumnName(), spec.getTransformParam().get()));
- break;
- }
- });
+
+ partitionTransformSpecList.forEach(spec -> {
+ switch (spec.getTransformType()) {
+ case IDENTITY:
+ updatePartitionSpec.addField(spec.getColumnName());
+ break;
+ case YEAR:
+ updatePartitionSpec.addField(Expressions.year(spec.getColumnName()));
+ break;
+ case MONTH:
+ updatePartitionSpec.addField(Expressions.month(spec.getColumnName()));
+ break;
+ case DAY:
+ updatePartitionSpec.addField(Expressions.day(spec.getColumnName()));
+ break;
+ case HOUR:
+ updatePartitionSpec.addField(Expressions.hour(spec.getColumnName()));
+ break;
+ case TRUNCATE:
+ updatePartitionSpec.addField(Expressions.truncate(spec.getColumnName(), spec.getTransformParam().get()));
+ break;
+ case BUCKET:
+ updatePartitionSpec.addField(Expressions.bucket(spec.getColumnName(), spec.getTransformParam().get()));
+ break;
+ }
+ });
updatePartitionSpec.commit();
}
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index fc90722..136ad4f 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -286,6 +286,117 @@ public class TestHiveIcebergStorageHandlerNoScan {
}
@Test
+ public void testSetPartitionTransformSameField() {
+ Schema schema = new Schema(
+ optional(1, "id", Types.LongType.get()),
+ optional(2, "truncate_field", Types.StringType.get()),
+ optional(3, "bucket_field", Types.StringType.get())
+ );
+
+ TableIdentifier identifier = TableIdentifier.of("default", "part_test");
+ shell.executeStatement("CREATE EXTERNAL TABLE " + identifier +
+ " PARTITIONED BY SPEC (truncate(2, truncate_field), bucket(2, bucket_field))" +
+ " STORED BY ICEBERG " +
+ testTables.locationForCreateTableSQL(identifier) +
+ "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" +
+ SchemaParser.toJson(schema) + "', " +
+ "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");
+
+ PartitionSpec spec = PartitionSpec.builderFor(schema)
+ .truncate("truncate_field", 2)
+ .bucket("bucket_field", 2)
+ .build();
+
+ Table table = testTables.loadTable(identifier);
+ Assert.assertEquals(spec, table.spec());
+
+ // Change one, keep one
+ shell.executeStatement("ALTER TABLE default.part_test " +
+ "SET PARTITION SPEC (truncate(3, truncate_field), bucket(2, bucket_field) )");
+
+ spec = PartitionSpec.builderFor(schema)
+ .withSpecId(1)
+ .alwaysNull("truncate_field", "truncate_field_trunc")
+ .bucket("bucket_field", 2)
+ .truncate("truncate_field", 3, "truncate_field_trunc_3")
+ .build();
+
+ table.refresh();
+ Assert.assertEquals(spec, table.spec());
+
+ // Change one again, keep the other one
+ shell.executeStatement("ALTER TABLE default.part_test " +
+ "SET PARTITION SPEC (truncate(4, truncate_field), bucket(2, bucket_field) )");
+
+ spec = PartitionSpec.builderFor(schema)
+ .withSpecId(2)
+ .alwaysNull("truncate_field", "truncate_field_trunc")
+ .bucket("bucket_field", 2)
+ .alwaysNull("truncate_field", "truncate_field_trunc_3")
+ .truncate("truncate_field", 4, "truncate_field_trunc_4")
+ .build();
+
+ table.refresh();
+ Assert.assertEquals(spec, table.spec());
+
+ // Keep the already changed, change the other one (change the order of clauses in the spec)
+ shell.executeStatement("ALTER TABLE default.part_test " +
+ "SET PARTITION SPEC (bucket(3, bucket_field), truncate(4, truncate_field))");
+
+ spec = PartitionSpec.builderFor(schema)
+ .withSpecId(3)
+ .alwaysNull("truncate_field", "truncate_field_trunc")
+ .alwaysNull("bucket_field", "bucket_field_bucket")
+ .alwaysNull("truncate_field", "truncate_field_trunc_3")
+ .truncate("truncate_field", 4, "truncate_field_trunc_4")
+ .bucket("bucket_field", 3, "bucket_field_bucket_3")
+ .build();
+
+ table.refresh();
+ Assert.assertEquals(spec, table.spec());
+ }
+
+ @Test
+ public void testSetPartitionTransformCaseSensitive() {
+ Schema schema = new Schema(
+ optional(1, "id", Types.LongType.get()),
+ optional(2, "truncate_field", Types.StringType.get()),
+ optional(3, "bucket_field", Types.StringType.get())
+ );
+
+ TableIdentifier identifier = TableIdentifier.of("default", "part_test");
+ shell.executeStatement("CREATE EXTERNAL TABLE " + identifier +
+ " PARTITIONED BY SPEC (truncate(2, truncate_field), bucket(2, bucket_field))" +
+ " STORED BY ICEBERG " +
+ testTables.locationForCreateTableSQL(identifier) +
+ "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" +
+ SchemaParser.toJson(schema) + "', " +
+ "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");
+
+ PartitionSpec spec = PartitionSpec.builderFor(schema)
+ .truncate("truncate_field", 2)
+ .bucket("bucket_field", 2)
+ .build();
+
+ Table table = testTables.loadTable(identifier);
+ Assert.assertEquals(spec, table.spec());
+
+ shell.executeStatement("ALTER TABLE default.part_test " +
+ "SET PARTITION SPEC (truncaTe(3, truncate_Field), buCket(3, bUckeT_field))");
+
+ spec = PartitionSpec.builderFor(schema)
+ .withSpecId(1)
+ .alwaysNull("truncate_field", "truncate_field_trunc")
+ .alwaysNull("bucket_field", "bucket_field_bucket")
+ .truncate("truncate_field", 3, "truncate_field_trunc_3")
+ .bucket("bucket_field", 3, "bucket_field_bucket_3")
+ .build();
+
+ table.refresh();
+ Assert.assertEquals(spec, table.spec());
+ }
+
+ @Test
public void testCreateDropTable() throws TException, IOException, InterruptedException {
TableIdentifier identifier = TableIdentifier.of("default", "customers");
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
index 8013ca0..50a6371 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec.TransformType;
import java.util.ArrayList;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
@@ -68,14 +69,14 @@ public class PartitionTransform {
case HiveParser.TOK_MONTH:
case HiveParser.TOK_DAY:
case HiveParser.TOK_HOUR:
- spec.setColumnName(grandChild.getChild(0).getText());
+ spec.setColumnName(grandChild.getChild(0).getText().toLowerCase());
spec.setTransformType(TRANSFORMS.get(grandChild.getToken().getType()));
break;
case HiveParser.TOK_TRUNCATE:
case HiveParser.TOK_BUCKET:
spec.setTransformType(TRANSFORMS.get(grandChild.getToken().getType()));
spec.setTransformParam(Optional.ofNullable(Integer.valueOf(grandChild.getChild(0).getText())));
- spec.setColumnName(grandChild.getChild(1).getText());
+ spec.setColumnName(grandChild.getChild(1).getText().toLowerCase());
break;
}
}