You are viewing a plain text version of this content. The canonical link for it is here [hyperlink not preserved in this plain-text copy].
Posted to commits@hive.apache.org by lp...@apache.org on 2022/08/30 12:51:40 UTC

[hive] branch master updated: HIVE-26476: map ORCFILE to ORC while creating an iceberg table (#3525) (Laszlo Pinter, reviewed by Adam Szita)

This is an automated email from the ASF dual-hosted git repository.

lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 64b731820cf HIVE-26476: map ORCFILE to ORC while creating an iceberg table (#3525) (Laszlo Pinter, reviewed by Adam Szita)
64b731820cf is described below

commit 64b731820cf462d3c03632fb4e99277ee539dd08
Author: László Pintér <47...@users.noreply.github.com>
AuthorDate: Tue Aug 30 14:51:31 2022 +0200

    HIVE-26476: map ORCFILE to ORC while creating an iceberg table (#3525) (Laszlo Pinter, reviewed by Adam Szita)
---
 .../iceberg/mr/hive/HiveIcebergMetaHook.java       | 44 +++++++++++++++++-----
 .../iceberg/mr/hive/TestHiveIcebergInserts.java    | 26 +++++++++++++
 ...create_iceberg_table_stored_as_fileformat.q.out | 10 ++---
 3 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index f3572673742..f7591f79648 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -20,6 +20,7 @@
 package org.apache.iceberg.mr.hive;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.EnumSet;
@@ -132,6 +133,16 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
   private AlterTableType currentAlterTableOp;
   private boolean createHMSTableInHook = false;
 
+  private enum FileFormat {
+    ORC("orc"), PARQUET("parquet"), AVRO("avro");
+
+    private final String label;
+
+    FileFormat(String label) {
+      this.label = label;
+    }
+  }
+
   public HiveIcebergMetaHook(Configuration conf) {
     this.conf = conf;
   }
@@ -188,6 +199,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
     if (hmsTable.getParameters().containsKey(BaseMetastoreTableOperations.METADATA_LOCATION_PROP)) {
       createHMSTableInHook = true;
     }
+
+    assertFileFormat(catalogProperties.getProperty(TableProperties.DEFAULT_FILE_FORMAT));
   }
 
   @Override
@@ -202,6 +215,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
         catalogProperties.put(TableProperties.ENGINE_HIVE_ENABLED, true);
       }
 
+      setFileFormat(catalogProperties.getProperty(TableProperties.DEFAULT_FILE_FORMAT));
+
       String metadataLocation = hmsTable.getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
       if (metadataLocation != null) {
         Catalogs.registerTable(conf, catalogProperties, metadataLocation);
@@ -403,7 +418,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
       catalogProperties = getCatalogProperties(hmsTable);
       catalogProperties.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(preAlterTableProperties.schema));
       catalogProperties.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(preAlterTableProperties.spec));
-      setFileFormat();
+      setFileFormat(preAlterTableProperties.format);
       if (Catalogs.hiveCatalog(conf, catalogProperties)) {
         catalogProperties.put(TableProperties.ENGINE_HIVE_ENABLED, true);
       }
@@ -507,15 +522,26 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
     }
   }
 
-  private void setFileFormat() {
-    String format = preAlterTableProperties.format.toLowerCase();
-    if (format.contains("orc")) {
-      catalogProperties.put(TableProperties.DEFAULT_FILE_FORMAT, "orc");
-    } else if (format.contains("parquet")) {
-      catalogProperties.put(TableProperties.DEFAULT_FILE_FORMAT, "parquet");
-    } else if (format.contains("avro")) {
-      catalogProperties.put(TableProperties.DEFAULT_FILE_FORMAT, "avro");
+  private void setFileFormat(String format) {
+    if (format == null) {
+      return;
+    }
+
+    String lowerCaseFormat = format.toLowerCase();
+    for (FileFormat fileFormat : FileFormat.values()) {
+      if (lowerCaseFormat.contains(fileFormat.label)) {
+        catalogProperties.put(TableProperties.DEFAULT_FILE_FORMAT, fileFormat.label);
+      }
+    }
+  }
+
+  private void assertFileFormat(String format) {
+    if (format == null) {
+      return;
     }
+    String lowerCaseFormat = format.toLowerCase();
+    Preconditions.checkArgument(Arrays.stream(FileFormat.values()).anyMatch(v -> lowerCaseFormat.contains(v.label)),
+        String.format("Unsupported fileformat %s", format));
   }
 
   private void setCommonHmsTablePropertiesForIceberg(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
index 0c15ba4e430..675677be8a5 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
@@ -152,6 +152,32 @@ public class TestHiveIcebergInserts extends HiveIcebergStorageHandlerWithEngineB
     HiveIcebergTestUtils.validateData(table, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 0);
   }
 
+  @Test
+  public void testInsertIntoORCFile() throws IOException {
+    Assume.assumeTrue("Testing the create table ... stored as ORCFILE syntax is enough for a single scenario.",
+        testTableType == TestTables.TestTableType.HIVE_CATALOG && fileFormat == FileFormat.ORC);
+    shell.executeStatement("CREATE TABLE t2(c0 DOUBLE , c1 DOUBLE , c2 DECIMAL) STORED BY " +
+        "ICEBERG STORED AS ORCFILE");
+    shell.executeStatement("INSERT INTO t2(c1, c0) VALUES(0.1803113419993464, 0.9381388537256228)");
+    List<Object[]> results = shell.executeStatement("SELECT * FROM t2");
+    Assert.assertEquals(1, results.size());
+    Assert.assertEquals(0.9381388537256228, results.get(0)[0]);
+    Assert.assertEquals(0.1803113419993464, results.get(0)[1]);
+    Assert.assertEquals(null, results.get(0)[2]);
+  }
+
+
+  @Test
+  public void testStoredByIcebergInTextFile() {
+    Assume.assumeTrue("Testing the create table ... stored as TEXTFILE syntax is enough for a single scenario.",
+        testTableType == TestTables.TestTableType.HIVE_CATALOG && fileFormat == FileFormat.ORC);
+    AssertHelpers.assertThrows("Create table should not work with textfile", IllegalArgumentException.class,
+        "Unsupported fileformat",
+        () ->
+            shell.executeStatement("CREATE TABLE IF NOT EXISTS t2(c0 DOUBLE , c1 DOUBLE , c2 DECIMAL) STORED BY " +
+                "ICEBERG STORED AS TEXTFILE"));
+  }
+
   @Test
   public void testInsertSupportedTypes() throws IOException {
     for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
diff --git a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out
index fe76a31b224..2bc658d6885 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out
@@ -43,7 +43,7 @@ Table Parameters:
 	totalSize           	#Masked#                   
 #### A masked pattern was here ####
 	uuid                	#Masked#
-	write.format.default	ORC                 
+	write.format.default	orc                 
 	 	 
 # Storage Information	 	 
 SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe	 
@@ -104,7 +104,7 @@ Table Parameters:
 	totalSize           	#Masked#                   
 #### A masked pattern was here ####
 	uuid                	#Masked#
-	write.format.default	PARQUET             
+	write.format.default	parquet             
 	 	 
 # Storage Information	 	 
 SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe	 
@@ -165,7 +165,7 @@ Table Parameters:
 	totalSize           	#Masked#                   
 #### A masked pattern was here ####
 	uuid                	#Masked#
-	write.format.default	AVRO                
+	write.format.default	avro                
 	 	 
 # Storage Information	 	 
 SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe	 
@@ -226,7 +226,7 @@ Table Parameters:
 	totalSize           	#Masked#                   
 #### A masked pattern was here ####
 	uuid                	#Masked#
-	write.format.default	AVRO                
+	write.format.default	avro                
 	 	 
 # Storage Information	 	 
 SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe	 
@@ -284,7 +284,7 @@ Table Parameters:
 	totalSize           	#Masked#                   
 #### A masked pattern was here ####
 	uuid                	#Masked#
-	write.format.default	ORC                 
+	write.format.default	orc                 
 	 	 
 # Storage Information	 	 
 SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe