You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by lp...@apache.org on 2022/08/30 12:51:40 UTC
[hive] branch master updated: HIVE-26476: map ORCFILE to ORC while creating an iceberg table (#3525) (Laszlo Pinter, reviewed by Adam Szita)
This is an automated email from the ASF dual-hosted git repository.
lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 64b731820cf HIVE-26476: map ORCFILE to ORC while creating an iceberg table (#3525) (Laszlo Pinter, reviewed by Adam Szita)
64b731820cf is described below
commit 64b731820cf462d3c03632fb4e99277ee539dd08
Author: László Pintér <47...@users.noreply.github.com>
AuthorDate: Tue Aug 30 14:51:31 2022 +0200
HIVE-26476: map ORCFILE to ORC while creating an iceberg table (#3525) (Laszlo Pinter, reviewed by Adam Szita)
---
.../iceberg/mr/hive/HiveIcebergMetaHook.java | 44 +++++++++++++++++-----
.../iceberg/mr/hive/TestHiveIcebergInserts.java | 26 +++++++++++++
...create_iceberg_table_stored_as_fileformat.q.out | 10 ++---
3 files changed, 66 insertions(+), 14 deletions(-)
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index f3572673742..f7591f79648 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -20,6 +20,7 @@
package org.apache.iceberg.mr.hive;
import java.io.IOException;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
@@ -132,6 +133,16 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
private AlterTableType currentAlterTableOp;
private boolean createHMSTableInHook = false;
+ private enum FileFormat {
+ ORC("orc"), PARQUET("parquet"), AVRO("avro");
+
+ private final String label;
+
+ FileFormat(String label) {
+ this.label = label;
+ }
+ }
+
public HiveIcebergMetaHook(Configuration conf) {
this.conf = conf;
}
@@ -188,6 +199,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
if (hmsTable.getParameters().containsKey(BaseMetastoreTableOperations.METADATA_LOCATION_PROP)) {
createHMSTableInHook = true;
}
+
+ assertFileFormat(catalogProperties.getProperty(TableProperties.DEFAULT_FILE_FORMAT));
}
@Override
@@ -202,6 +215,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
catalogProperties.put(TableProperties.ENGINE_HIVE_ENABLED, true);
}
+ setFileFormat(catalogProperties.getProperty(TableProperties.DEFAULT_FILE_FORMAT));
+
String metadataLocation = hmsTable.getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
if (metadataLocation != null) {
Catalogs.registerTable(conf, catalogProperties, metadataLocation);
@@ -403,7 +418,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
catalogProperties = getCatalogProperties(hmsTable);
catalogProperties.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(preAlterTableProperties.schema));
catalogProperties.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(preAlterTableProperties.spec));
- setFileFormat();
+ setFileFormat(preAlterTableProperties.format);
if (Catalogs.hiveCatalog(conf, catalogProperties)) {
catalogProperties.put(TableProperties.ENGINE_HIVE_ENABLED, true);
}
@@ -507,15 +522,26 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
}
}
- private void setFileFormat() {
- String format = preAlterTableProperties.format.toLowerCase();
- if (format.contains("orc")) {
- catalogProperties.put(TableProperties.DEFAULT_FILE_FORMAT, "orc");
- } else if (format.contains("parquet")) {
- catalogProperties.put(TableProperties.DEFAULT_FILE_FORMAT, "parquet");
- } else if (format.contains("avro")) {
- catalogProperties.put(TableProperties.DEFAULT_FILE_FORMAT, "avro");
+ private void setFileFormat(String format) {
+ if (format == null) {
+ return;
+ }
+
+ String lowerCaseFormat = format.toLowerCase();
+ for (FileFormat fileFormat : FileFormat.values()) {
+ if (lowerCaseFormat.contains(fileFormat.label)) {
+ catalogProperties.put(TableProperties.DEFAULT_FILE_FORMAT, fileFormat.label);
+ }
+ }
+ }
+
+ private void assertFileFormat(String format) {
+ if (format == null) {
+ return;
}
+ String lowerCaseFormat = format.toLowerCase();
+ Preconditions.checkArgument(Arrays.stream(FileFormat.values()).anyMatch(v -> lowerCaseFormat.contains(v.label)),
+ String.format("Unsupported fileformat %s", format));
}
private void setCommonHmsTablePropertiesForIceberg(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
index 0c15ba4e430..675677be8a5 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
@@ -152,6 +152,32 @@ public class TestHiveIcebergInserts extends HiveIcebergStorageHandlerWithEngineB
HiveIcebergTestUtils.validateData(table, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 0);
}
+ @Test
+ public void testInsertIntoORCFile() throws IOException {
+ Assume.assumeTrue("Testing the create table ... stored as ORCFILE syntax is enough for a single scenario.",
+ testTableType == TestTables.TestTableType.HIVE_CATALOG && fileFormat == FileFormat.ORC);
+ shell.executeStatement("CREATE TABLE t2(c0 DOUBLE , c1 DOUBLE , c2 DECIMAL) STORED BY " +
+ "ICEBERG STORED AS ORCFILE");
+ shell.executeStatement("INSERT INTO t2(c1, c0) VALUES(0.1803113419993464, 0.9381388537256228)");
+ List<Object[]> results = shell.executeStatement("SELECT * FROM t2");
+ Assert.assertEquals(1, results.size());
+ Assert.assertEquals(0.9381388537256228, results.get(0)[0]);
+ Assert.assertEquals(0.1803113419993464, results.get(0)[1]);
+ Assert.assertEquals(null, results.get(0)[2]);
+ }
+
+
+ @Test
+ public void testStoredByIcebergInTextFile() {
+ Assume.assumeTrue("Testing the create table ... stored as TEXTFILE syntax is enough for a single scenario.",
+ testTableType == TestTables.TestTableType.HIVE_CATALOG && fileFormat == FileFormat.ORC);
+ AssertHelpers.assertThrows("Create table should not work with textfile", IllegalArgumentException.class,
+ "Unsupported fileformat",
+ () ->
+ shell.executeStatement("CREATE TABLE IF NOT EXISTS t2(c0 DOUBLE , c1 DOUBLE , c2 DECIMAL) STORED BY " +
+ "ICEBERG STORED AS TEXTFILE"));
+ }
+
@Test
public void testInsertSupportedTypes() throws IOException {
for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
diff --git a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out
index fe76a31b224..2bc658d6885 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/create_iceberg_table_stored_as_fileformat.q.out
@@ -43,7 +43,7 @@ Table Parameters:
totalSize #Masked#
#### A masked pattern was here ####
uuid #Masked#
- write.format.default ORC
+ write.format.default orc
# Storage Information
SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
@@ -104,7 +104,7 @@ Table Parameters:
totalSize #Masked#
#### A masked pattern was here ####
uuid #Masked#
- write.format.default PARQUET
+ write.format.default parquet
# Storage Information
SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
@@ -165,7 +165,7 @@ Table Parameters:
totalSize #Masked#
#### A masked pattern was here ####
uuid #Masked#
- write.format.default AVRO
+ write.format.default avro
# Storage Information
SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
@@ -226,7 +226,7 @@ Table Parameters:
totalSize #Masked#
#### A masked pattern was here ####
uuid #Masked#
- write.format.default AVRO
+ write.format.default avro
# Storage Information
SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
@@ -284,7 +284,7 @@ Table Parameters:
totalSize #Masked#
#### A masked pattern was here ####
uuid #Masked#
- write.format.default ORC
+ write.format.default orc
# Storage Information
SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe