You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ay...@apache.org on 2023/02/07 04:34:36 UTC

[hive] branch master updated: HIVE-26929: Iceberg: Allow creating iceberg tables without column definition when 'metadata_location' tblproperties is set. (#4011). (Ayush Saxena, reviewed by Ramesh Kumar Thangarajan)

This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 395a28322c2 HIVE-26929: Iceberg: Allow creating iceberg tables without column definition when 'metadata_location' tblproperties is set. (#4011). (Ayush Saxena, reviewed by Ramesh Kumar Thangarajan)
395a28322c2 is described below

commit 395a28322c25c744fa554ceb42b1e6f14716cab4
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Tue Feb 7 10:04:22 2023 +0530

    HIVE-26929: Iceberg: Allow creating iceberg tables without column definition when 'metadata_location' tblproperties is set. (#4011). (Ayush Saxena, reviewed by Ramesh Kumar Thangarajan)
---
 .../apache/iceberg/mr/hive/HiveIcebergSerDe.java   | 28 +++++++++++++---
 .../hive/TestHiveIcebergStorageHandlerNoScan.java  | 38 ++++++++++++++++++++++
 2 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java
index 681599d39a2..9acb26003a8 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java
@@ -44,9 +44,13 @@ import org.apache.iceberg.PartitionSpecParser;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.SchemaParser;
 import org.apache.iceberg.Table;
+import org.apache.iceberg.TableMetadata;
+import org.apache.iceberg.TableMetadataParser;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.exceptions.NoSuchTableException;
+import org.apache.iceberg.hadoop.HadoopFileIO;
 import org.apache.iceberg.hive.HiveSchemaUtil;
+import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.mr.Catalogs;
 import org.apache.iceberg.mr.InputFormatConfig;
 import org.apache.iceberg.mr.hive.serde.objectinspector.IcebergObjectInspector;
@@ -108,12 +112,26 @@ public class HiveIcebergSerDe extends AbstractSerDe {
         // During table creation we might not have the schema information from the Iceberg table, nor from the HMS
         // table. In this case we have to generate the schema using the serdeProperties which contains the info
         // provided in the CREATE TABLE query.
-        boolean autoConversion = configuration.getBoolean(InputFormatConfig.SCHEMA_AUTO_CONVERSION, false);
-        // If we can not load the table try the provided hive schema
-        this.tableSchema = hiveSchemaOrThrow(e, autoConversion);
-        // This is only for table creation, it is ok to have an empty partition column list
-        this.partitionColumns = ImmutableList.of();
 
+        if (serDeProperties.get("metadata_location") != null) {
+          // If metadata location is provided, extract the schema details from it.
+          try (FileIO fileIO = new HadoopFileIO(configuration)) {
+            TableMetadata metadata = TableMetadataParser.read(fileIO, serDeProperties.getProperty("metadata_location"));
+            this.tableSchema = metadata.schema();
+            this.partitionColumns =
+                metadata.spec().fields().stream().map(PartitionField::name).collect(Collectors.toList());
+            // Validate no schema is provided via create command
+            if (!getColumnNames().isEmpty() || !getPartitionColumnNames().isEmpty()) {
+              throw new SerDeException("Column names can not be provided along with metadata location.");
+            }
+          }
+        } else {
+          boolean autoConversion = configuration.getBoolean(InputFormatConfig.SCHEMA_AUTO_CONVERSION, false);
+          // If we can not load the table try the provided hive schema
+          this.tableSchema = hiveSchemaOrThrow(e, autoConversion);
+          // This is only for table creation, it is ok to have an empty partition column list
+          this.partitionColumns = ImmutableList.of();
+        }
         if (e instanceof NoSuchTableException &&
             HiveTableUtil.isCtas(serDeProperties) &&
             !Catalogs.hiveCatalog(configuration, serDeProperties)) {
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index b00017726b1..db76c4db6c7 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -70,6 +70,7 @@ import org.apache.iceberg.hive.HiveSchemaUtil;
 import org.apache.iceberg.hive.MetastoreUtil;
 import org.apache.iceberg.mr.Catalogs;
 import org.apache.iceberg.mr.InputFormatConfig;
+import org.apache.iceberg.mr.TestHelper;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
@@ -1751,6 +1752,43 @@ public class TestHiveIcebergStorageHandlerNoScan {
         ((BaseTable) testTables.loadTable(identifier)).operations().current().previousFiles().size());
   }
 
+  @Test // Covers CREATE TABLE with only 'metadata_location' set: schema and spec come from the metadata file.
+  public void testCreateTableWithMetadataLocationWithoutSchema() throws IOException, TException, InterruptedException {
+    Assume.assumeTrue("Create with metadata location is only supported for Hive Catalog tables",
+        testTableType.equals(TestTables.TestTableType.HIVE_CATALOG));
+    TableIdentifier sourceIdentifier = TableIdentifier.of("default", "source");
+    PartitionSpec spec =
+        PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("customer_id").build();
+    List<Record> records = TestHelper.generateRandomRecords(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, 4, 0L);
+    Table sourceTable =
+        testTables.createTable(shell, sourceIdentifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec,
+            FileFormat.PARQUET, records, 1,
+            ImmutableMap.<String, String>builder().put(InputFormatConfig.EXTERNAL_TABLE_PURGE, "FALSE").build());
+    String metadataLocation = ((BaseTable) sourceTable).operations().current().metadataFileLocation();
+    shell.executeStatement("DROP TABLE " + sourceIdentifier.name()); // metadataLocation is reused below
+    TableIdentifier targetIdentifier = TableIdentifier.of("default", "target");
+
+    String tblProps =
+        testTables.propertiesForCreateTableSQL(Collections.singletonMap("metadata_location", metadataLocation));
+
+    // Creating the table with explicit columns plus metadata_location must be rejected.
+    AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class,
+        "Column names can not be provided along with metadata location.", () -> {
+          shell.executeStatement("CREATE EXTERNAL TABLE target (id int) STORED BY ICEBERG " +
+              testTables.locationForCreateTableSQL(targetIdentifier) + tblProps);
+        });
+    shell.executeStatement(
+        "CREATE EXTERNAL TABLE target STORED BY ICEBERG " + testTables.locationForCreateTableSQL(targetIdentifier) +
+            tblProps);
+
+    // The target must carry the same partition spec fields and schema as the dropped source table.
+    Table targetIcebergTable =
+        IcebergTableUtil.getTable(shell.getHiveConf(), shell.metastore().getTable(targetIdentifier));
+    Assert.assertEquals(1, targetIcebergTable.spec().fields().size());
+    Assert.assertEquals(sourceTable.spec().fields(), targetIcebergTable.spec().fields());
+    Assert.assertEquals(sourceTable.schema().toString(), targetIcebergTable.schema().toString());
+  }
+
 
   /**
    * Checks that the new schema has newintcol and newstring col columns on both HMS and Iceberg sides