Posted to commits@iceberg.apache.org by pv...@apache.org on 2021/02/05 10:23:19 UTC

[iceberg] branch master updated: Hive: Avoid drop table related exceptions in MetaHook (#2191)

This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 469f6c3  Hive: Avoid drop table related exceptions in MetaHook (#2191)
469f6c3 is described below

commit 469f6c3f640293df6c629a693450736350aa6fab
Author: Marton Bod <ma...@gmail.com>
AuthorDate: Fri Feb 5 11:23:02 2021 +0100

    Hive: Avoid drop table related exceptions in MetaHook (#2191)
---
 .../iceberg/mr/hive/HiveIcebergMetaHook.java       | 20 ++++++++---
 .../hive/TestHiveIcebergStorageHandlerNoScan.java  | 42 ++++++++++++++++++++++
 .../org/apache/iceberg/mr/hive/TestTables.java     | 42 +++++++++++++---------
 3 files changed, 83 insertions(+), 21 deletions(-)

diff --git a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 54c2247..c76e849 100644
--- a/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/mr/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -171,11 +171,21 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
   @Override
   public void commitDropTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, boolean deleteData) {
     if (deleteData && deleteIcebergTable) {
-      if (!Catalogs.hiveCatalog(conf)) {
-        LOG.info("Dropping with purge all the data for table {}.{}", hmsTable.getDbName(), hmsTable.getTableName());
-        Catalogs.dropTable(conf, catalogProperties);
-      } else {
-        CatalogUtil.dropTableData(deleteIo, deleteMetadata);
+      try {
+        if (!Catalogs.hiveCatalog(conf)) {
+          LOG.info("Dropping with purge all the data for table {}.{}", hmsTable.getDbName(), hmsTable.getTableName());
+          Catalogs.dropTable(conf, catalogProperties);
+        } else {
+          // do nothing if metadata folder has been deleted already (Hive 4 behaviour for purge=TRUE)
+          if (deleteIo.newInputFile(deleteMetadata.location()).exists()) {
+            CatalogUtil.dropTableData(deleteIo, deleteMetadata);
+          }
+        }
+      } catch (Exception e) {
+        // we want to successfully complete the Hive DROP TABLE command despite catalog-related exceptions here
+        // e.g. we wish to successfully delete a Hive table even if the underlying Hadoop table has already been deleted
+        LOG.warn("Exception during commitDropTable operation for table {}.{}.",
+            hmsTable.getDbName(), hmsTable.getTableName(), e);
       }
     }
   }
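
The heart of the change is the defensive existence check plus the catch-all around both catalog paths. As a standalone sketch (the class and method names here are illustrative; FileIO, TableMetadata and CatalogUtil are the actual Iceberg types used above):

    import org.apache.iceberg.CatalogUtil;
    import org.apache.iceberg.TableMetadata;
    import org.apache.iceberg.io.FileIO;

    class SafeDrop {
      // Purge Iceberg data and metadata only if the table location still exists;
      // Hive 4 may have removed it already when external.table.purge=TRUE.
      static void dropTableDataIfPresent(FileIO io, TableMetadata metadata) {
        try {
          if (io.newInputFile(metadata.location()).exists()) {
            CatalogUtil.dropTableData(io, metadata);
          }
        } catch (Exception e) {
          // Swallow so the surrounding Hive DROP TABLE still completes.
        }
      }
    }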
diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index 7311bb8..a57bf18 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -26,6 +26,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 import java.util.stream.Collectors;
 import org.apache.hadoop.fs.FileSystem;
@@ -48,6 +49,7 @@ import org.apache.iceberg.hive.HiveSchemaUtil;
 import org.apache.iceberg.hive.MetastoreUtil;
 import org.apache.iceberg.mr.Catalogs;
 import org.apache.iceberg.mr.InputFormatConfig;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
 import org.apache.iceberg.types.Type;
@@ -56,6 +58,7 @@ import org.apache.thrift.TException;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Assert;
+import org.junit.Assume;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Rule;
@@ -568,6 +571,45 @@ public class TestHiveIcebergStorageHandlerNoScan {
     }
   }
 
+  @Test
+  public void testDropTableWithAppendedData() throws IOException {
+    TableIdentifier identifier = TableIdentifier.of("default", "customers");
+
+    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, SPEC,
+        FileFormat.PARQUET, ImmutableList.of());
+
+    org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
+    testTables.appendIcebergTable(shell.getHiveConf(), icebergTable, FileFormat.PARQUET, null,
+        HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
+
+    shell.executeStatement("DROP TABLE customers");
+  }
+
+  @Test
+  public void testDropHiveTableWithoutUnderlyingTable() throws IOException {
+    Assume.assumeFalse("Not relevant for HiveCatalog", Catalogs.hiveCatalog(shell.getHiveConf()));
+
+    TableIdentifier identifier = TableIdentifier.of("default", "customers");
+    // Create the Iceberg table in non-HiveCatalog
+    testTables.createIcebergTable(shell.getHiveConf(), identifier.name(),
+        HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, FileFormat.PARQUET,
+        HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
+
+    // Create Hive table on top
+    String tableLocation = testTables.locationForCreateTableSQL(identifier);
+    shell.executeStatement(testTables.createHiveTableSQL(identifier,
+        ImmutableMap.of(InputFormatConfig.EXTERNAL_TABLE_PURGE, "TRUE")));
+
+    // Drop the Iceberg table
+    Properties properties = new Properties();
+    properties.put(Catalogs.NAME, identifier.toString());
+    properties.put(Catalogs.LOCATION, tableLocation);
+    Catalogs.dropTable(shell.getHiveConf(), properties);
+
+    // Finally drop the Hive table as well
+    shell.executeStatement("DROP TABLE " + identifier);
+  }
+
   private String getCurrentSnapshotForHiveCatalogTable(org.apache.iceberg.Table icebergTable) {
     return ((BaseMetastoreTableOperations) ((BaseTable) icebergTable).operations()).currentMetadataLocation();
   }
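
The first new test covers the common case of dropping a table that already has data files appended. The second reproduces the problematic ordering: the underlying Iceberg table (in a non-Hive catalog) is dropped through the Catalogs API first, so the final DROP TABLE in Hive only succeeds because commitDropTable now tolerates the already-deleted data instead of propagating the exception.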
diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/TestTables.java b/mr/src/test/java/org/apache/iceberg/mr/hive/TestTables.java
index 375fff3..4f84556 100644
--- a/mr/src/test/java/org/apache/iceberg/mr/hive/TestTables.java
+++ b/mr/src/test/java/org/apache/iceberg/mr/hive/TestTables.java
@@ -106,13 +106,21 @@ abstract class TestTables {
    * string which we have to execute. Overridden for HiveCatalog where the Hive table is immediately created
    * during the Iceberg table creation so no extra sql execution is required.
    * @param identifier The table identifier (the namespace should be non-empty and single level)
+   * @param tableProps Optional map of table properties
    * @return The SQL string - which should be executed, null - if it is not needed.
    */
-  public String createHiveTableSQL(TableIdentifier identifier) {
+  public String createHiveTableSQL(TableIdentifier identifier, Map<String, String> tableProps) {
     Preconditions.checkArgument(!identifier.namespace().isEmpty(), "Namespace should not be empty");
     Preconditions.checkArgument(identifier.namespace().levels().length == 1, "Namespace should be single level");
-    return String.format("CREATE TABLE %s.%s STORED BY '%s' %s", identifier.namespace(), identifier.name(),
+    String sql = String.format("CREATE TABLE %s.%s STORED BY '%s' %s", identifier.namespace(), identifier.name(),
         HiveIcebergStorageHandler.class.getName(), locationForCreateTableSQL(identifier));
+    if (tableProps != null && !tableProps.isEmpty()) {
+      String props = tableProps.entrySet().stream()
+          .map(entry -> String.format("'%s'='%s'", entry.getKey(), entry.getValue()))
+          .collect(Collectors.joining(","));
+      sql += " TBLPROPERTIES (" + props + ")";
+    }
+    return sql;
   }
 
   /**
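
For illustration, a hypothetical call with a purge property (the table name is made up, and this assumes InputFormatConfig.EXTERNAL_TABLE_PURGE resolves to the Hive property name 'external.table.purge'):

    String sql = testTables.createHiveTableSQL(
        TableIdentifier.of("default", "customers"),
        ImmutableMap.of(InputFormatConfig.EXTERNAL_TABLE_PURGE, "TRUE"));
    // sql is then roughly:
    // CREATE TABLE default.customers
    //   STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
    //   LOCATION '<table location>'
    //   TBLPROPERTIES ('external.table.purge'='TRUE')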
@@ -140,7 +148,7 @@ abstract class TestTables {
   public Table createTable(TestHiveShell shell, String tableName, Schema schema, FileFormat fileFormat,
       List<Record> records) throws IOException {
     Table table = createIcebergTable(shell.getHiveConf(), tableName, schema, fileFormat, records);
-    String createHiveSQL = createHiveTableSQL(TableIdentifier.of("default", tableName));
+    String createHiveSQL = createHiveTableSQL(TableIdentifier.of("default", tableName), ImmutableMap.of());
     if (createHiveSQL != null) {
       shell.executeStatement(createHiveSQL);
     }
@@ -172,18 +180,20 @@ abstract class TestTables {
         PartitionSpecParser.toJson(spec) + "', " +
         "'" + TableProperties.DEFAULT_FILE_FORMAT + "'='" + fileFormat + "')");
 
-    StringBuilder query = new StringBuilder().append("INSERT INTO " + identifier + " VALUES ");
-
-    records.forEach(record -> {
-      query.append("(");
-      query.append(record.struct().fields().stream()
-          .map(field -> getStringValueForInsert(record.getField(field.name()), field.type()))
-          .collect(Collectors.joining(",")));
-      query.append("),");
-    });
-    query.setLength(query.length() - 1);
-
-    shell.executeStatement(query.toString());
+    if (records != null && !records.isEmpty()) {
+      StringBuilder query = new StringBuilder().append("INSERT INTO " + identifier + " VALUES ");
+
+      records.forEach(record -> {
+        query.append("(");
+        query.append(record.struct().fields().stream()
+                .map(field -> getStringValueForInsert(record.getField(field.name()), field.type()))
+                .collect(Collectors.joining(",")));
+        query.append("),");
+      });
+      query.setLength(query.length() - 1);
+
+      shell.executeStatement(query.toString());
+    }
 
     return loadTable(identifier);
   }
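
For two hypothetical records the builder still emits a single multi-row statement along the lines of

    INSERT INTO default.customers VALUES (0, 'Alice', 'Brown'),(1, 'Bob', 'Green')

but a null or empty record list now skips the INSERT entirely, which is what lets the new tests create tables with an empty record list (ImmutableList.of()).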
@@ -386,7 +396,7 @@ abstract class TestTables {
     }
 
     @Override
-    public String createHiveTableSQL(TableIdentifier identifier) {
+    public String createHiveTableSQL(TableIdentifier identifier, Map<String, String> tblProps) {
       return null;
     }
   }