You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2021/10/04 08:23:38 UTC

[hive] branch master updated: HIVE-25564: Enable dropping HMS tables despite Iceberg metadata problems (Adam Szita, reviewed by Marton Bod)

This is an automated email from the ASF dual-hosted git repository.

szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 3967ff3  HIVE-25564: Enable dropping HMS tables despite Iceberg metadata problems (Adam Szita, reviewed by Marton Bod)
3967ff3 is described below

commit 3967ff3a60456a77b78b1e60094bb8bd374ad07d
Author: Adam Szita <40...@users.noreply.github.com>
AuthorDate: Mon Oct 4 10:23:28 2021 +0200

    HIVE-25564: Enable dropping HMS tables despite Iceberg metadata problems (Adam Szita, reviewed by Marton Bod)
    
    (cherry picked from commit c2725b1fd7a1814d75a40b76b6c8c23fdb09aafb)
---
 .../org/apache/iceberg/hive/TestHiveMetastore.java |  5 ++++
 .../iceberg/mr/hive/HiveIcebergMetaHook.java       | 18 ++++++++++----
 .../hive/TestHiveIcebergStorageHandlerNoScan.java  | 28 ++++++++++++++++++++++
 3 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
index 62e0a91..c359277 100644
--- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
+++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.metastore.TSetIpAddressProcessor;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil;
+import org.apache.iceberg.catalog.TableIdentifier;
 import org.apache.iceberg.common.DynConstructors;
 import org.apache.iceberg.common.DynMethods;
 import org.apache.iceberg.hadoop.Util;
@@ -196,6 +197,10 @@ public class TestHiveMetastore {
     return clientPool.run(client -> client.getTable(dbName, tableName));
   }
 
+  public Table getTable(TableIdentifier identifier) throws TException, InterruptedException {
+    return getTable(identifier.namespace().toString(), identifier.name());
+  }
+
   private TServer newThriftServer(TServerSocket socket, int poolSize, HiveConf conf) throws Exception {
     HiveConf serverConf = new HiveConf(conf);
     serverConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, "jdbc:derby:" + getDerbyPath() + ";create=true");
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 8d773b9..dca4c74 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -191,10 +191,18 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
         "TRUE".equalsIgnoreCase(hmsTable.getParameters().get(InputFormatConfig.EXTERNAL_TABLE_PURGE));
 
     if (deleteIcebergTable && Catalogs.hiveCatalog(conf, catalogProperties) && deleteData) {
-      // Store the metadata and the id for deleting the actual table data
-      String metadataLocation = hmsTable.getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
-      this.deleteIo = IcebergTableUtil.getTable(conf, catalogProperties).io();
-      this.deleteMetadata = TableMetadataParser.read(deleteIo, metadataLocation);
+      // Store the metadata and the io for deleting the actual table data
+      try {
+        String metadataLocation = hmsTable.getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
+        this.deleteIo = Catalogs.loadTable(conf, catalogProperties).io();
+        this.deleteMetadata = TableMetadataParser.read(deleteIo, metadataLocation);
+      } catch (Exception e) {
+        LOG.error("preDropTable: Error during loading Iceberg table or parsing its metadata for HMS table: {}.{}. " +
+                "In some cases, this might lead to undeleted metadata files under the table directory: {}. " +
+                "Please double check and, if needed, manually delete any dangling files/folders, if any. " +
+                "In spite of this error, the HMS table drop operation should proceed as normal.",
+            hmsTable.getDbName(), hmsTable.getTableName(), hmsTable.getSd().getLocation(), e);
+      }
     }
   }
 
@@ -212,7 +220,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
           Catalogs.dropTable(conf, catalogProperties);
         } else {
           // do nothing if metadata folder has been deleted already (Hive 4 behaviour for purge=TRUE)
-          if (deleteIo.newInputFile(deleteMetadata.location()).exists()) {
+          if (deleteMetadata != null && deleteIo.newInputFile(deleteMetadata.location()).exists()) {
             CatalogUtil.dropTableData(deleteIo, deleteMetadata);
           }
         }
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index f300684..235ed55 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -474,6 +474,34 @@ public class TestHiveIcebergStorageHandlerNoScan {
   }
 
   @Test
+  public void testDropTableWithCorruptedMetadata() throws TException, IOException, InterruptedException {
+    Assume.assumeTrue("Only HiveCatalog attempts to load the Iceberg table prior to dropping it.",
+        testTableType == TestTables.TestTableType.HIVE_CATALOG);
+
+    // create test table
+    TableIdentifier identifier = TableIdentifier.of("default", "customers");
+    testTables.createTable(shell, identifier.name(),
+        HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, FileFormat.PARQUET, ImmutableList.of());
+
+    // enable data purging (this should set external.table.purge=true on the HMS table)
+    Table table = testTables.loadTable(identifier);
+    table.updateProperties().set(GC_ENABLED, "true").commit();
+
+    // delete its current snapshot file (i.e. corrupt the metadata to make the Iceberg table unloadable)
+    String metadataLocation = shell.metastore().getTable(identifier)
+        .getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
+    table.io().deleteFile(metadataLocation);
+
+    // check if HMS table is nonetheless still droppable
+    shell.executeStatement(String.format("DROP TABLE %s", identifier));
+    AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class,
+        "Table does not exist", () -> {
+          testTables.loadTable(identifier);
+        }
+    );
+  }
+
+  @Test
   public void testCreateTableError() {
     TableIdentifier identifier = TableIdentifier.of("default", "withShell2");