You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gobblin.apache.org by wl...@apache.org on 2023/05/11 21:46:14 UTC

[gobblin] branch master updated: [GOBBLIN-1832] Emit warning instead of failing job for retention of Hive Table Views (#3695)

This is an automated email from the ASF dual-hosted git repository.

wlo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 05c732c20 [GOBBLIN-1832] Emit warning instead of failing job for retention of Hive Table Views (#3695)
05c732c20 is described below

commit 05c732c2008e38d5fa6bf06dcad5fcd0eb4aba47
Author: umustafi <um...@gmail.com>
AuthorDate: Thu May 11 14:46:06 2023 -0700

    [GOBBLIN-1832] Emit warning instead of failing job for retention of Hive Table Views (#3695)
    
    Hive retention should not run on a view, since a view should not have access to delete the underlying data. Rather than failing the job when the dataset is a view, the version finder now logs a warning and skips it, because retention jobs may be configured to include both Hive tables and views. We want to determine dynamically at runtime whether to skip retention on the dataset in question, rather than statically allow/denylist tables in the configurations.
    
    Co-authored-by: Urmi Mustafi <um...@linkedin.com>
---
 .../version/finder/AbstractHiveDatasetVersionFinder.java     | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/version/finder/AbstractHiveDatasetVersionFinder.java b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/version/finder/AbstractHiveDatasetVersionFinder.java
index f12204862..a7a933bcf 100644
--- a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/version/finder/AbstractHiveDatasetVersionFinder.java
+++ b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/version/finder/AbstractHiveDatasetVersionFinder.java
@@ -18,11 +18,13 @@ package org.apache.gobblin.data.management.version.finder;
 
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.List;
 
 import lombok.extern.slf4j.Slf4j;
 
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 
 import com.google.common.base.Function;
@@ -56,6 +58,8 @@ public abstract class AbstractHiveDatasetVersionFinder implements VersionFinder<
    * Calls {@link #getDatasetVersion(Partition)} for every {@link Partition} found.
    * <p>
    * Note: If an exception occurs while processing a partition, that partition will be ignored in the returned collection
+   * Also note that if the dataset passed is a view type, we will return an empty list even if the underlying table is
+   * partitioned.
    * </p>
    *
    * @throws IllegalArgumentException if <code>dataset</code> is not a {@link HiveDataset}. Or if {@link HiveDataset#getTable()}
@@ -69,7 +73,13 @@ public abstract class AbstractHiveDatasetVersionFinder implements VersionFinder<
     final HiveDataset hiveDataset = (HiveDataset) dataset;
 
     if (!hiveDataset.getTable().isPartitioned()) {
-      throw new IllegalArgumentException("HiveDatasetVersionFinder is only compatible with partitioned hive tables");
+      if (hiveDataset.getTable().getTableType() == TableType.VIRTUAL_VIEW) {
+        log.warn("Skipping processing a view type dataset: ", ((HiveDataset) dataset).getTable().getTableName());
+        return Collections.emptyList();
+      } else {
+        throw new IllegalArgumentException("HiveDatasetVersionFinder is only compatible with partitioned hive tables. "
+            + "This is a snapshot hive table.");
+      }
     }
 
     try (AutoReturnableObject<IMetaStoreClient> client = hiveDataset.getClientPool().getClient()) {