You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2018/06/05 00:29:22 UTC

hive git commit: HIVE-19334: Use actual file size rather than stats for fetch task optimization with external tables (Jason Dere, reviewed by GopalV)

Repository: hive
Updated Branches:
  refs/heads/master 85ac54403 -> 91cdd4f4c


HIVE-19334: Use actual file size rather than stats for fetch task optimization with external tables (Jason Dere, reviewed by GopalV)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/91cdd4f4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/91cdd4f4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/91cdd4f4

Branch: refs/heads/master
Commit: 91cdd4f4cf664a1a758e69c8c403d46dc36c076a
Parents: 85ac544
Author: Jason Dere <jd...@hortonworks.com>
Authored: Mon Jun 4 17:28:37 2018 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Mon Jun 4 17:28:37 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/91cdd4f4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
index 6b46188..ffd47a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
@@ -43,6 +43,7 @@ import org.slf4j.LoggerFactory;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -523,6 +524,7 @@ public class SimpleFetchOptimizer extends Transform {
     // scanning the filesystem to get file lengths.
     private Status checkThresholdWithMetastoreStats(final Table table, final PrunedPartitionList partsList,
       final long threshold) {
+      Status status = Status.UNAVAILABLE;
       if (table != null && !table.isPartitioned()) {
         long dataSize = StatsUtils.getTotalSize(table);
         if (dataSize <= 0) {
@@ -530,7 +532,7 @@ public class SimpleFetchOptimizer extends Transform {
           return Status.UNAVAILABLE;
         }
 
-        return (threshold - dataSize) >= 0 ? Status.PASS : Status.FAIL;
+        status = (threshold - dataSize) >= 0 ? Status.PASS : Status.FAIL;
       } else if (table != null && table.isPartitioned() && partsList != null) {
         List<Long> dataSizes = StatsUtils.getBasicStatForPartitions(table, partsList.getNotDeniedPartns(),
           StatsSetupConst.TOTAL_SIZE);
@@ -541,10 +543,15 @@ public class SimpleFetchOptimizer extends Transform {
           return Status.UNAVAILABLE;
         }
 
-        return (threshold - totalDataSize) >= 0 ? Status.PASS : Status.FAIL;
+        status = (threshold - totalDataSize) >= 0 ? Status.PASS : Status.FAIL;
       }
 
-      return Status.UNAVAILABLE;
+      if (status == Status.PASS && MetaStoreUtils.isExternalTable(table.getTTable())) {
+        // External table should also check the underlying file size.
+        LOG.warn("Table {} is external table, falling back to filesystem scan.", table.getCompleteName());
+        status = Status.UNAVAILABLE;
+      }
+      return status;
     }
 
     private long getPathLength(JobConf conf, Path path,