You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2018/06/05 00:29:22 UTC
hive git commit: HIVE-19334: Use actual file size rather than stats for fetch task optimization with external tables (Jason Dere, reviewed by GopalV)
Repository: hive
Updated Branches:
refs/heads/master 85ac54403 -> 91cdd4f4c
HIVE-19334: Use actual file size rather than stats for fetch task optimization with external tables (Jason Dere, reviewed by GopalV)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/91cdd4f4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/91cdd4f4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/91cdd4f4
Branch: refs/heads/master
Commit: 91cdd4f4cf664a1a758e69c8c403d46dc36c076a
Parents: 85ac544
Author: Jason Dere <jd...@hortonworks.com>
Authored: Mon Jun 4 17:28:37 2018 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Mon Jun 4 17:28:37 2018 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/91cdd4f4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
index 6b46188..ffd47a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
@@ -43,6 +43,7 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -523,6 +524,7 @@ public class SimpleFetchOptimizer extends Transform {
// scanning the filesystem to get file lengths.
private Status checkThresholdWithMetastoreStats(final Table table, final PrunedPartitionList partsList,
final long threshold) {
+ Status status = Status.UNAVAILABLE;
if (table != null && !table.isPartitioned()) {
long dataSize = StatsUtils.getTotalSize(table);
if (dataSize <= 0) {
@@ -530,7 +532,7 @@ public class SimpleFetchOptimizer extends Transform {
return Status.UNAVAILABLE;
}
- return (threshold - dataSize) >= 0 ? Status.PASS : Status.FAIL;
+ status = (threshold - dataSize) >= 0 ? Status.PASS : Status.FAIL;
} else if (table != null && table.isPartitioned() && partsList != null) {
List<Long> dataSizes = StatsUtils.getBasicStatForPartitions(table, partsList.getNotDeniedPartns(),
StatsSetupConst.TOTAL_SIZE);
@@ -541,10 +543,15 @@ public class SimpleFetchOptimizer extends Transform {
return Status.UNAVAILABLE;
}
- return (threshold - totalDataSize) >= 0 ? Status.PASS : Status.FAIL;
+ status = (threshold - totalDataSize) >= 0 ? Status.PASS : Status.FAIL;
}
- return Status.UNAVAILABLE;
+ if (status == Status.PASS && MetaStoreUtils.isExternalTable(table.getTTable())) {
+ // External table should also check the underlying file size.
+ LOG.warn("Table {} is external table, falling back to filesystem scan.", table.getCompleteName());
+ status = Status.UNAVAILABLE;
+ }
+ return status;
}
private long getPathLength(JobConf conf, Path path,