You are viewing a plain text version of this content. The canonical (hyperlinked) version is available in the original mailing-list archive.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/07/29 20:25:37 UTC
[hive] branch master updated: HIVE-22057: Early bailout in
SharedWorkOptimizer if all tables are referenced only once (Jesus Camacho
Rodriguez, reviewed by Vineet Garg)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 58f4c23 HIVE-22057: Early bailout in SharedWorkOptimizer if all tables are referenced only once (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
58f4c23 is described below
commit 58f4c23da57f3521525d3234c00596573a70602a
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Fri Jul 26 17:38:52 2019 -0700
HIVE-22057: Early bailout in SharedWorkOptimizer if all tables are referenced only once (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
---
.../hive/ql/optimizer/SharedWorkOptimizer.java | 26 ++++++++++++++--------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index b9c1094..0e16b7b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -132,25 +132,33 @@ public class SharedWorkOptimizer extends Transform {
return pctx;
}
+ // Map of dbName.TblName -> TSOperator
+ ArrayListMultimap<String, TableScanOperator> tableNameToOps = splitTableScanOpsByTable(pctx);
+
+ // Check whether all tables in the plan are unique
+ boolean tablesReferencedOnlyOnce =
+ tableNameToOps.asMap().entrySet().stream().noneMatch(e -> e.getValue().size() > 1);
+ if (tablesReferencedOnlyOnce) {
+ // Nothing to do, bail out
+ return pctx;
+ }
+
if (LOG.isDebugEnabled()) {
LOG.debug("Before SharedWorkOptimizer:\n" + Operator.toString(pctx.getTopOps().values()));
}
- // Cache to use during optimization
- SharedWorkOptimizerCache optimizerCache = new SharedWorkOptimizerCache();
-
- // Gather information about the DPP table scans and store it in the cache
- gatherDPPTableScanOps(pctx, optimizerCache);
-
- // Map of dbName.TblName -> TSOperator
- ArrayListMultimap<String, TableScanOperator> tableNameToOps = splitTableScanOpsByTable(pctx);
-
// We enforce a certain order when we do the reutilization.
// In particular, we use size of table x number of reads to
// rank the tables.
List<Entry<String, Long>> sortedTables = rankTablesByAccumulatedSize(pctx);
LOG.debug("Sorted tables by size: {}", sortedTables);
+ // Cache to use during optimization
+ SharedWorkOptimizerCache optimizerCache = new SharedWorkOptimizerCache();
+
+ // Gather information about the DPP table scans and store it in the cache
+ gatherDPPTableScanOps(pctx, optimizerCache);
+
// Execute shared work optimization
sharedWorkOptimization(pctx, optimizerCache, tableNameToOps, sortedTables, false);