Posted to commits@hive.apache.org by jc...@apache.org on 2019/07/29 20:25:37 UTC

[hive] branch master updated: HIVE-22057: Early bailout in SharedWorkOptimizer if all tables are referenced only once (Jesus Camacho Rodriguez, reviewed by Vineet Garg)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 58f4c23  HIVE-22057: Early bailout in SharedWorkOptimizer if all tables are referenced only once (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
58f4c23 is described below

commit 58f4c23da57f3521525d3234c00596573a70602a
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Fri Jul 26 17:38:52 2019 -0700

    HIVE-22057: Early bailout in SharedWorkOptimizer if all tables are referenced only once (Jesus Camacho Rodriguez, reviewed by Vineet Garg)
---
 .../hive/ql/optimizer/SharedWorkOptimizer.java     | 26 ++++++++++++++--------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index b9c1094..0e16b7b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -132,25 +132,33 @@ public class SharedWorkOptimizer extends Transform {
       return pctx;
     }
 
+    // Map of dbName.TblName -> TSOperator
+    ArrayListMultimap<String, TableScanOperator> tableNameToOps = splitTableScanOpsByTable(pctx);
+
+    // Check whether all tables in the plan are unique
+    boolean tablesReferencedOnlyOnce =
+        tableNameToOps.asMap().entrySet().stream().noneMatch(e -> e.getValue().size() > 1);
+    if (tablesReferencedOnlyOnce) {
+      // Nothing to do, bail out
+      return pctx;
+    }
+
     if (LOG.isDebugEnabled()) {
       LOG.debug("Before SharedWorkOptimizer:\n" + Operator.toString(pctx.getTopOps().values()));
     }
 
-    // Cache to use during optimization
-    SharedWorkOptimizerCache optimizerCache = new SharedWorkOptimizerCache();
-
-    // Gather information about the DPP table scans and store it in the cache
-    gatherDPPTableScanOps(pctx, optimizerCache);
-
-    // Map of dbName.TblName -> TSOperator
-    ArrayListMultimap<String, TableScanOperator> tableNameToOps = splitTableScanOpsByTable(pctx);
-
     // We enforce a certain order when we do the reutilization.
     // In particular, we use size of table x number of reads to
     // rank the tables.
     List<Entry<String, Long>> sortedTables = rankTablesByAccumulatedSize(pctx);
     LOG.debug("Sorted tables by size: {}", sortedTables);
 
+    // Cache to use during optimization
+    SharedWorkOptimizerCache optimizerCache = new SharedWorkOptimizerCache();
+
+    // Gather information about the DPP table scans and store it in the cache
+    gatherDPPTableScanOps(pctx, optimizerCache);
+
     // Execute shared work optimization
     sharedWorkOptimization(pctx, optimizerCache, tableNameToOps, sortedTables, false);
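
For readers skimming the diff, the heart of the change is the new early-bailout check: the optimizer now builds the table-name-to-scan multimap first, and if no table is scanned more than once it returns immediately, skipping the cache setup, DPP scan gathering, and table ranking. The standalone sketch below illustrates that check in isolation; it is a simplified approximation, not the committed code. The EarlyBailoutSketch class, the TableScan placeholder, and the sample table names are hypothetical stand-ins for Hive's ParseContext and TableScanOperator, and only the shape of the Guava multimap check mirrors the patch.

    import com.google.common.collect.ArrayListMultimap;

    // Minimal sketch of the early-bailout idea from HIVE-22057.
    // TableScan and the sample data are hypothetical stand-ins; only the
    // noneMatch check over the multimap mirrors the committed change.
    public class EarlyBailoutSketch {

      static final class TableScan {
        final String alias;
        TableScan(String alias) { this.alias = alias; }
      }

      public static void main(String[] args) {
        // Map of dbName.tblName -> table scan operators reading that table.
        ArrayListMultimap<String, TableScan> tableNameToOps = ArrayListMultimap.create();
        tableNameToOps.put("default.store_sales", new TableScan("ss1"));
        tableNameToOps.put("default.store_sales", new TableScan("ss2"));
        tableNameToOps.put("default.date_dim", new TableScan("d1"));

        // Shared work only exists when some table is scanned more than once,
        // so if every table is referenced exactly once we can bail out early.
        boolean tablesReferencedOnlyOnce =
            tableNameToOps.asMap().entrySet().stream()
                .noneMatch(e -> e.getValue().size() > 1);

        if (tablesReferencedOnlyOnce) {
          System.out.println("Nothing to merge, skipping shared work optimization");
          return;
        }
        System.out.println("At least one table is scanned multiple times, optimizing...");
      }
    }

With the sample data above, default.store_sales is scanned twice, so the sketch proceeds; drop the second store_sales scan and it bails out immediately, which is exactly the cheap exit the patch adds before any of the heavier bookkeeping runs.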