You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "ASF GitHub Bot (Jira)" <ji...@apache.org> on 2022/04/11 09:49:00 UTC
[jira] [Work logged] (HIVE-25941) Long compilation time of complex query due to analysis for materialized view rewrite

     [ https://issues.apache.org/jira/browse/HIVE-25941?focusedWorklogId=755148&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-755148 ]

ASF GitHub Bot logged work on HIVE-25941:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 11/Apr/22 09:48
            Start Date: 11/Apr/22 09:48
    Worklog Time Spent: 10m 
      Work Description: kgyrtkirk commented on code in PR #3014:
URL: https://github.com/apache/hive/pull/3014#discussion_r847121787


##########
ql/src/java/org/apache/hadoop/hive/ql/metadata/MaterializedViewsCache.java:
##########
@@ -205,4 +212,52 @@ HiveRelOptMaterialization get(String dbName, String viewName) {
   public boolean isEmpty() {
     return materializedViews.isEmpty();
   }
+
+
+  private static class ASTKey {
+    private final ASTNode root;
+
+    public ASTKey(ASTNode root) {
+      this.root = root;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) return true;
+      if (o == null || getClass() != o.getClass()) return false;
+      ASTKey that = (ASTKey) o;
+      return equals(root, that.root);
+    }
+
+    private boolean equals(ASTNode astNode1, ASTNode astNode2) {
+      if (!(astNode1.getType() == astNode2.getType() &&
+              astNode1.getText().equals(astNode2.getText()) &&
+              astNode1.getChildCount() == astNode2.getChildCount())) {
+        return false;
+      }
+
+      for (int i = 0; i < astNode1.getChildCount(); ++i) {
+        if (!equals((ASTNode) astNode1.getChild(i), (ASTNode) astNode2.getChild(i))) {
+          return false;
+        }
+      }
+
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      return hashcode(root);

Review Comment:
   you could probably cache the hashcode - so that its not neccessary to compute it multiple times



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveMaterializedViewASTSubQueryRewriteShuttle.java:
##########
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.EnumSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
+import java.util.function.Predicate;
+
+import static java.util.Collections.singletonList;
+import static java.util.Collections.unmodifiableMap;
+import static java.util.Collections.unmodifiableSet;
+import static org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization.RewriteAlgorithm.NON_CALCITE;
+import static org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils.extractTable;
+
+/**
+ * Traverse the plan and tries to rewrite subtrees of the plan to materialized view scans.
+ *
+ * The rewrite depends on whether the subtree's corresponding AST match with any materialized view
+ * definitions AST.
+ */
+public class HiveMaterializedViewASTSubQueryRewriteShuttle extends HiveRelShuttleImpl {
+
+  private static final Logger LOG = LoggerFactory.getLogger(HiveMaterializedViewASTSubQueryRewriteShuttle.class);
+
+  private final Map<RelNode, ASTNode> subQueryMap;
+  private final ASTNode originalAST;
+  private final ASTNode expandedAST;
+  private final RelBuilder relBuilder;
+  private final Hive db;
+  private final Set<TableName> tablesUsedByOriginalPlan;
+  private final HiveTxnManager txnManager;
+
+  public HiveMaterializedViewASTSubQueryRewriteShuttle(
+          Map<RelNode, ASTNode> subQueryMap,
+          ASTNode originalAST,
+          ASTNode expandedAST,
+          RelBuilder relBuilder,
+          Hive db,
+          Set<TableName> tablesUsedByOriginalPlan,
+          HiveTxnManager txnManager) {
+    this.subQueryMap = unmodifiableMap(subQueryMap);
+    this.originalAST = originalAST;
+    this.expandedAST = expandedAST;
+    this.relBuilder = relBuilder;
+    this.db = db;
+    this.tablesUsedByOriginalPlan = unmodifiableSet(tablesUsedByOriginalPlan);
+    this.txnManager = txnManager;
+  }
+
+  public RelNode rewrite(RelNode relNode) {
+    return relNode.accept(this);
+  }
+
+  @Override
+  public RelNode visit(HiveProject project) {
+    if (!subQueryMap.containsKey(project)) {

Review Comment:
   it might be true that all subqueries/MVs have a `Project` at their top level ; but what if that's not true?
   
   Would it make sense to add some check somewhere which could fire during testing if this preconception at some point breaks?



##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveMaterializedViewASTSubQueryRewriteShuttle.java:
##########
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.CalcitePlanner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.EnumSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
+import java.util.function.Predicate;
+
+import static java.util.Collections.singletonList;
+import static java.util.Collections.unmodifiableMap;
+import static java.util.Collections.unmodifiableSet;
+import static org.apache.hadoop.hive.ql.metadata.HiveRelOptMaterialization.RewriteAlgorithm.NON_CALCITE;
+import static org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewUtils.extractTable;
+
+/**
+ * Traverse the plan and tries to rewrite subtrees of the plan to materialized view scans.
+ *
+ * The rewrite depends on whether the subtree's corresponding AST match with any materialized view
+ * definitions AST.
+ */
+public class HiveMaterializedViewASTSubQueryRewriteShuttle extends HiveRelShuttleImpl {
+
+  private static final Logger LOG = LoggerFactory.getLogger(HiveMaterializedViewASTSubQueryRewriteShuttle.class);
+
+  private final Map<RelNode, ASTNode> subQueryMap;
+  private final ASTNode originalAST;
+  private final ASTNode expandedAST;
+  private final RelBuilder relBuilder;
+  private final Hive db;
+  private final Set<TableName> tablesUsedByOriginalPlan;
+  private final HiveTxnManager txnManager;
+
+  public HiveMaterializedViewASTSubQueryRewriteShuttle(
+          Map<RelNode, ASTNode> subQueryMap,
+          ASTNode originalAST,
+          ASTNode expandedAST,
+          RelBuilder relBuilder,
+          Hive db,
+          Set<TableName> tablesUsedByOriginalPlan,
+          HiveTxnManager txnManager) {
+    this.subQueryMap = unmodifiableMap(subQueryMap);
+    this.originalAST = originalAST;
+    this.expandedAST = expandedAST;
+    this.relBuilder = relBuilder;
+    this.db = db;
+    this.tablesUsedByOriginalPlan = unmodifiableSet(tablesUsedByOriginalPlan);
+    this.txnManager = txnManager;
+  }
+
+  public RelNode rewrite(RelNode relNode) {
+    return relNode.accept(this);
+  }
+
+  @Override
+  public RelNode visit(HiveProject project) {
+    if (!subQueryMap.containsKey(project)) {
+      // No AST is found for this subtree
+      return super.visit(project);
+    }
+
+    // The AST associated to the RelNode is part of the original AST, but we need the expanded one
+    // 1. Collect the path elements of this node in the original AST
+    Stack<Integer> path = new Stack<>();
+    ASTNode curr = subQueryMap.get(project);
+    while (curr != null && curr != originalAST) {
+      path.push(curr.getType());
+      curr = (ASTNode) curr.getParent();
+    }
+
+    // 2. To find the AST node in the expanded we need the path elements in reverse order

Review Comment:
   instead of using a stack + reverse ; wouldn't using a deque make this second step unneccessary?





Issue Time Tracking
-------------------

    Worklog Id:     (was: 755148)
    Time Spent: 1h 10m  (was: 1h)

> Long compilation time of complex query due to analysis for materialized view rewrite
> ------------------------------------------------------------------------------------
>
>                 Key: HIVE-25941
>                 URL: https://issues.apache.org/jira/browse/HIVE-25941
>             Project: Hive
>          Issue Type: Bug
>          Components: Materialized views
>            Reporter: Krisztian Kasa
>            Assignee: Krisztian Kasa
>            Priority: Major
>              Labels: pull-request-available
>         Attachments: sample.png
>
>          Time Spent: 1h 10m
>  Remaining Estimate: 0h
>
> When compiling query the optimizer tries to rewrite the query plan or subtrees of the plan to use materialized view scans.
> If
> {code}
> set hive.materializedview.rewriting.sql.subquery=false;
> {code}
> the compilation succeed in less then 10 sec otherwise it takes several minutes (~ 5min) depending on the hardware.



--
This message was sent by Atlassian Jira
(v8.20.1#820001)