Posted to commits@hive.apache.org by ha...@apache.org on 2018/06/05 23:45:45 UTC

hive git commit: HIVE-19773 : CBO exception while running queries with tables that are not present in materialized views (Jesus Camacho Rodriguez via Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master c166202ac -> 0992d8292


HIVE-19773 : CBO exception while running queries with tables that are not present in materialized views (Jesus Camacho Rodriguez via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0992d829
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0992d829
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0992d829

Branch: refs/heads/master
Commit: 0992d82924497754d5407f6de17ef2906414ca54
Parents: c166202
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Tue Jun 5 16:44:26 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Jun 5 16:44:26 2018 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../apache/hadoop/hive/ql/metadata/Hive.java    |  38 ++++--
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  21 +---
 .../clientpositive/materialized_view_rebuild.q  |  31 +++++
 .../llap/materialized_view_rebuild.q.out        | 117 +++++++++++++++++++
 5 files changed, 179 insertions(+), 29 deletions(-)
----------------------------------------------------------------------
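
Summary of the change: before this patch, CalcitePlanner walked the incoming query plan, collected the tables it scanned, and generated a ValidTxnWriteIdList for exactly those tables; that single list was then used to augment every materialized view considered for rewriting. A materialized view defined over a table that the current query does not reference therefore had no write-id entry in the list, and CBO failed while applying HiveAugmentMaterializationRule. The patch passes the raw validTxnsList string down instead and moves the table extraction into Hive.augmentMaterializationWithTimeInformation, which walks each materialization's own defining plan and requests valid write ids for precisely the tables that materialization scans.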


http://git-wip-us.apache.org/repos/asf/hive/blob/0992d829/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index de0ca44..463fda1 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -554,6 +554,7 @@ minillaplocal.query.files=\
   materialized_view_create_rewrite_5.q,\
   materialized_view_describe.q,\
   materialized_view_drop.q,\
+  materialized_view_rebuild.q,\
   materialized_view_rewrite_1.q,\
   materialized_view_rewrite_2.q,\
   materialized_view_rewrite_3.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/0992d829/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index ef7be03..3524294 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -68,6 +68,7 @@ import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.hep.HepPlanner;
 import org.apache.calcite.plan.hep.HepProgramBuilder;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelVisitor;
 import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.core.TableScan;
 import org.apache.calcite.rel.type.RelDataType;
@@ -177,6 +178,7 @@ import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+import org.apache.hadoop.hive.ql.lockmgr.LockException;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -1340,7 +1342,7 @@ public class Hive {
    * @return the list of materialized views available for rewriting
    * @throws HiveException
    */
-  public List<RelOptMaterialization> getAllValidMaterializedViews(boolean forceMVContentsUpToDate, ValidTxnWriteIdList txnList)
+  public List<RelOptMaterialization> getAllValidMaterializedViews(boolean forceMVContentsUpToDate, String validTxnsList)
       throws HiveException {
     // Final result
     List<RelOptMaterialization> result = new ArrayList<>();
@@ -1352,7 +1354,7 @@ public class Hive {
           // Bail out: empty list
           continue;
         }
-        result.addAll(getValidMaterializedViews(dbName, materializedViewNames, forceMVContentsUpToDate, txnList));
+        result.addAll(getValidMaterializedViews(dbName, materializedViewNames, forceMVContentsUpToDate, validTxnsList));
       }
       return result;
     } catch (Exception e) {
@@ -1361,12 +1363,12 @@ public class Hive {
   }
 
   public List<RelOptMaterialization> getValidMaterializedView(String dbName, String materializedViewName,
-      boolean forceMVContentsUpToDate, ValidTxnWriteIdList txnList) throws HiveException {
-    return getValidMaterializedViews(dbName, ImmutableList.of(materializedViewName), forceMVContentsUpToDate, txnList);
+      boolean forceMVContentsUpToDate, String validTxnsList) throws HiveException {
+    return getValidMaterializedViews(dbName, ImmutableList.of(materializedViewName), forceMVContentsUpToDate, validTxnsList);
   }
 
   private List<RelOptMaterialization> getValidMaterializedViews(String dbName, List<String> materializedViewNames,
-      boolean forceMVContentsUpToDate, ValidTxnWriteIdList txnList) throws HiveException {
+      boolean forceMVContentsUpToDate, String validTxnsList) throws HiveException {
     final boolean tryIncrementalRewriting =
         HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_INCREMENTAL);
     final long defaultDiff =
@@ -1421,7 +1423,7 @@ public class Hive {
         }
 
         if (outdated && (!tryIncrementalRewriting || materializationInvInfo == null
-            || txnList == null || materializationInvInfo.isSourceTablesUpdateDeleteModified())) {
+            || validTxnsList == null || materializationInvInfo.isSourceTablesUpdateDeleteModified())) {
           // We will not try partial rewriting either because the config specification, this
           // is a rebuild over some non-transactional table, or there were update/delete
           // operations in the source tables (not supported yet)
@@ -1450,7 +1452,7 @@ public class Hive {
               // We will rewrite it to include the filters on transaction list
               // so we can produce partial rewritings
               materialization = augmentMaterializationWithTimeInformation(
-                  materialization, txnList, new ValidTxnWriteIdList(
+                  materialization, validTxnsList, new ValidTxnWriteIdList(
                       materializationInvInfo.getValidTxnList()));
             }
             result.add(materialization);
@@ -1473,7 +1475,7 @@ public class Hive {
               // We will rewrite it to include the filters on transaction list
               // so we can produce partial rewritings
               materialization = augmentMaterializationWithTimeInformation(
-                  materialization, txnList, new ValidTxnWriteIdList(
+                  materialization, validTxnsList, new ValidTxnWriteIdList(
                       materializationInvInfo.getValidTxnList()));
             }
             result.add(materialization);
@@ -1497,8 +1499,24 @@ public class Hive {
    * its invalidation.
    */
   private static RelOptMaterialization augmentMaterializationWithTimeInformation(
-      RelOptMaterialization materialization, ValidTxnWriteIdList currentTxnList,
-      ValidTxnWriteIdList materializationTxnList) {
+      RelOptMaterialization materialization, String validTxnsList,
+      ValidTxnWriteIdList materializationTxnList) throws LockException {
+    // Extract tables used by the query which will in turn be used to generate
+    // the corresponding txn write ids
+    List<String> tablesUsed = new ArrayList<>();
+    new RelVisitor() {
+      @Override
+      public void visit(RelNode node, int ordinal, RelNode parent) {
+        if (node instanceof TableScan) {
+          TableScan ts = (TableScan) node;
+          tablesUsed.add(((RelOptHiveTable) ts.getTable()).getHiveTableMD().getFullyQualifiedName());
+        }
+        super.visit(node, ordinal, parent);
+      }
+    }.go(materialization.queryRel);
+    ValidTxnWriteIdList currentTxnList =
+        SessionState.get().getTxnMgr().getValidWriteIds(tablesUsed, validTxnsList);
+    // Augment
     final RexBuilder rexBuilder = materialization.queryRel.getCluster().getRexBuilder();
     final HepProgramBuilder augmentMaterializationProgram = new HepProgramBuilder()
         .addRuleInstance(new HiveAugmentMaterializationRule(rexBuilder, currentTxnList, materializationTxnList));
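
The heart of the Hive.java change is the anonymous RelVisitor that records, for each materialization, the tables its defining query scans before asking the transaction manager for write ids. A minimal, self-contained sketch of that pattern follows; it uses Calcite's generic RelOptTable.getQualifiedName() rather than Hive's RelOptHiveTable, so the class and method names here are illustrative, not part of the patch:

  import java.util.ArrayList;
  import java.util.List;

  import org.apache.calcite.rel.RelNode;
  import org.apache.calcite.rel.RelVisitor;
  import org.apache.calcite.rel.core.TableScan;

  final class ScannedTables {
    // Walk a plan and collect the qualified name of every table it scans.
    static List<String> of(RelNode plan) {
      final List<String> tables = new ArrayList<>();
      new RelVisitor() {
        @Override
        public void visit(RelNode node, int ordinal, RelNode parent) {
          if (node instanceof TableScan) {
            // The patch itself casts to RelOptHiveTable and calls
            // getHiveTableMD().getFullyQualifiedName(); the generic
            // accessor below is an illustrative stand-in.
            tables.add(String.join(".", node.getTable().getQualifiedName()));
          }
          super.visit(node, ordinal, parent);
        }
      }.go(plan);
      return tables;
    }
  }

With the tables in hand, the method obtains a per-materialization ValidTxnWriteIdList via SessionState.get().getTxnMgr().getValidWriteIds(tablesUsed, validTxnsList), so each view is augmented against the write ids of its own source tables rather than those of the current query.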

http://git-wip-us.apache.org/repos/asf/hive/blob/0992d829/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index d939110..e091f38 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2081,35 +2081,18 @@ public class CalcitePlanner extends SemanticAnalyzer {
       // Add views to planner
       List<RelOptMaterialization> materializations = new ArrayList<>();
       try {
-        // Extract tables used by the query which will in turn be used to generate
-        // the corresponding txn write ids
-        List<String> tablesUsed = new ArrayList<>();
-        new RelVisitor() {
-          @Override
-          public void visit(RelNode node, int ordinal, RelNode parent) {
-            if (node instanceof TableScan) {
-              TableScan ts = (TableScan) node;
-              tablesUsed.add(((RelOptHiveTable) ts.getTable()).getHiveTableMD().getFullyQualifiedName());
-            }
-            super.visit(node, ordinal, parent);
-          }
-        }.go(basePlan);
         final String validTxnsList = conf.get(ValidTxnList.VALID_TXNS_KEY);
-        ValidTxnWriteIdList txnWriteIds = null;
-        if (validTxnsList != null && !validTxnsList.isEmpty()) {
-          txnWriteIds = getTxnMgr().getValidWriteIds(tablesUsed, validTxnsList);
-        }
         if (mvRebuildMode != MaterializationRebuildMode.NONE) {
           // We only retrieve the materialization corresponding to the rebuild. In turn,
           // we pass 'true' for the forceMVContentsUpToDate parameter, as we cannot allow the
           // materialization contents to be stale for a rebuild if we want to use it.
           materializations = Hive.get().getValidMaterializedView(mvRebuildDbName, mvRebuildName,
-              true, txnWriteIds);
+              true, validTxnsList);
         } else {
           // This is not a rebuild, we retrieve all the materializations. In turn, we do not need
           // to force the materialization contents to be up-to-date, as this is not a rebuild, and
           // we apply the user parameters (HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW) instead.
-          materializations = Hive.get().getAllValidMaterializedViews(false, txnWriteIds);
+          materializations = Hive.get().getAllValidMaterializedViews(false, validTxnsList);
         }
         // We need to use the current cluster for the scan operator on views,
         // otherwise the planner will throw an Exception (different planners)
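
On the planner side, the table-collection visitor and the eager write-id generation are removed: CalcitePlanner now simply reads the validTxnsList string from the configuration (ValidTxnList.VALID_TXNS_KEY) and passes it through, deferring write-id generation to the point where each individual materialization is augmented in Hive.java.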

http://git-wip-us.apache.org/repos/asf/hive/blob/0992d829/ql/src/test/queries/clientpositive/materialized_view_rebuild.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/materialized_view_rebuild.q b/ql/src/test/queries/clientpositive/materialized_view_rebuild.q
new file mode 100644
index 0000000..9f6eaaf
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_rebuild.q
@@ -0,0 +1,31 @@
+-- SORT_QUERY_RESULTS
+
+set hive.vectorized.execution.enabled=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.strict.checks.cartesian.product=false;
+set hive.stats.fetch.column.stats=true;
+set hive.materializedview.rewriting=true;
+
+drop materialized view if exists mv_rebuild;
+drop table if exists basetable_rebuild;
+
+create table basetable_rebuild (a int, b varchar(256), c decimal(10,2))
+stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into basetable_rebuild values (1, 'alfred', 10.30),(2, 'bob', 3.14),(2, 'bonnie', 172342.2),(3, 'calvin', 978.76),(3, 'charlie', 9.8);
+
+create materialized view mv_rebuild as select a, b, sum(a) from basetable_rebuild group by a,b;
+
+select * from mv_rebuild;
+
+insert into basetable_rebuild values (4, 'amia', 7.5);
+
+select * from mv_rebuild;
+
+alter materialized view mv_rebuild rebuild;
+
+select * from mv_rebuild;
+
+drop materialized view mv_rebuild;
+drop table basetable_rebuild;
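
The new qtest follows the usual lifecycle for these cases: create a transactional base table, materialize an aggregate over it, show the view going stale after a further insert, rebuild it, and verify the new row appears. Assuming the standard Hive qtest workflow for minillaplocal.query.files entries, it can typically be run on its own with something like:

  cd itests/qtest
  mvn test -Dtest=TestMiniLlapLocalCliDriver -Dqfile=materialized_view_rebuild.q

(the module and driver name above are the usual ones for this test list; adjust to your checkout if they differ).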

http://git-wip-us.apache.org/repos/asf/hive/blob/0992d829/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out
new file mode 100644
index 0000000..4d37d82
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out
@@ -0,0 +1,117 @@
+PREHOOK: query: drop materialized view if exists mv_rebuild
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: query: drop materialized view if exists mv_rebuild
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: query: drop table if exists basetable_rebuild
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists basetable_rebuild
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table basetable_rebuild (a int, b varchar(256), c decimal(10,2))
+stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@basetable_rebuild
+POSTHOOK: query: create table basetable_rebuild (a int, b varchar(256), c decimal(10,2))
+stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@basetable_rebuild
+PREHOOK: query: insert into basetable_rebuild values (1, 'alfred', 10.30),(2, 'bob', 3.14),(2, 'bonnie', 172342.2),(3, 'calvin', 978.76),(3, 'charlie', 9.8)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@basetable_rebuild
+POSTHOOK: query: insert into basetable_rebuild values (1, 'alfred', 10.30),(2, 'bob', 3.14),(2, 'bonnie', 172342.2),(3, 'calvin', 978.76),(3, 'charlie', 9.8)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@basetable_rebuild
+POSTHOOK: Lineage: basetable_rebuild.a SCRIPT []
+POSTHOOK: Lineage: basetable_rebuild.b SCRIPT []
+POSTHOOK: Lineage: basetable_rebuild.c SCRIPT []
+PREHOOK: query: create materialized view mv_rebuild as select a, b, sum(a) from basetable_rebuild group by a,b
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@basetable_rebuild
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_rebuild
+POSTHOOK: query: create materialized view mv_rebuild as select a, b, sum(a) from basetable_rebuild group by a,b
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@basetable_rebuild
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_rebuild
+PREHOOK: query: select * from mv_rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_rebuild
+#### A masked pattern was here ####
+POSTHOOK: query: select * from mv_rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_rebuild
+#### A masked pattern was here ####
+1	alfred	1
+2	bob	2
+2	bonnie	2
+3	calvin	3
+3	charlie	3
+PREHOOK: query: insert into basetable_rebuild values (4, 'amia', 7.5)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@basetable_rebuild
+POSTHOOK: query: insert into basetable_rebuild values (4, 'amia', 7.5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@basetable_rebuild
+POSTHOOK: Lineage: basetable_rebuild.a SCRIPT []
+POSTHOOK: Lineage: basetable_rebuild.b SCRIPT []
+POSTHOOK: Lineage: basetable_rebuild.c SCRIPT []
+PREHOOK: query: select * from mv_rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_rebuild
+#### A masked pattern was here ####
+POSTHOOK: query: select * from mv_rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_rebuild
+#### A masked pattern was here ####
+1	alfred	1
+2	bob	2
+2	bonnie	2
+3	calvin	3
+3	charlie	3
+PREHOOK: query: alter materialized view mv_rebuild rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@basetable_rebuild
+PREHOOK: Output: default@mv_rebuild
+POSTHOOK: query: alter materialized view mv_rebuild rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@basetable_rebuild
+POSTHOOK: Output: default@mv_rebuild
+POSTHOOK: Lineage: mv_rebuild._c2 EXPRESSION [(basetable_rebuild)basetable_rebuild.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: mv_rebuild.a SIMPLE [(basetable_rebuild)basetable_rebuild.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: mv_rebuild.b SIMPLE [(basetable_rebuild)basetable_rebuild.FieldSchema(name:b, type:varchar(256), comment:null), ]
+PREHOOK: query: select * from mv_rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_rebuild
+#### A masked pattern was here ####
+POSTHOOK: query: select * from mv_rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_rebuild
+#### A masked pattern was here ####
+1	alfred	1
+2	bob	2
+2	bonnie	2
+3	calvin	3
+3	charlie	3
+4	amia	4
+PREHOOK: query: drop materialized view mv_rebuild
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@mv_rebuild
+PREHOOK: Output: default@mv_rebuild
+POSTHOOK: query: drop materialized view mv_rebuild
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@mv_rebuild
+POSTHOOK: Output: default@mv_rebuild
+PREHOOK: query: drop table basetable_rebuild
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@basetable_rebuild
+PREHOOK: Output: default@basetable_rebuild
+POSTHOOK: query: drop table basetable_rebuild
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@basetable_rebuild
+POSTHOOK: Output: default@basetable_rebuild
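
The golden file makes the intent of the fix visible: the select issued after the second insert still returns the five original rows (the view contents are stale), and only after ALTER MATERIALIZED VIEW ... REBUILD does the row (4, 'amia', 4) appear. Before the patch, planning could hit the CBO exception from the summary whenever an eligible materialized view scanned a table the current query itself did not, since write ids had only been generated for the query's own tables.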