You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/06/05 23:45:45 UTC
hive git commit: HIVE-19773 : CBO exception while running queries
with tables that are not present in materialized views (Jesus Camacho
Rodriguez via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master c166202ac -> 0992d8292
HIVE-19773 : CBO exception while running queries with tables that are not present in materialized views (Jesus Camacho Rodriguez via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0992d829
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0992d829
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0992d829
Branch: refs/heads/master
Commit: 0992d82924497754d5407f6de17ef2906414ca54
Parents: c166202
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Tue Jun 5 16:44:26 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Jun 5 16:44:26 2018 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 1 +
.../apache/hadoop/hive/ql/metadata/Hive.java | 38 ++++--
.../hadoop/hive/ql/parse/CalcitePlanner.java | 21 +---
.../clientpositive/materialized_view_rebuild.q | 31 +++++
.../llap/materialized_view_rebuild.q.out | 117 +++++++++++++++++++
5 files changed, 179 insertions(+), 29 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0992d829/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index de0ca44..463fda1 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -554,6 +554,7 @@ minillaplocal.query.files=\
materialized_view_create_rewrite_5.q,\
materialized_view_describe.q,\
materialized_view_drop.q,\
+ materialized_view_rebuild.q,\
materialized_view_rewrite_1.q,\
materialized_view_rewrite_2.q,\
materialized_view_rewrite_3.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/0992d829/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index ef7be03..3524294 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -68,6 +68,7 @@ import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.hep.HepPlanner;
import org.apache.calcite.plan.hep.HepProgramBuilder;
import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelVisitor;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.type.RelDataType;
@@ -177,6 +178,7 @@ import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -1340,7 +1342,7 @@ public class Hive {
* @return the list of materialized views available for rewriting
* @throws HiveException
*/
- public List<RelOptMaterialization> getAllValidMaterializedViews(boolean forceMVContentsUpToDate, ValidTxnWriteIdList txnList)
+ public List<RelOptMaterialization> getAllValidMaterializedViews(boolean forceMVContentsUpToDate, String validTxnsList)
throws HiveException {
// Final result
List<RelOptMaterialization> result = new ArrayList<>();
@@ -1352,7 +1354,7 @@ public class Hive {
// Bail out: empty list
continue;
}
- result.addAll(getValidMaterializedViews(dbName, materializedViewNames, forceMVContentsUpToDate, txnList));
+ result.addAll(getValidMaterializedViews(dbName, materializedViewNames, forceMVContentsUpToDate, validTxnsList));
}
return result;
} catch (Exception e) {
@@ -1361,12 +1363,12 @@ public class Hive {
}
public List<RelOptMaterialization> getValidMaterializedView(String dbName, String materializedViewName,
- boolean forceMVContentsUpToDate, ValidTxnWriteIdList txnList) throws HiveException {
- return getValidMaterializedViews(dbName, ImmutableList.of(materializedViewName), forceMVContentsUpToDate, txnList);
+ boolean forceMVContentsUpToDate, String validTxnsList) throws HiveException {
+ return getValidMaterializedViews(dbName, ImmutableList.of(materializedViewName), forceMVContentsUpToDate, validTxnsList);
}
private List<RelOptMaterialization> getValidMaterializedViews(String dbName, List<String> materializedViewNames,
- boolean forceMVContentsUpToDate, ValidTxnWriteIdList txnList) throws HiveException {
+ boolean forceMVContentsUpToDate, String validTxnsList) throws HiveException {
final boolean tryIncrementalRewriting =
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_INCREMENTAL);
final long defaultDiff =
@@ -1421,7 +1423,7 @@ public class Hive {
}
if (outdated && (!tryIncrementalRewriting || materializationInvInfo == null
- || txnList == null || materializationInvInfo.isSourceTablesUpdateDeleteModified())) {
+ || validTxnsList == null || materializationInvInfo.isSourceTablesUpdateDeleteModified())) {
// We will not try partial rewriting either because the config specification, this
// is a rebuild over some non-transactional table, or there were update/delete
// operations in the source tables (not supported yet)
@@ -1450,7 +1452,7 @@ public class Hive {
// We will rewrite it to include the filters on transaction list
// so we can produce partial rewritings
materialization = augmentMaterializationWithTimeInformation(
- materialization, txnList, new ValidTxnWriteIdList(
+ materialization, validTxnsList, new ValidTxnWriteIdList(
materializationInvInfo.getValidTxnList()));
}
result.add(materialization);
@@ -1473,7 +1475,7 @@ public class Hive {
// We will rewrite it to include the filters on transaction list
// so we can produce partial rewritings
materialization = augmentMaterializationWithTimeInformation(
- materialization, txnList, new ValidTxnWriteIdList(
+ materialization, validTxnsList, new ValidTxnWriteIdList(
materializationInvInfo.getValidTxnList()));
}
result.add(materialization);
@@ -1497,8 +1499,24 @@ public class Hive {
* its invalidation.
*/
private static RelOptMaterialization augmentMaterializationWithTimeInformation(
- RelOptMaterialization materialization, ValidTxnWriteIdList currentTxnList,
- ValidTxnWriteIdList materializationTxnList) {
+ RelOptMaterialization materialization, String validTxnsList,
+ ValidTxnWriteIdList materializationTxnList) throws LockException {
+ // Extract tables used by the query which will in turn be used to generate
+ // the corresponding txn write ids
+ List<String> tablesUsed = new ArrayList<>();
+ new RelVisitor() {
+ @Override
+ public void visit(RelNode node, int ordinal, RelNode parent) {
+ if (node instanceof TableScan) {
+ TableScan ts = (TableScan) node;
+ tablesUsed.add(((RelOptHiveTable) ts.getTable()).getHiveTableMD().getFullyQualifiedName());
+ }
+ super.visit(node, ordinal, parent);
+ }
+ }.go(materialization.queryRel);
+ ValidTxnWriteIdList currentTxnList =
+ SessionState.get().getTxnMgr().getValidWriteIds(tablesUsed, validTxnsList);
+ // Augment
final RexBuilder rexBuilder = materialization.queryRel.getCluster().getRexBuilder();
final HepProgramBuilder augmentMaterializationProgram = new HepProgramBuilder()
.addRuleInstance(new HiveAugmentMaterializationRule(rexBuilder, currentTxnList, materializationTxnList));
http://git-wip-us.apache.org/repos/asf/hive/blob/0992d829/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index d939110..e091f38 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2081,35 +2081,18 @@ public class CalcitePlanner extends SemanticAnalyzer {
// Add views to planner
List<RelOptMaterialization> materializations = new ArrayList<>();
try {
- // Extract tables used by the query which will in turn be used to generate
- // the corresponding txn write ids
- List<String> tablesUsed = new ArrayList<>();
- new RelVisitor() {
- @Override
- public void visit(RelNode node, int ordinal, RelNode parent) {
- if (node instanceof TableScan) {
- TableScan ts = (TableScan) node;
- tablesUsed.add(((RelOptHiveTable) ts.getTable()).getHiveTableMD().getFullyQualifiedName());
- }
- super.visit(node, ordinal, parent);
- }
- }.go(basePlan);
final String validTxnsList = conf.get(ValidTxnList.VALID_TXNS_KEY);
- ValidTxnWriteIdList txnWriteIds = null;
- if (validTxnsList != null && !validTxnsList.isEmpty()) {
- txnWriteIds = getTxnMgr().getValidWriteIds(tablesUsed, validTxnsList);
- }
if (mvRebuildMode != MaterializationRebuildMode.NONE) {
// We only retrieve the materialization corresponding to the rebuild. In turn,
// we pass 'true' for the forceMVContentsUpToDate parameter, as we cannot allow the
// materialization contents to be stale for a rebuild if we want to use it.
materializations = Hive.get().getValidMaterializedView(mvRebuildDbName, mvRebuildName,
- true, txnWriteIds);
+ true, validTxnsList);
} else {
// This is not a rebuild, we retrieve all the materializations. In turn, we do not need
// to force the materialization contents to be up-to-date, as this is not a rebuild, and
// we apply the user parameters (HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW) instead.
- materializations = Hive.get().getAllValidMaterializedViews(false, txnWriteIds);
+ materializations = Hive.get().getAllValidMaterializedViews(false, validTxnsList);
}
// We need to use the current cluster for the scan operator on views,
// otherwise the planner will throw an Exception (different planners)
http://git-wip-us.apache.org/repos/asf/hive/blob/0992d829/ql/src/test/queries/clientpositive/materialized_view_rebuild.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/materialized_view_rebuild.q b/ql/src/test/queries/clientpositive/materialized_view_rebuild.q
new file mode 100644
index 0000000..9f6eaaf
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_rebuild.q
@@ -0,0 +1,31 @@
+-- SORT_QUERY_RESULTS
+
+set hive.vectorized.execution.enabled=false;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.strict.checks.cartesian.product=false;
+set hive.stats.fetch.column.stats=true;
+set hive.materializedview.rewriting=true;
+
+drop materialized view if exists mv_rebuild;
+drop table if exists basetable_rebuild;
+
+create table basetable_rebuild (a int, b varchar(256), c decimal(10,2))
+stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into basetable_rebuild values (1, 'alfred', 10.30),(2, 'bob', 3.14),(2, 'bonnie', 172342.2),(3, 'calvin', 978.76),(3, 'charlie', 9.8);
+
+create materialized view mv_rebuild as select a, b, sum(a) from basetable_rebuild group by a,b;
+
+select * from mv_rebuild;
+
+insert into basetable_rebuild values (4, 'amia', 7.5);
+
+select * from mv_rebuild;
+
+alter materialized view mv_rebuild rebuild;
+
+select * from mv_rebuild;
+
+drop materialized view mv_rebuild;
+drop table basetable_rebuild;
http://git-wip-us.apache.org/repos/asf/hive/blob/0992d829/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out
new file mode 100644
index 0000000..4d37d82
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rebuild.q.out
@@ -0,0 +1,117 @@
+PREHOOK: query: drop materialized view if exists mv_rebuild
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: query: drop materialized view if exists mv_rebuild
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: query: drop table if exists basetable_rebuild
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists basetable_rebuild
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table basetable_rebuild (a int, b varchar(256), c decimal(10,2))
+stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@basetable_rebuild
+POSTHOOK: query: create table basetable_rebuild (a int, b varchar(256), c decimal(10,2))
+stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@basetable_rebuild
+PREHOOK: query: insert into basetable_rebuild values (1, 'alfred', 10.30),(2, 'bob', 3.14),(2, 'bonnie', 172342.2),(3, 'calvin', 978.76),(3, 'charlie', 9.8)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@basetable_rebuild
+POSTHOOK: query: insert into basetable_rebuild values (1, 'alfred', 10.30),(2, 'bob', 3.14),(2, 'bonnie', 172342.2),(3, 'calvin', 978.76),(3, 'charlie', 9.8)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@basetable_rebuild
+POSTHOOK: Lineage: basetable_rebuild.a SCRIPT []
+POSTHOOK: Lineage: basetable_rebuild.b SCRIPT []
+POSTHOOK: Lineage: basetable_rebuild.c SCRIPT []
+PREHOOK: query: create materialized view mv_rebuild as select a, b, sum(a) from basetable_rebuild group by a,b
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@basetable_rebuild
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv_rebuild
+POSTHOOK: query: create materialized view mv_rebuild as select a, b, sum(a) from basetable_rebuild group by a,b
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@basetable_rebuild
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv_rebuild
+PREHOOK: query: select * from mv_rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_rebuild
+#### A masked pattern was here ####
+POSTHOOK: query: select * from mv_rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_rebuild
+#### A masked pattern was here ####
+1 alfred 1
+2 bob 2
+2 bonnie 2
+3 calvin 3
+3 charlie 3
+PREHOOK: query: insert into basetable_rebuild values (4, 'amia', 7.5)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@basetable_rebuild
+POSTHOOK: query: insert into basetable_rebuild values (4, 'amia', 7.5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@basetable_rebuild
+POSTHOOK: Lineage: basetable_rebuild.a SCRIPT []
+POSTHOOK: Lineage: basetable_rebuild.b SCRIPT []
+POSTHOOK: Lineage: basetable_rebuild.c SCRIPT []
+PREHOOK: query: select * from mv_rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_rebuild
+#### A masked pattern was here ####
+POSTHOOK: query: select * from mv_rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_rebuild
+#### A masked pattern was here ####
+1 alfred 1
+2 bob 2
+2 bonnie 2
+3 calvin 3
+3 charlie 3
+PREHOOK: query: alter materialized view mv_rebuild rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@basetable_rebuild
+PREHOOK: Output: default@mv_rebuild
+POSTHOOK: query: alter materialized view mv_rebuild rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@basetable_rebuild
+POSTHOOK: Output: default@mv_rebuild
+POSTHOOK: Lineage: mv_rebuild._c2 EXPRESSION [(basetable_rebuild)basetable_rebuild.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: mv_rebuild.a SIMPLE [(basetable_rebuild)basetable_rebuild.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: mv_rebuild.b SIMPLE [(basetable_rebuild)basetable_rebuild.FieldSchema(name:b, type:varchar(256), comment:null), ]
+PREHOOK: query: select * from mv_rebuild
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mv_rebuild
+#### A masked pattern was here ####
+POSTHOOK: query: select * from mv_rebuild
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mv_rebuild
+#### A masked pattern was here ####
+1 alfred 1
+2 bob 2
+2 bonnie 2
+3 calvin 3
+3 charlie 3
+4 amia 4
+PREHOOK: query: drop materialized view mv_rebuild
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@mv_rebuild
+PREHOOK: Output: default@mv_rebuild
+POSTHOOK: query: drop materialized view mv_rebuild
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@mv_rebuild
+POSTHOOK: Output: default@mv_rebuild
+PREHOOK: query: drop table basetable_rebuild
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@basetable_rebuild
+PREHOOK: Output: default@basetable_rebuild
+POSTHOOK: query: drop table basetable_rebuild
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@basetable_rebuild
+POSTHOOK: Output: default@basetable_rebuild