You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/06/27 06:22:46 UTC
[12/12] hive git commit: HIVE-19027: Make materializations
invalidation cache work with multiple active remote metastores (Jesus Camacho
Rodriguez, reviewed by Ashutosh Chauhan)
HIVE-19027: Make materializations invalidation cache work with multiple active remote metastores (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/774a8ef7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/774a8ef7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/774a8ef7
Branch: refs/heads/branch-3
Commit: 774a8ef7a6e92c8a43cad2fa66bd944e666f75f0
Parents: d558b3c
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Tue Jun 26 11:49:46 2018 -0700
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Tue Jun 26 23:21:21 2018 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 26 +-
.../hive/ql/exec/MaterializedViewTask.java | 2 -
.../apache/hadoop/hive/ql/metadata/Hive.java | 124 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 21 +-
...terialized_view_create_rewrite_time_window.q | 4 +-
.../clientpositive/druid/druidmini_mv.q.out | 81 +-
.../materialized_view_create_rewrite_5.q.out | 4 +-
...alized_view_create_rewrite_time_window.q.out | 16 +-
.../llap/materialized_view_rewrite_empty.q.out | 4 +-
.../gen/thrift/gen-cpp/ThriftHiveMetastore.cpp | 2351 +++++++-------
.../gen/thrift/gen-cpp/ThriftHiveMetastore.h | 52 +-
.../ThriftHiveMetastore_server.skeleton.cpp | 2 +-
.../gen/thrift/gen-cpp/hive_metastore_types.cpp | 1442 +++++----
.../gen/thrift/gen-cpp/hive_metastore_types.h | 45 +-
.../hive/metastore/api/CreationMetadata.java | 111 +-
.../metastore/api/FindSchemasByColsResp.java | 36 +-
.../hive/metastore/api/Materialization.java | 409 +--
.../hive/metastore/api/SchemaVersion.java | 36 +-
.../hive/metastore/api/ThriftHiveMetastore.java | 2858 +++++++++---------
.../hive/metastore/api/WMFullResourcePlan.java | 144 +-
.../api/WMGetAllResourcePlanResponse.java | 36 +-
.../WMGetTriggersForResourePlanResponse.java | 36 +-
.../api/WMValidateResourcePlanResponse.java | 64 +-
.../gen-php/metastore/ThriftHiveMetastore.php | 1510 +++++----
.../src/gen/thrift/gen-php/metastore/Types.php | 324 +-
.../hive_metastore/ThriftHiveMetastore-remote | 4 +-
.../hive_metastore/ThriftHiveMetastore.py | 1015 +++----
.../gen/thrift/gen-py/hive_metastore/ttypes.py | 208 +-
.../gen/thrift/gen-rb/hive_metastore_types.rb | 16 +-
.../gen/thrift/gen-rb/thrift_hive_metastore.rb | 20 +-
.../hadoop/hive/metastore/HiveMetaStore.java | 10 +-
.../hive/metastore/HiveMetaStoreClient.java | 7 +-
.../hadoop/hive/metastore/IMetaStoreClient.java | 2 +-
.../MaterializationsCacheCleanerTask.java | 63 -
.../MaterializationsInvalidationCache.java | 543 ----
.../MaterializationsRebuildLockCleanerTask.java | 30 +-
.../hadoop/hive/metastore/ObjectStore.java | 20 +-
.../hive/metastore/conf/MetastoreConf.java | 2 -
.../hive/metastore/model/MCreationMetadata.java | 16 +-
.../hadoop/hive/metastore/txn/TxnDbUtil.java | 12 +-
.../hadoop/hive/metastore/txn/TxnHandler.java | 300 +-
.../hadoop/hive/metastore/txn/TxnStore.java | 38 +-
.../main/sql/derby/hive-schema-3.1.0.derby.sql | 14 +-
.../sql/derby/upgrade-3.0.0-to-3.1.0.derby.sql | 19 +
.../main/sql/mssql/hive-schema-3.1.0.mssql.sql | 17 +-
.../sql/mssql/upgrade-3.0.0-to-3.1.0.mssql.sql | 18 +
.../main/sql/mysql/hive-schema-3.1.0.mysql.sql | 12 +-
.../sql/mysql/upgrade-3.0.0-to-3.1.0.mysql.sql | 19 +
.../sql/oracle/hive-schema-3.1.0.oracle.sql | 14 +-
.../oracle/upgrade-3.0.0-to-3.1.0.oracle.sql | 19 +
.../sql/postgres/hive-schema-3.1.0.postgres.sql | 14 +-
.../upgrade-3.0.0-to-3.1.0.postgres.sql | 19 +
.../src/main/thrift/hive_metastore.thrift | 8 +-
.../HiveMetaStoreClientPreCatalog.java | 7 +-
...stMetaStoreMaterializationsCacheCleaner.java | 328 --
.../TestTablesCreateDropAlterTruncate.java | 1 +
56 files changed, 5643 insertions(+), 6910 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/774a8ef7/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 6281dc3..2934052 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1611,13 +1611,13 @@ public class HiveConf extends Configuration {
"choosing the plan with lower cost among possible plans containing a materialized view\n" +
" costbased: Fully cost-based strategy, always use plan with lower cost, independently on whether " +
"it uses a materialized view or not"),
- HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW("hive.materializedview.rewriting.time.window", "0s", new TimeValidator(TimeUnit.SECONDS),
+ HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW("hive.materializedview.rewriting.time.window", "0min", new TimeValidator(TimeUnit.MINUTES),
"Time window, specified in seconds, after which outdated materialized views become invalid for automatic query rewriting.\n" +
- "For instance, if a materialized view is created and afterwards one of its source tables is changed at " +
- "moment in time t0, the materialized view will not be considered for rewriting anymore after t0 plus " +
- "the value assigned to this property. Default value 0 means that the materialized view cannot be " +
- "outdated to be used automatically in query rewriting."),
- HIVE_MATERIALIZED_VIEW_REWRITING_INCREMENTAL("hive.materializedview.rewriting.incremental", true,
+ "For instance, if more time than the value assigned to the property has passed since the materialized view " +
+ "was created or rebuilt, and one of its source tables has changed since, the materialized view will not be " +
+ "considered for rewriting. Default value 0 means that the materialized view cannot be " +
+ "outdated to be used automatically in query rewriting. Value -1 means to skip this check."),
+ HIVE_MATERIALIZED_VIEW_REWRITING_INCREMENTAL("hive.materializedview.rewriting.incremental", false,
"Whether to try to execute incremental rewritings based on outdated materializations and\n" +
"current content of tables. Default value of true effectively amounts to enabling incremental\n" +
"rebuild for the materializations too."),
@@ -1625,24 +1625,12 @@ public class HiveConf extends Configuration {
"Whether to try to execute incremental rebuild for the materialized views. Incremental rebuild\n" +
"tries to modify the original materialization contents to reflect the latest changes to the\n" +
"materialized view source tables, instead of rebuilding the contents fully. Incremental rebuild\n" +
- "is based on the materialized view algebraic incremental rewriting. Hence, this requires\n" +
- "hive.materializedview.rewriting.incremental to be true."),
+ "is based on the materialized view algebraic incremental rewriting."),
HIVE_MATERIALIZED_VIEW_FILE_FORMAT("hive.materializedview.fileformat", "ORC",
new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC"),
"Default file format for CREATE MATERIALIZED VIEW statement"),
HIVE_MATERIALIZED_VIEW_SERDE("hive.materializedview.serde",
"org.apache.hadoop.hive.ql.io.orc.OrcSerde", "Default SerDe used for materialized views"),
- HIVE_MATERIALIZATIONS_INVALIDATION_CACHE_IMPL("hive.metastore.materializations.invalidation.impl", "DEFAULT",
- new StringSet("DEFAULT", "DISABLE"),
- "The implementation that we should use for the materializations invalidation cache. \n" +
- " DEFAULT: Default implementation for invalidation cache\n" +
- " DISABLE: Disable invalidation cache (debugging purposes)"),
- HIVE_MATERIALIZATIONS_INVALIDATION_CACHE_CLEAN_FREQUENCY("hive.metastore.materializations.invalidation.clean.frequency",
- "3600s", new TimeValidator(TimeUnit.SECONDS), "Frequency at which timer task runs to remove unnecessary transactions information from" +
- "materializations invalidation cache."),
- HIVE_MATERIALIZATIONS_INVALIDATION_CACHE_EXPIRY_DURATION("hive.metastore.materializations.invalidation.max.duration",
- "86400s", new TimeValidator(TimeUnit.SECONDS), "Maximum duration for query producing a materialization. After this time, transactions" +
- "information that is not relevant for materializations can be removed from invalidation cache."),
// hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row,
// need to remove by hive .13. Also, do not change default (see SMB operator)
http://git-wip-us.apache.org/repos/asf/hive/blob/774a8ef7/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java
index 834df84..6208abd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MaterializedViewTask.java
@@ -19,9 +19,7 @@
package org.apache.hadoop.hive.ql.exec;
import com.google.common.collect.ImmutableSet;
-import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
-import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.metastore.api.CreationMetadata;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.DriverContext;
http://git-wip-us.apache.org/repos/asf/hive/blob/774a8ef7/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index fe01646..ea200c8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -84,7 +84,9 @@ import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
+import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable;
import org.apache.hadoop.hive.common.log.InPlaceUpdate;
@@ -192,6 +194,7 @@ import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
+import org.apache.hive.common.util.TxnIdUtils;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -1331,7 +1334,7 @@ public class Hive {
* @return the list of materialized views available for rewriting
* @throws HiveException
*/
- public List<RelOptMaterialization> getAllValidMaterializedViews(boolean forceMVContentsUpToDate, String validTxnsList)
+ public List<RelOptMaterialization> getAllValidMaterializedViews(List<String> tablesUsed, boolean forceMVContentsUpToDate)
throws HiveException {
// Final result
List<RelOptMaterialization> result = new ArrayList<>();
@@ -1343,7 +1346,7 @@ public class Hive {
// Bail out: empty list
continue;
}
- result.addAll(getValidMaterializedViews(dbName, materializedViewNames, forceMVContentsUpToDate, validTxnsList));
+ result.addAll(getValidMaterializedViews(dbName, materializedViewNames, tablesUsed, forceMVContentsUpToDate));
}
return result;
} catch (Exception e) {
@@ -1352,14 +1355,19 @@ public class Hive {
}
public List<RelOptMaterialization> getValidMaterializedView(String dbName, String materializedViewName,
- boolean forceMVContentsUpToDate, String validTxnsList) throws HiveException {
- return getValidMaterializedViews(dbName, ImmutableList.of(materializedViewName), forceMVContentsUpToDate, validTxnsList);
+ List<String> tablesUsed, boolean forceMVContentsUpToDate) throws HiveException {
+ return getValidMaterializedViews(dbName, ImmutableList.of(materializedViewName), tablesUsed, forceMVContentsUpToDate);
}
private List<RelOptMaterialization> getValidMaterializedViews(String dbName, List<String> materializedViewNames,
- boolean forceMVContentsUpToDate, String validTxnsList) throws HiveException {
+ List<String> tablesUsed, boolean forceMVContentsUpToDate) throws HiveException {
+ final String validTxnsList = conf.get(ValidTxnList.VALID_TXNS_KEY);
+ final ValidTxnWriteIdList currentTxnWriteIds =
+ SessionState.get().getTxnMgr().getValidWriteIds(tablesUsed, validTxnsList);
final boolean tryIncrementalRewriting =
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_INCREMENTAL);
+ final boolean tryIncrementalRebuild =
+ HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REBUILD_INCREMENTAL);
final long defaultDiff =
HiveConf.getTimeVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW,
TimeUnit.MILLISECONDS);
@@ -1368,8 +1376,6 @@ public class Hive {
// Final result
List<RelOptMaterialization> result = new ArrayList<>();
List<Table> materializedViewTables = getTableObjects(dbName, materializedViewNames);
- Map<String, Materialization> databaseInvalidationInfo =
- getMSC().getMaterializationsInvalidationInfo(dbName, materializedViewNames);
for (Table materializedViewTable : materializedViewTables) {
// Check if materialization defined its own invalidation time window
String timeWindowString = materializedViewTable.getProperty(MATERIALIZED_VIEW_REWRITING_TIME_WINDOW);
@@ -1377,7 +1383,7 @@ public class Hive {
HiveConf.toTime(timeWindowString,
HiveConf.getDefaultTimeUnit(HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW),
TimeUnit.MILLISECONDS);
- Materialization materializationInvInfo = null;
+ CreationMetadata creationMetadata = materializedViewTable.getCreationMetadata();
boolean outdated = false;
if (diff < 0L) {
// We only consider the materialized view to be outdated if forceOutdated = true, i.e.,
@@ -1385,40 +1391,80 @@ public class Hive {
outdated = forceMVContentsUpToDate;
} else {
// Check whether the materialized view is invalidated
- materializationInvInfo =
- databaseInvalidationInfo.get(materializedViewTable.getTableName());
- if (materializationInvInfo == null) {
- LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
- " ignored for rewriting as there was no information loaded in the invalidation cache");
- continue;
- }
- long invalidationTime = materializationInvInfo.getInvalidationTime();
- if (invalidationTime == Long.MIN_VALUE) {
- LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
- " ignored for rewriting as it contains non-transactional tables");
- continue;
- }
- // If the limit is not met, we do not add the materialized view.
- // If we are doing a rebuild, we do not consider outdated materialized views either.
- if (diff == 0L || forceMVContentsUpToDate) {
- if (invalidationTime != 0L) {
- outdated = true;
+ if (forceMVContentsUpToDate || diff == 0L || creationMetadata.getMaterializationTime() < currentTime - diff) {
+ if (currentTxnWriteIds == null) {
+ LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
+ " ignored for rewriting as we could not obtain current txn ids");
+ continue;
}
- } else {
- if (invalidationTime != 0L && invalidationTime > currentTime - diff) {
- outdated = true;
+ if (creationMetadata.getValidTxnList() == null ||
+ creationMetadata.getValidTxnList().isEmpty()) {
+ LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
+ " ignored for rewriting as we could not obtain materialization txn ids");
+ continue;
+ }
+ boolean ignore = false;
+ ValidTxnWriteIdList mvTxnWriteIds = new ValidTxnWriteIdList(
+ creationMetadata.getValidTxnList());
+ for (String qName : tablesUsed) {
+ // Note. If the materialized view does not contain a table that is contained in the query,
+ // we do not need to check whether that specific table is outdated or not. If a rewriting
+ // is produced in those cases, it is because that additional table is joined with the
+ // existing tables with an append-columns only join, i.e., PK-FK + not null.
+ if (!creationMetadata.getTablesUsed().contains(qName)) {
+ continue;
+ }
+ ValidWriteIdList tableCurrentWriteIds = currentTxnWriteIds.getTableValidWriteIdList(qName);
+ if (tableCurrentWriteIds == null) {
+ // Uses non-transactional table, cannot be considered
+ LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
+ " ignored for rewriting as it is outdated and cannot be considered for " +
+ " rewriting because it uses non-transactional table " + qName);
+ ignore = true;
+ break;
+ }
+ ValidWriteIdList tableWriteIds = mvTxnWriteIds.getTableValidWriteIdList(qName);
+ if (tableWriteIds == null) {
+ // This should not happen, but we ignore for safety
+ LOG.warn("Materialized view " + materializedViewTable.getFullyQualifiedName() +
+ " ignored for rewriting as details about txn ids for table " + qName +
+ " could not be found in " + mvTxnWriteIds);
+ ignore = true;
+ break;
+ }
+ if (!outdated && !TxnIdUtils.checkEquivalentWriteIds(tableCurrentWriteIds, tableWriteIds)) {
+ LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
+ " contents are outdated");
+ outdated = true;
+ }
+ }
+ if (ignore) {
+ continue;
}
}
}
- if (outdated && (!tryIncrementalRewriting || materializationInvInfo == null
- || validTxnsList == null || materializationInvInfo.isSourceTablesUpdateDeleteModified())) {
- // We will not try partial rewriting either because the config specification, this
- // is a rebuild over some non-transactional table, or there were update/delete
- // operations in the source tables (not supported yet)
- LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
- " ignored for rewriting as its contents are outdated");
- continue;
+ if (outdated) {
+ // The MV is outdated, see whether we should consider it for rewriting or not
+ boolean ignore = false;
+ if (forceMVContentsUpToDate && !tryIncrementalRebuild) {
+ // We will not try partial rewriting for rebuild if incremental rebuild is disabled
+ ignore = true;
+ } else if (!forceMVContentsUpToDate && !tryIncrementalRewriting) {
+ // We will not try partial rewriting for non-rebuild if incremental rewriting is disabled
+ ignore = true;
+ } else {
+ // Obtain additional information if we should try incremental rewriting / rebuild
+ // We will not try partial rewriting if there were update/delete operations on source tables
+ Materialization invalidationInfo = getMSC().getMaterializationInvalidationInfo(
+ creationMetadata, conf.get(ValidTxnList.VALID_TXNS_KEY));
+ ignore = invalidationInfo == null || invalidationInfo.isSourceTablesUpdateDeleteModified();
+ }
+ if (ignore) {
+ LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
+ " ignored for rewriting as its contents are outdated");
+ continue;
+ }
}
// It passed the test, load
@@ -1442,7 +1488,7 @@ public class Hive {
// so we can produce partial rewritings
materialization = augmentMaterializationWithTimeInformation(
materialization, validTxnsList, new ValidTxnWriteIdList(
- materializationInvInfo.getValidTxnList()));
+ creationMetadata.getValidTxnList()));
}
result.add(materialization);
continue;
@@ -1465,7 +1511,7 @@ public class Hive {
// so we can produce partial rewritings
materialization = augmentMaterializationWithTimeInformation(
materialization, validTxnsList, new ValidTxnWriteIdList(
- materializationInvInfo.getValidTxnList()));
+ creationMetadata.getValidTxnList()));
}
result.add(materialization);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/774a8ef7/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e091f38..fecfd0c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2081,18 +2081,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
// Add views to planner
List<RelOptMaterialization> materializations = new ArrayList<>();
try {
- final String validTxnsList = conf.get(ValidTxnList.VALID_TXNS_KEY);
if (mvRebuildMode != MaterializationRebuildMode.NONE) {
// We only retrieve the materialization corresponding to the rebuild. In turn,
// we pass 'true' for the forceMVContentsUpToDate parameter, as we cannot allow the
// materialization contents to be stale for a rebuild if we want to use it.
materializations = Hive.get().getValidMaterializedView(mvRebuildDbName, mvRebuildName,
- true, validTxnsList);
+ getTablesUsed(basePlan), true);
} else {
// This is not a rebuild, we retrieve all the materializations. In turn, we do not need
// to force the materialization contents to be up-to-date, as this is not a rebuild, and
// we apply the user parameters (HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW) instead.
- materializations = Hive.get().getAllValidMaterializedViews(false, validTxnsList);
+ materializations = Hive.get().getAllValidMaterializedViews(getTablesUsed(basePlan), false);
}
// We need to use the current cluster for the scan operator on views,
// otherwise the planner will throw an Exception (different planners)
@@ -2169,7 +2168,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
// A rewriting was produced, we will check whether it was part of an incremental rebuild
// to try to replace INSERT OVERWRITE by INSERT
if (mvRebuildMode == MaterializationRebuildMode.INSERT_OVERWRITE_REBUILD &&
- HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_INCREMENTAL) &&
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REBUILD_INCREMENTAL)) {
// First we need to check if it is valid to convert to MERGE/INSERT INTO.
// If we succeed, we modify the plan and afterwards the AST.
@@ -2196,6 +2194,21 @@ public class CalcitePlanner extends SemanticAnalyzer {
return basePlan;
}
+ private List<String> getTablesUsed(RelNode plan) {
+ List<String> tablesUsed = new ArrayList<>();
+ new RelVisitor() {
+ @Override
+ public void visit(RelNode node, int ordinal, RelNode parent) {
+ if (node instanceof TableScan) {
+ TableScan ts = (TableScan) node;
+ tablesUsed.add(((RelOptHiveTable) ts.getTable()).getHiveTableMD().getFullyQualifiedName());
+ }
+ super.visit(node, ordinal, parent);
+ }
+ }.go(plan);
+ return tablesUsed;
+ }
+
/**
* Run the HEP Planner with the given rule set.
*
http://git-wip-us.apache.org/repos/asf/hive/blob/774a8ef7/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_time_window.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_time_window.q b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_time_window.q
index c61730e..55c6c04 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_time_window.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_create_rewrite_time_window.q
@@ -25,13 +25,13 @@ analyze table cmv_basetable_2_n1 compute statistics for columns;
-- CREATE VIEW WITH REWRITE DISABLED
EXPLAIN
-CREATE MATERIALIZED VIEW cmv_mat_view_n3 DISABLE REWRITE TBLPROPERTIES('rewriting.time.window'='300s') AS
+CREATE MATERIALIZED VIEW cmv_mat_view_n3 DISABLE REWRITE TBLPROPERTIES('rewriting.time.window'='5min') AS
SELECT cmv_basetable_n3.a, cmv_basetable_2_n1.c
FROM cmv_basetable_n3 JOIN cmv_basetable_2_n1 ON (cmv_basetable_n3.a = cmv_basetable_2_n1.a)
WHERE cmv_basetable_2_n1.c > 10.0
GROUP BY cmv_basetable_n3.a, cmv_basetable_2_n1.c;
-CREATE MATERIALIZED VIEW cmv_mat_view_n3 DISABLE REWRITE TBLPROPERTIES('rewriting.time.window'='300s') AS
+CREATE MATERIALIZED VIEW cmv_mat_view_n3 DISABLE REWRITE TBLPROPERTIES('rewriting.time.window'='5min') AS
SELECT cmv_basetable_n3.a, cmv_basetable_2_n1.c
FROM cmv_basetable_n3 JOIN cmv_basetable_2_n1 ON (cmv_basetable_n3.a = cmv_basetable_2_n1.a)
WHERE cmv_basetable_2_n1.c > 10.0
http://git-wip-us.apache.org/repos/asf/hive/blob/774a8ef7/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
index 7720db3..470f132 100644
--- a/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
+++ b/ql/src/test/results/clientpositive/druid/druidmini_mv.q.out
@@ -160,30 +160,35 @@ STAGE PLANS:
limit: -1
Processor Tree:
TableScan
- alias: cmv_basetable_n2
- Filter Operator
- predicate: (a = 3) (type: boolean)
- Select Operator
- expressions: 3 (type: int), c (type: double)
- outputColumnNames: _col0, _col1
- ListSink
+ alias: cmv_mat_view2_n0
+ properties:
+ druid.fieldNames vc,c
+ druid.fieldTypes int,double
+ druid.query.json {"queryType":"scan","dataSource":"default.cmv_mat_view2_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"3","outputType":"LONG"}],"columns":["vc","c"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ Select Operator
+ expressions: vc (type: int), c (type: double)
+ outputColumnNames: _col0, _col1
+ ListSink
PREHOOK: query: SELECT a, c
FROM cmv_basetable_n2
WHERE a = 3
PREHOOK: type: QUERY
PREHOOK: Input: default@cmv_basetable_n2
+PREHOOK: Input: default@cmv_mat_view2_n0
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: SELECT a, c
FROM cmv_basetable_n2
WHERE a = 3
POSTHOOK: type: QUERY
POSTHOOK: Input: default@cmv_basetable_n2
+POSTHOOK: Input: default@cmv_mat_view2_n0
POSTHOOK: Output: hdfs://### HDFS PATH ###
-3 15.8
-3 9.8
-3 978.76
-Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+3 15.800000190734863
+3 9.800000190734863
+3 978.760009765625
+Warning: Shuffle Join MERGEJOIN[10][tables = [cmv_mat_view2_n0, $hdt$_0]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: EXPLAIN
SELECT * FROM (
(SELECT a, c FROM cmv_basetable_n2 WHERE a = 3) table1
@@ -214,34 +219,32 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: cmv_basetable_n2
- Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (a = 3) (type: boolean)
- Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: c (type: double)
- outputColumnNames: _col0
- Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: double)
- Map 3
- Map Operator Tree:
- TableScan
- alias: cmv_basetable_n2
Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((a = 3) and (d = 3)) (type: boolean)
Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: c (type: double)
- outputColumnNames: _col0
+ outputColumnNames: _col1
Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Statistics: Num rows: 6 Data size: 96 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: double)
+ value expressions: _col1 (type: double)
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: cmv_mat_view2_n0
+ properties:
+ druid.fieldNames a,c
+ druid.fieldTypes int,double
+ druid.query.json {"queryType":"scan","dataSource":"default.cmv_mat_view2_n0","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"columns":["a","c"],"resultFormat":"compactedList"}
+ druid.query.type scan
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+ value expressions: a (type: int), c (type: double)
Reducer 2
Reduce Operator Tree:
Merge Join Operator
@@ -250,15 +253,15 @@ STAGE PLANS:
keys:
0
1
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col6
+ Statistics: Num rows: 18 Data size: 522 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: 3 (type: int), _col0 (type: double), 3 (type: int), _col1 (type: double)
+ expressions: _col0 (type: int), _col1 (type: double), _col0 (type: int), _col6 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 18 Data size: 522 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 36 Data size: 1044 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 18 Data size: 522 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -270,7 +273,7 @@ STAGE PLANS:
Processor Tree:
ListSink
-Warning: Shuffle Join MERGEJOIN[13][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[10][tables = [cmv_mat_view2_n0, $hdt$_0]] in Stage 'Reducer 2' is a cross product
PREHOOK: query: SELECT * FROM (
(SELECT a, c FROM cmv_basetable_n2 WHERE a = 3) table1
JOIN
@@ -278,6 +281,7 @@ PREHOOK: query: SELECT * FROM (
ON table1.a = table2.a)
PREHOOK: type: QUERY
PREHOOK: Input: default@cmv_basetable_n2
+PREHOOK: Input: default@cmv_mat_view2_n0
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: SELECT * FROM (
(SELECT a, c FROM cmv_basetable_n2 WHERE a = 3) table1
@@ -286,10 +290,11 @@ POSTHOOK: query: SELECT * FROM (
ON table1.a = table2.a)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@cmv_basetable_n2
+POSTHOOK: Input: default@cmv_mat_view2_n0
POSTHOOK: Output: hdfs://### HDFS PATH ###
-3 15.8 3 978.76
-3 9.8 3 978.76
-3 978.76 3 978.76
+3 15.800000190734863 3 978.76
+3 9.800000190734863 3 978.76
+3 978.760009765625 3 978.76
PREHOOK: query: INSERT INTO cmv_basetable_n2 VALUES
(cast(current_timestamp() AS timestamp), 3, 'charlie', 'charlie_c', 15.8, 1)
PREHOOK: type: QUERY
http://git-wip-us.apache.org/repos/asf/hive/blob/774a8ef7/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out
index 20146f8..2c4ea52 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_5.q.out
@@ -178,12 +178,12 @@ STAGE PLANS:
Select Operator
expressions: a (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: may be used (ACID table)
Reducer 2
http://git-wip-us.apache.org/repos/asf/hive/blob/774a8ef7/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out
index cfff416..c379b8e 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_time_window.q.out
@@ -73,14 +73,14 @@ POSTHOOK: Input: default@cmv_basetable_2_n1
POSTHOOK: Output: default@cmv_basetable_2_n1
#### A masked pattern was here ####
PREHOOK: query: EXPLAIN
-CREATE MATERIALIZED VIEW cmv_mat_view_n3 DISABLE REWRITE TBLPROPERTIES('rewriting.time.window'='300s') AS
+CREATE MATERIALIZED VIEW cmv_mat_view_n3 DISABLE REWRITE TBLPROPERTIES('rewriting.time.window'='5min') AS
SELECT cmv_basetable_n3.a, cmv_basetable_2_n1.c
FROM cmv_basetable_n3 JOIN cmv_basetable_2_n1 ON (cmv_basetable_n3.a = cmv_basetable_2_n1.a)
WHERE cmv_basetable_2_n1.c > 10.0
GROUP BY cmv_basetable_n3.a, cmv_basetable_2_n1.c
PREHOOK: type: CREATE_MATERIALIZED_VIEW
POSTHOOK: query: EXPLAIN
-CREATE MATERIALIZED VIEW cmv_mat_view_n3 DISABLE REWRITE TBLPROPERTIES('rewriting.time.window'='300s') AS
+CREATE MATERIALIZED VIEW cmv_mat_view_n3 DISABLE REWRITE TBLPROPERTIES('rewriting.time.window'='5min') AS
SELECT cmv_basetable_n3.a, cmv_basetable_2_n1.c
FROM cmv_basetable_n3 JOIN cmv_basetable_2_n1 ON (cmv_basetable_n3.a = cmv_basetable_2_n1.a)
WHERE cmv_basetable_2_n1.c > 10.0
@@ -188,7 +188,7 @@ STAGE PLANS:
Create View
columns: a int, c decimal(10,2)
table properties:
- rewriting.time.window 300s
+ rewriting.time.window 5min
expanded text: SELECT `cmv_basetable_n3`.`a`, `cmv_basetable_2_n1`.`c`
FROM `default`.`cmv_basetable_n3` JOIN `default`.`cmv_basetable_2_n1` ON (`cmv_basetable_n3`.`a` = `cmv_basetable_2_n1`.`a`)
WHERE `cmv_basetable_2_n1`.`c` > 10.0
@@ -212,7 +212,7 @@ STAGE PLANS:
hdfs directory: true
#### A masked pattern was here ####
-PREHOOK: query: CREATE MATERIALIZED VIEW cmv_mat_view_n3 DISABLE REWRITE TBLPROPERTIES('rewriting.time.window'='300s') AS
+PREHOOK: query: CREATE MATERIALIZED VIEW cmv_mat_view_n3 DISABLE REWRITE TBLPROPERTIES('rewriting.time.window'='5min') AS
SELECT cmv_basetable_n3.a, cmv_basetable_2_n1.c
FROM cmv_basetable_n3 JOIN cmv_basetable_2_n1 ON (cmv_basetable_n3.a = cmv_basetable_2_n1.a)
WHERE cmv_basetable_2_n1.c > 10.0
@@ -222,7 +222,7 @@ PREHOOK: Input: default@cmv_basetable_2_n1
PREHOOK: Input: default@cmv_basetable_n3
PREHOOK: Output: database:default
PREHOOK: Output: default@cmv_mat_view_n3
-POSTHOOK: query: CREATE MATERIALIZED VIEW cmv_mat_view_n3 DISABLE REWRITE TBLPROPERTIES('rewriting.time.window'='300s') AS
+POSTHOOK: query: CREATE MATERIALIZED VIEW cmv_mat_view_n3 DISABLE REWRITE TBLPROPERTIES('rewriting.time.window'='5min') AS
SELECT cmv_basetable_n3.a, cmv_basetable_2_n1.c
FROM cmv_basetable_n3 JOIN cmv_basetable_2_n1 ON (cmv_basetable_n3.a = cmv_basetable_2_n1.a)
WHERE cmv_basetable_2_n1.c > 10.0
@@ -254,7 +254,7 @@ Table Parameters:
numFiles 2
numRows 2
rawDataSize 232
- rewriting.time.window 300s
+ rewriting.time.window 5min
totalSize 608
#### A masked pattern was here ####
@@ -476,7 +476,7 @@ Table Parameters:
numFiles 2
numRows 2
rawDataSize 232
- rewriting.time.window 300s
+ rewriting.time.window 5min
totalSize 608
#### A masked pattern was here ####
@@ -779,7 +779,7 @@ Table Parameters:
numFiles 2
numRows 3
rawDataSize 348
- rewriting.time.window 300s
+ rewriting.time.window 5min
totalSize 628
#### A masked pattern was here ####
http://git-wip-us.apache.org/repos/asf/hive/blob/774a8ef7/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_empty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_empty.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_empty.q.out
index b33d8c3..e988ea4 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_empty.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_empty.q.out
@@ -55,7 +55,7 @@ STAGE PLANS:
limit: -1
Processor Tree:
TableScan
- alias: emps_mv_rewrite_empty
+ alias: default.emps_mv_rewrite_empty_mv1
Filter Operator
predicate: (empid < 120) (type: boolean)
Select Operator
@@ -66,10 +66,12 @@ STAGE PLANS:
PREHOOK: query: select * from emps_mv_rewrite_empty where empid < 120
PREHOOK: type: QUERY
PREHOOK: Input: default@emps_mv_rewrite_empty
+PREHOOK: Input: default@emps_mv_rewrite_empty_mv1
#### A masked pattern was here ####
POSTHOOK: query: select * from emps_mv_rewrite_empty where empid < 120
POSTHOOK: type: QUERY
POSTHOOK: Input: default@emps_mv_rewrite_empty
+POSTHOOK: Input: default@emps_mv_rewrite_empty_mv1
#### A masked pattern was here ####
PREHOOK: query: drop materialized view emps_mv_rewrite_empty_mv1
PREHOOK: type: DROP_MATERIALIZED_VIEW