You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kr...@apache.org on 2020/06/17 05:00:48 UTC
[hive] branch master updated: HIVE-23493: Rewrite plan to join back
tables with many projected columns joined multiple times (#1124)
This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 216e341 HIVE-23493: Rewrite plan to join back tables with many projected columns joined multiple times (#1124)
216e341 is described below
commit 216e34157d2e010abd39256193df0d04534c8399
Author: kasakrisz <33...@users.noreply.github.com>
AuthorDate: Wed Jun 17 07:00:35 2020 +0200
HIVE-23493: Rewrite plan to join back tables with many projected columns joined multiple times (#1124)
HIVE-23493: Rewrite plan to join back tables with many projected columns joined multiple times (Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 5 +
.../calcite/HiveDefaultRelMetadataProvider.java | 29 +-
.../HiveDefaultTezModelRelMetadataProvider.java | 84 +++
.../HiveCardinalityPreservingJoinOptimization.java | 381 +++++++++++
.../rules/HiveCardinalityPreservingJoinRule.java | 76 +++
.../calcite/rules/HiveFieldTrimmerRule.java | 14 +-
.../calcite/rules/HiveRelFieldTrimmer.java | 2 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 7 +
.../llap/constraints_optimization.q.out | 8 +-
.../perf/tez/constraints/cbo_query11.q.out | 114 ++--
.../perf/tez/constraints/cbo_query4.q.out | 162 +++--
.../perf/tez/constraints/cbo_query74.q.out | 114 ++--
.../perf/tez/constraints/query11.q.out | 525 +++++++-------
.../perf/tez/constraints/query4.q.out | 755 +++++++++++----------
.../perf/tez/constraints/query74.q.out | 512 +++++++-------
15 files changed, 1730 insertions(+), 1058 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index fce7fc3..a1e8ae7 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1719,6 +1719,11 @@ public class HiveConf extends Configuration {
"When CBO estimates output rows for a join involving multiple columns, the default behavior assumes" +
"the columns are independent. Setting this flag to true will cause the estimator to assume" +
"the columns are correlated."),
+ HIVE_CARDINALITY_PRESERVING_JOIN_OPTIMIZATION_FACTOR("hive.cardinality.preserving.join.optimization.factor", 1.0f,
+ "Original plan cost multiplier for rewriting when query has tables joined multiple time on primary/unique key and " +
+ "projected the majority of columns from these table. This optimization trims fields at root of tree and " +
+ "then joins back affected tables at top of tree to get rest of columns. " +
+ "Set this to 0.0 to disable this optimization or increase it for more agressive optimization."),
AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"),
SEMIJOIN_CONVERSION("hive.optimize.semijoin.conversion", true, "convert group by followed by inner equi join into semijoin"),
HIVE_COLUMN_ALIGNMENT("hive.order.columnalignment", true, "Flag to control whether we want to try to align" +
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
index 7ad3214..c4b153e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java
@@ -18,19 +18,8 @@
package org.apache.hadoop.hive.ql.optimizer.calcite;
import java.util.List;
-import org.apache.calcite.adapter.druid.DruidQuery;
-import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcAggregate;
-import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcFilter;
-import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcJoin;
-import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcProject;
-import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcSort;
-import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcUnion;
-import org.apache.calcite.plan.hep.HepRelVertex;
-import org.apache.calcite.plan.volcano.AbstractConverter;
-import org.apache.calcite.plan.volcano.RelSubset;
-import org.apache.calcite.rel.AbstractRelNode;
+
import org.apache.calcite.rel.RelNode;
-import org.apache.calcite.rel.convert.ConverterImpl;
import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataProvider;
@@ -38,22 +27,6 @@ import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel;
import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.JdbcHiveTableScan;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdColumnUniqueness;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation;
import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCumulativeCost;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultTezModelRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultTezModelRelMetadataProvider.java
new file mode 100644
index 0000000..39de521
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultTezModelRelMetadataProvider.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import java.util.List;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdColumnUniqueness;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCumulativeCost;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistribution;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdMemory;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdPredicates;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdRowCount;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSelectivity;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdSize;
+import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdUniqueKeys;
+
+import com.google.common.collect.ImmutableList;
+
+public class HiveDefaultTezModelRelMetadataProvider {
+
+ private static final JaninoRelMetadataProvider DEFAULT_TEZ_COST_MODEL =
+ JaninoRelMetadataProvider.of(
+ ChainedRelMetadataProvider.of(
+ ImmutableList.of(
+ HiveRelMdDistinctRowCount.SOURCE,
+ HiveRelMdCumulativeCost.SOURCE,
+ new HiveRelMdCost(HiveOnTezCostModel.getCostModel(new HiveConf())).getMetadataProvider(),
+ HiveRelMdSelectivity.SOURCE,
+ HiveRelMdRowCount.SOURCE,
+ HiveRelMdUniqueKeys.SOURCE,
+ HiveRelMdColumnUniqueness.SOURCE,
+ HiveRelMdSize.SOURCE,
+ HiveRelMdMemory.SOURCE,
+ HiveRelMdDistribution.SOURCE,
+ HiveRelMdCollation.SOURCE,
+ HiveRelMdPredicates.SOURCE,
+ JaninoRelMetadataProvider.DEFAULT)));
+
+ private final JaninoRelMetadataProvider metadataProvider;
+
+
+ public HiveDefaultTezModelRelMetadataProvider() {
+ metadataProvider = DEFAULT_TEZ_COST_MODEL;
+ }
+
+ public JaninoRelMetadataProvider getMetadataProvider() {
+ return metadataProvider;
+ }
+
+ /**
+ * This method can be called at startup time to pre-register all the
+ * additional Hive classes (compared to Calcite core classes) that may
+ * be visited during the planning phase.
+ */
+ public static void initializeMetadataProviderClass(List<Class<? extends RelNode>> nodeClasses) {
+ // This will register the classes in the default Janino implementation
+ JaninoRelMetadataProvider.DEFAULT.register(nodeClasses);
+ // This will register the classes in the default Hive implementation
+ DEFAULT_TEZ_COST_MODEL.register(nodeClasses);
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinOptimization.java
new file mode 100644
index 0000000..d7a951b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinOptimization.java
@@ -0,0 +1,381 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.singletonList;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexTableInputRef;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.mapping.Mapping;
+import org.apache.calcite.util.mapping.MappingType;
+import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Optimization to reduce the amount of broadcasted/shuffled data throughout the DAG processing.
+ * This optimization targets queries with one or more tables joined multiple times on their keys
+ * and several columns are projected from those tables.
+ *
+ * Example:
+ * with sq as (
+ * select c_customer_id customer_id
+ * ,c_first_name customer_first_name
+ * ,c_last_name customer_last_name
+ * from customer
+ * )
+ * select c1.customer_id
+ * ,c1.customer_first_name
+ * ,c1.customer_last_name
+ * from sq c1
+ * ,sq c2
+ * ...
+ * ,sq cn
+ * where c1.customer_id = c2.customer_id
+ * and ...
+ *
+ * In this case all column data in the cte will be shuffled.
+ *
+ * Goal of this optimization: rewrite the plan to include only primary key or non null unique key columns of
+ * affected tables and join the them back to the result set of the main query to fetch the rest of the wide columns.
+ * This reduces the data size of the affected tables that is broadcast/shuffled throughout the DAG processing.
+ *
+ * HiveProject(customer_id=[$0], c_first_name=[$2], c_last_name=[$3])
+ * HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+ * (original plan)
+ * HiveProject(customer_id=[$2])
+ * HiveJoin(...)
+ * ...
+ * (joined back customer table)
+ * HiveProject(c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9])
+ * HiveTableScan(table=[[default, customer]], table:alias=[customer])
+ */
+public class HiveCardinalityPreservingJoinOptimization extends HiveRelFieldTrimmer {
+ private static final Logger LOG = LoggerFactory.getLogger(HiveCardinalityPreservingJoinOptimization.class);
+
+ public HiveCardinalityPreservingJoinOptimization() {
+ super(false);
+ }
+
+ @Override
+ public RelNode trim(RelBuilder relBuilder, RelNode root) {
+ try {
+ if (root.getInputs().size() != 1) {
+ LOG.debug("Only plans where root has one input are supported. Root: " + root);
+ return root;
+ }
+
+ REL_BUILDER.set(relBuilder);
+
+ RexBuilder rexBuilder = relBuilder.getRexBuilder();
+ RelNode rootInput = root.getInput(0);
+ if (rootInput instanceof Aggregate) {
+ LOG.debug("Root input is Aggregate: not supported.");
+ return root;
+ }
+
+ // Build the list of projected fields from root's input RowType
+ List<RexInputRef> rootFieldList = new ArrayList<>(rootInput.getRowType().getFieldCount());
+ for (int i = 0; i < rootInput.getRowType().getFieldList().size(); ++i) {
+ RelDataTypeField relDataTypeField = rootInput.getRowType().getFieldList().get(i);
+ rootFieldList.add(rexBuilder.makeInputRef(relDataTypeField.getType(), i));
+ }
+
+ List<ProjectedFields> lineages = getExpressionLineageOf(rootFieldList, rootInput);
+ if (lineages == null) {
+ LOG.debug("Some projected field lineage can not be determined");
+ return root;
+ }
+
+ // 1. Collect candidate tables for join back
+ // 2. Collect all used fields from original plan
+ ImmutableBitSet fieldsUsed = ImmutableBitSet.of();
+ List<TableToJoinBack> tableToJoinBackList = new ArrayList<>();
+ for (ProjectedFields projectedFields : lineages) {
+ Optional<ImmutableBitSet> projectedKeys = projectedFields.relOptHiveTable.getNonNullableKeys().stream()
+ .filter(projectedFields.fieldsInSourceTable::contains)
+ .findFirst();
+
+ if (projectedKeys.isPresent()) {
+ TableToJoinBack tableToJoinBack = new TableToJoinBack(projectedKeys.get(), projectedFields);
+ tableToJoinBackList.add(tableToJoinBack);
+ fieldsUsed = fieldsUsed.union(projectedFields.getSource(projectedKeys.get()));
+ } else {
+ fieldsUsed = fieldsUsed.union(projectedFields.fieldsInRootProject);
+ }
+ }
+
+ if (tableToJoinBackList.isEmpty()) {
+ LOG.debug("None of the tables has keys projected, unable to join back");
+ return root;
+ }
+
+ // 3. Trim out non-key fields of joined back tables
+ Set<RelDataTypeField> extraFields = Collections.emptySet();
+ TrimResult trimResult = dispatchTrimFields(rootInput, fieldsUsed, extraFields);
+ RelNode newInput = trimResult.left;
+ if (newInput.getRowType().equals(rootInput.getRowType())) {
+ LOG.debug("Nothing was trimmed out.");
+ return root;
+ }
+
+ // 4. Collect fields for new Project on the top of Join backs
+ Mapping newInputMapping = trimResult.right;
+ RexNode[] newProjects = new RexNode[rootFieldList.size()];
+ String[] newColumnNames = new String[rootFieldList.size()];
+ projectsFromOriginalPlan(rexBuilder, newInput.getRowType().getFieldCount(), newInput, newInputMapping,
+ newProjects, newColumnNames);
+
+ // 5. Join back tables to the top of original plan
+ for (TableToJoinBack tableToJoinBack : tableToJoinBackList) {
+ LOG.debug("Joining back table " + tableToJoinBack.projectedFields.relOptHiveTable.getName());
+
+ // 5.1 Create new TableScan of tables to join back
+ RelOptHiveTable relOptTable = tableToJoinBack.projectedFields.relOptHiveTable;
+ RelOptCluster cluster = relBuilder.getCluster();
+ HiveTableScan tableScan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+ relOptTable, relOptTable.getHiveTableMD().getTableName(), null, false, false);
+ // 5.2 Project only required fields from this table
+ RelNode projectTableAccessRel = tableScan.project(
+ tableToJoinBack.projectedFields.fieldsInSourceTable, new HashSet<>(0), REL_BUILDER.get());
+
+ Mapping keyMapping = Mappings.create(MappingType.INVERSE_SURJECTION,
+ tableScan.getRowType().getFieldCount(), tableToJoinBack.keys.cardinality());
+ int projectIndex = 0;
+ int offset = newInput.getRowType().getFieldCount();
+
+ for (int source : tableToJoinBack.projectedFields.fieldsInSourceTable) {
+ if (tableToJoinBack.keys.get(source)) {
+ // 5.3 Map key field to it's index in the Project on the TableScan
+ keyMapping.set(source, projectIndex);
+ } else {
+ // 5.4 if this is not a key field then we need it in the new Project on the top of Join backs
+ ProjectMapping currentProjectMapping =
+ tableToJoinBack.projectedFields.mapping.stream()
+ .filter(projectMapping -> projectMapping.indexInSourceTable == source)
+ .findFirst().get();
+ addToProject(projectTableAccessRel, projectIndex, rexBuilder,
+ offset + projectIndex,
+ currentProjectMapping.indexInRootProject,
+ newProjects, newColumnNames);
+ }
+ ++projectIndex;
+ }
+
+ // 5.5 Create Join
+ relBuilder.push(newInput);
+ relBuilder.push(projectTableAccessRel);
+
+ RexNode joinCondition = joinCondition(
+ newInput, newInputMapping, tableToJoinBack, projectTableAccessRel, keyMapping, rexBuilder);
+
+ newInput = relBuilder.join(JoinRelType.INNER, joinCondition).build();
+ }
+
+ // 6 Create Project on top of all Join backs
+ relBuilder.push(newInput);
+ relBuilder.project(asList(newProjects), asList(newColumnNames));
+
+ return root.copy(root.getTraitSet(), singletonList(relBuilder.build()));
+ } finally {
+ REL_BUILDER.remove();
+ }
+ }
+
+ private List<ProjectedFields> getExpressionLineageOf(
+ List<RexInputRef> projectExpressions, RelNode projectInput) {
+ RelMetadataQuery relMetadataQuery = RelMetadataQuery.instance();
+ Map<RexTableInputRef.RelTableRef, ProjectedFieldsBuilder> fieldMappingBuilders = new HashMap<>();
+ List<RexTableInputRef.RelTableRef> tablesOrdered = new ArrayList<>(); // use this list to keep the order of tables
+ for (RexInputRef expr : projectExpressions) {
+ Set<RexNode> expressionLineage = relMetadataQuery.getExpressionLineage(projectInput, expr);
+ if (expressionLineage == null || expressionLineage.size() != 1) {
+ LOG.debug("Lineage can not be determined of expression: " + expr);
+ return null;
+ }
+
+ RexNode rexNode = expressionLineage.iterator().next();
+ RexTableInputRef rexTableInputRef = rexTableInputRef(rexNode);
+ if (rexTableInputRef == null) {
+ LOG.debug("Unable determine expression lineage " + rexNode);
+ return null;
+ }
+
+ RexTableInputRef.RelTableRef tableRef = rexTableInputRef.getTableRef();
+ ProjectedFieldsBuilder projectedFieldsBuilder = fieldMappingBuilders.computeIfAbsent(
+ tableRef, k -> {
+ tablesOrdered.add(tableRef);
+ return new ProjectedFieldsBuilder(tableRef);
+ });
+ projectedFieldsBuilder.add(expr, rexTableInputRef);
+ }
+
+ return tablesOrdered.stream()
+ .map(relOptHiveTable -> fieldMappingBuilders.get(relOptHiveTable).build())
+ .collect(Collectors.toList());
+ }
+
+ public RexTableInputRef rexTableInputRef(RexNode rexNode) {
+ if (rexNode.getKind() == SqlKind.TABLE_INPUT_REF) {
+ return (RexTableInputRef) rexNode;
+ }
+ LOG.debug("Unable determine expression lineage " + rexNode);
+ return null;
+ }
+
+ private void projectsFromOriginalPlan(RexBuilder rexBuilder, int count, RelNode newInput, Mapping newInputMapping,
+ RexNode[] newProjects, String[] newColumnNames) {
+ for (int newProjectIndex = 0; newProjectIndex < count; ++newProjectIndex) {
+ addToProject(newInput, newProjectIndex, rexBuilder, newProjectIndex, newInputMapping.getSource(newProjectIndex),
+ newProjects, newColumnNames);
+ }
+ }
+
+ private void addToProject(RelNode relNode, int projectSourceIndex, RexBuilder rexBuilder,
+ int targetIndex, int index,
+ RexNode[] newProjects, String[] newColumnNames) {
+ RelDataTypeField relDataTypeField =
+ relNode.getRowType().getFieldList().get(projectSourceIndex);
+ newProjects[index] = rexBuilder.makeInputRef(
+ relDataTypeField.getType(),
+ targetIndex);
+ newColumnNames[index] = relDataTypeField.getName();
+ }
+
+ private RexNode joinCondition(
+ RelNode leftInput, Mapping leftInputMapping,
+ TableToJoinBack tableToJoinBack, RelNode rightInput, Mapping rightInputKeyMapping,
+ RexBuilder rexBuilder) {
+
+ List<RexNode> equalsConditions = new ArrayList<>(tableToJoinBack.keys.size());
+ for (ProjectMapping projectMapping : tableToJoinBack.projectedFields.mapping) {
+ if (!tableToJoinBack.keys.get(projectMapping.indexInSourceTable)) {
+ continue;
+ }
+
+ int leftKeyIndex = leftInputMapping.getTarget(projectMapping.indexInRootProject);
+ RelDataTypeField leftKeyField = leftInput.getRowType().getFieldList().get(leftKeyIndex);
+ int rightKeyIndex = rightInputKeyMapping.getTarget(projectMapping.indexInSourceTable);
+ RelDataTypeField rightKeyField = rightInput.getRowType().getFieldList().get(rightKeyIndex);
+
+ equalsConditions.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
+ rexBuilder.makeInputRef(leftKeyField.getValue(), leftKeyField.getIndex()),
+ rexBuilder.makeInputRef(rightKeyField.getValue(),
+ leftInput.getRowType().getFieldCount() + rightKeyIndex)));
+ }
+ return RexUtil.composeConjunction(rexBuilder, equalsConditions);
+ }
+
+ private static final class ProjectMapping {
+ private final int indexInRootProject;
+ private final int indexInSourceTable;
+
+ private ProjectMapping(int indexInRootProject, RexTableInputRef rexTableInputRef) {
+ this.indexInRootProject = indexInRootProject;
+ this.indexInSourceTable = rexTableInputRef.getIndex();
+ }
+ }
+
+ private static final class ProjectedFields {
+ private final RelOptHiveTable relOptHiveTable;
+ private final ImmutableBitSet fieldsInRootProject;
+ private final ImmutableBitSet fieldsInSourceTable;
+ private final List<ProjectMapping> mapping;
+
+ private ProjectedFields(RexTableInputRef.RelTableRef relTableRef,
+ ImmutableBitSet fieldsInRootProject, ImmutableBitSet fieldsInSourceTable,
+ List<ProjectMapping> mapping) {
+ this.relOptHiveTable = (RelOptHiveTable) relTableRef.getTable();
+ this.fieldsInRootProject = fieldsInRootProject;
+ this.fieldsInSourceTable = fieldsInSourceTable;
+ this.mapping = mapping;
+ }
+
+ public ImmutableBitSet getSource(ImmutableBitSet fields) {
+ ImmutableBitSet.Builder targetFieldsBuilder = ImmutableBitSet.builder();
+ for (ProjectMapping fieldMapping : mapping) {
+ if (fields.get(fieldMapping.indexInSourceTable)) {
+ targetFieldsBuilder.set(fieldMapping.indexInRootProject);
+ }
+ }
+ return targetFieldsBuilder.build();
+ }
+ }
+
+ private static class ProjectedFieldsBuilder {
+ private final RexTableInputRef.RelTableRef relTableRef;
+ private final ImmutableBitSet.Builder fieldsInRootProjectBuilder = ImmutableBitSet.builder();
+ private final ImmutableBitSet.Builder fieldsInSourceTableBuilder = ImmutableBitSet.builder();
+ private final List<ProjectMapping> mapping = new ArrayList<>();
+
+ private ProjectedFieldsBuilder(RexTableInputRef.RelTableRef relTableRef) {
+ this.relTableRef = relTableRef;
+ }
+
+ public void add(RexInputRef rexInputRef, RexTableInputRef sourceTableInputRef) {
+ fieldsInRootProjectBuilder.set(rexInputRef.getIndex());
+ fieldsInSourceTableBuilder.set(sourceTableInputRef.getIndex());
+ mapping.add(new ProjectMapping(rexInputRef.getIndex(), sourceTableInputRef));
+ }
+
+ public ProjectedFields build() {
+ return new ProjectedFields(relTableRef,
+ fieldsInRootProjectBuilder.build(),
+ fieldsInSourceTableBuilder.build(),
+ mapping);
+ }
+ }
+
+ private static final class TableToJoinBack {
+ private final ProjectedFields projectedFields;
+ private final ImmutableBitSet keys;
+
+ private TableToJoinBack(ImmutableBitSet keys, ProjectedFields projectedFields) {
+ this.projectedFields = projectedFields;
+ this.keys = keys;
+ }
+ }
+}
+
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinRule.java
new file mode 100644
index 0000000..2828bc1
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveCardinalityPreservingJoinRule.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptCost;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultTezModelRelMetadataProvider;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Rule to trigger {@link HiveCardinalityPreservingJoinOptimization} on top of the plan.
+ */
+public class HiveCardinalityPreservingJoinRule extends HiveFieldTrimmerRule {
+ private static final Logger LOG = LoggerFactory.getLogger(HiveCardinalityPreservingJoinRule.class);
+
+ private final double factor;
+
+ public HiveCardinalityPreservingJoinRule(double factor) {
+ super(false, "HiveCardinalityPreservingJoinRule");
+ this.factor = Math.max(factor, 0.0);
+ }
+
+ @Override
+ protected RelNode trim(RelOptRuleCall call, RelNode node) {
+ RelNode optimized = new HiveCardinalityPreservingJoinOptimization().trim(call.builder(), node);
+ if (optimized == node) {
+ return node;
+ }
+
+ JaninoRelMetadataProvider original = RelMetadataQuery.THREAD_PROVIDERS.get();
+ try {
+ RelMetadataQuery.THREAD_PROVIDERS.set(getJaninoRelMetadataProvider());
+ RelMetadataQuery metadataQuery = RelMetadataQuery.instance();
+
+ RelOptCost optimizedCost = metadataQuery.getCumulativeCost(optimized);
+ RelOptCost originalCost = metadataQuery.getCumulativeCost(node);
+ originalCost = originalCost.multiplyBy(factor);
+ LOG.debug("Original plan cost {} vs Optimized plan cost {}", originalCost, optimizedCost);
+ if (optimizedCost.isLt(originalCost)) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Plan after:\n" + RelOptUtil.toString(optimized));
+ }
+ return optimized;
+ }
+
+ return node;
+ }
+ finally {
+ RelMetadataQuery.THREAD_PROVIDERS.set(original);
+ }
+ }
+
+ private JaninoRelMetadataProvider getJaninoRelMetadataProvider() {
+ return new HiveDefaultTezModelRelMetadataProvider().getMetadataProvider();
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java
index 73ff1bc..ef053b6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java
@@ -40,10 +40,14 @@ public class HiveFieldTrimmerRule extends RelOptRule {
private boolean triggered;
public HiveFieldTrimmerRule(boolean fetchStats) {
+ this(fetchStats, "HiveFieldTrimmerRule");
+ }
+
+ protected HiveFieldTrimmerRule(boolean fetchStats, String description) {
super(operand(RelNode.class, any()),
- HiveRelFactories.HIVE_BUILDER, "HiveFieldTrimmerRule");
+ HiveRelFactories.HIVE_BUILDER, description);
this.fetchStats = fetchStats;
- triggered = false;
+ this.triggered = false;
}
@Override
@@ -63,11 +67,13 @@ public class HiveFieldTrimmerRule extends RelOptRule {
final HepPlanner tmpPlanner = new HepPlanner(PROGRAM);
tmpPlanner.setRoot(node);
node = tmpPlanner.findBestExp();
- call.transformTo(
- HiveRelFieldTrimmer.get(fetchStats).trim(call.builder(), node));
+ call.transformTo(trim(call, node));
triggered = true;
}
+ protected RelNode trim(RelOptRuleCall call, RelNode node) {
+ return HiveRelFieldTrimmer.get(fetchStats).trim(call.builder(), node);
+ }
/**
* The goal of this rule is to extract the RelNode from the
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index 19ce3ea..c242524 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -105,7 +105,7 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
new ThreadLocal<>();
- private HiveRelFieldTrimmer(boolean fetchStats) {
+ protected HiveRelFieldTrimmer(boolean fetchStats) {
this(fetchStats, true);
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 2396641..335e256 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -200,6 +200,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregatePullUpCons
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateReduceFunctionsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateReduceRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateSplitRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveCardinalityPreservingJoinRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveDruidRules;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExceptRewriteRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule;
@@ -2389,6 +2390,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
final HepProgramBuilder program = new HepProgramBuilder();
+ double factor = conf.getFloatVar(ConfVars.HIVE_CARDINALITY_PRESERVING_JOIN_OPTIMIZATION_FACTOR);
+ if (factor > 0.0) {
+ generatePartialProgram(program, false, HepMatchOrder.TOP_DOWN,
+ new HiveCardinalityPreservingJoinRule(factor));
+ }
+
// 1. Run other optimizations that do not need stats
generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
ProjectRemoveRule.INSTANCE, HiveUnionMergeRule.INSTANCE,
diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
index 4e4f087..6ea0317 100644
--- a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
+++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out
@@ -2218,7 +2218,7 @@ POSTHOOK: Input: default@store_sales
#### A masked pattern was here ####
CBO PLAN:
HiveAggregate(group=[{1}])
- HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
HiveProject(ss_customer_sk=[$3])
@@ -2259,7 +2259,7 @@ POSTHOOK: Input: default@store_sales
#### A masked pattern was here ####
CBO PLAN:
HiveAggregate(group=[{2}])
- HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($1, $0)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
HiveProject(ss_customer_sk=[$3])
HiveFilter(condition=[IS NOT NULL($3)])
HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
@@ -2638,7 +2638,7 @@ POSTHOOK: Input: default@store_sales
#### A masked pattern was here ####
CBO PLAN:
HiveAggregate(group=[{0}])
- HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
HiveProject(ss_customer_sk=[$3])
@@ -2789,7 +2789,7 @@ POSTHOOK: Input: default@store_sales
#### A masked pattern was here ####
CBO PLAN:
HiveAggregate(group=[{1}])
- HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
HiveProject(ss_customer_sk=[$3])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out
index 0136ee4..f57b728 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query11.q.out
@@ -170,58 +170,70 @@ POSTHOOK: Input: default@web_sales
POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100])
- HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2], customer_birth_country=[$3])
- HiveJoin(condition=[AND(CASE(IS NOT NULL($6), CASE($11, >(/($8, $10), /($4, $6)), >(0:DECIMAL(1, 0), /($4, $6))), CASE($11, >(/($8, $10), 0:DECIMAL(1, 0)), false)), =($0, $9))], joinType=[inner], algorithm=[none], cost=[not available])
- HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_id=[$0], c_first_name=[$1], c_last_name=[$2], c_birth_country=[$3], $f4=[$4])
- HiveAggregate(group=[{1, 2, 3, 5}], agg#0=[sum($10)])
- HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
- HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)])
- HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
- HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
- HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[=($6, 2000)])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject($f0=[$0], $f1=[$1])
+ HiveProject($f00=[$0], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4])
+ HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject($f00=[$0])
+ HiveJoin(condition=[AND(CASE(IS NOT NULL($4), CASE($9, >(/($6, $8), /($2, $4)), >(0:DECIMAL(1, 0), /($2, $4))), CASE($9, >(/($6, $8), 0:DECIMAL(1, 0)), false)), =($0, $7))], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[SMBJoin], cost=[not available])
+ HiveProject(c_customer_id=[$0], c_birth_country=[$1], $f2=[$2])
+ HiveAggregate(group=[{1, 4}], agg#0=[sum($7)])
+ HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], -=[-($3, $2)])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_discount_amt=[$14], ss_ext_list_price=[$17])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[=($1, 2000)])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject($f0=[$0], $f1=[$1])
+ HiveFilter(condition=[>($1, 0)])
+ HiveAggregate(group=[{1}], agg#0=[sum($4)])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], -=[-($3, $2)])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_discount_amt=[$14], ss_ext_list_price=[$17])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[=($1, 1999)])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(c_customer_id=[$0], $f1=[$1])
+ HiveAggregate(group=[{1}], agg#0=[sum($4)])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], -=[-($3, $2)])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_ext_discount_amt=[$22], ws_ext_list_price=[$25])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[=($1, 2000)])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN])
HiveFilter(condition=[>($1, 0)])
- HiveAggregate(group=[{1}], agg#0=[sum($10)])
- HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
+ HiveAggregate(group=[{1}], agg#0=[sum($4)])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], -=[-($17, $14)])
- HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
- HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], -=[-($3, $2)])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_ext_discount_amt=[$22], ws_ext_list_price=[$25])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[=($6, 1999)])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(c_customer_id=[$0], $f1=[$1])
- HiveAggregate(group=[{1}], agg#0=[sum($10)])
- HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
- HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)])
- HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))])
- HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
- HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[=($6, 2000)])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN])
- HiveFilter(condition=[>($1, 0)])
- HiveAggregate(group=[{1}], agg#0=[sum($10)])
- HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
- HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], -=[-($25, $22)])
- HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))])
- HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
- HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[=($6, 1999)])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveFilter(condition=[=($1, 1999)])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out
index 987a0f3..6d5114b 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query4.q.out
@@ -242,85 +242,101 @@ POSTHOOK: Input: default@web_sales
POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100])
- HiveProject(customer_id=[$2], customer_first_name=[$3], customer_last_name=[$4], customer_birth_country=[$5])
- HiveJoin(condition=[AND(=($2, $14), CASE($16, CASE($13, >(/($10, $12), /($1, $15)), false), false))], joinType=[inner], algorithm=[none], cost=[not available])
- HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_id=[$0], $f1=[$1])
- HiveAggregate(group=[{1}], agg#0=[sum($10)])
- HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
- HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))])
- HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))])
- HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
- HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[=($6, 2000)])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveJoin(condition=[AND(CASE(IS NOT NULL($6), CASE($11, >(/($8, $10), /($4, $6)), false), false), =($0, $9))], joinType=[inner], algorithm=[none], cost=[not available])
- HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_id=[$0], c_first_name=[$1], c_last_name=[$2], c_birth_country=[$3], $f4=[$4])
- HiveAggregate(group=[{1, 2, 3, 5}], agg#0=[sum($10)])
- HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
- HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))])
- HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
- HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
- HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[=($6, 2000)])
+ HiveProject($f00=[$0], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4])
+ HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject($f00=[$2])
+ HiveJoin(condition=[AND(=($2, $12), CASE($14, CASE($11, >(/($8, $10), /($1, $13)), false), false))], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_id=[$0], $f1=[$1])
+ HiveAggregate(group=[{1}], agg#0=[sum($4)])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], /=[/(+(-(-($5, $4), $2), $3), 2:DECIMAL(10, 0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_ext_discount_amt=[$22], ws_ext_sales_price=[$23], ws_ext_wholesale_cost=[$24], ws_ext_list_price=[$25])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[=($1, 2000)])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject($f0=[$0], $f1=[$1])
+ HiveJoin(condition=[AND(CASE(IS NOT NULL($4), CASE($9, >(/($6, $8), /($2, $4)), false), false), =($0, $7))], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[SMBJoin], cost=[not available])
+ HiveProject(c_customer_id=[$0], c_birth_country=[$1], $f2=[$2])
+ HiveAggregate(group=[{1, 4}], agg#0=[sum($7)])
+ HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], /=[/(+(-(-($5, $4), $2), $3), 2:DECIMAL(10, 0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_discount_amt=[$14], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], ss_ext_list_price=[$17])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[=($1, 2000)])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject($f0=[$0], $f1=[$1])
+ HiveFilter(condition=[>($1, 0)])
+ HiveAggregate(group=[{1}], agg#0=[sum($4)])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], /=[/(+(-(-($5, $4), $2), $3), 2:DECIMAL(10, 0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_discount_amt=[$14], ss_ext_sales_price=[$15], ss_ext_wholesale_cost=[$16], ss_ext_list_price=[$17])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[=($1, 1999)])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(c_customer_id=[$0], $f1=[$1])
+ HiveAggregate(group=[{1}], agg#0=[sum($4)])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], /=[/(+(-(-($5, $4), $2), $3), 2:DECIMAL(10, 0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_ext_discount_amt=[$22], cs_ext_sales_price=[$23], cs_ext_wholesale_cost=[$24], cs_ext_list_price=[$25])
+ HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[=($1, 2000)])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN])
HiveFilter(condition=[>($1, 0)])
- HiveAggregate(group=[{1}], agg#0=[sum($10)])
- HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
+ HiveAggregate(group=[{1}], agg#0=[sum($4)])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], /=[/(+(-(-($17, $16), $14), $15), 2:DECIMAL(10, 0))])
- HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
- HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], /=[/(+(-(-($5, $4), $2), $3), 2:DECIMAL(10, 0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_ext_discount_amt=[$22], cs_ext_sales_price=[$23], cs_ext_wholesale_cost=[$24], cs_ext_list_price=[$25])
+ HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[=($6, 1999)])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(c_customer_id=[$0], $f1=[$1])
- HiveAggregate(group=[{1}], agg#0=[sum($10)])
- HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
- HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))])
- HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
- HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
- HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[=($6, 2000)])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveFilter(condition=[=($1, 1999)])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN])
HiveFilter(condition=[>($1, 0)])
- HiveAggregate(group=[{1}], agg#0=[sum($10)])
- HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
+ HiveAggregate(group=[{1}], agg#0=[sum($4)])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))])
- HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
- HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], /=[/(+(-(-($5, $4), $2), $3), 2:DECIMAL(10, 0))])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_ext_discount_amt=[$22], ws_ext_sales_price=[$23], ws_ext_wholesale_cost=[$24], ws_ext_list_price=[$25])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[=($6, 1999)])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN])
- HiveFilter(condition=[>($1, 0)])
- HiveAggregate(group=[{1}], agg#0=[sum($10)])
- HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10], c_birth_country=[$14], c_login=[$15], c_email_address=[$16])
- HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], /=[/(+(-(-($25, $24), $22), $23), 2:DECIMAL(10, 0))])
- HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))])
- HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
- HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[=($6, 1999)])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveFilter(condition=[=($1, 1999)])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out
index 289e5d2..bf43fb3 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query74.q.out
@@ -130,58 +130,70 @@ POSTHOOK: Input: default@web_sales
POSTHOOK: Output: hdfs://### HDFS PATH ###
CBO PLAN:
HiveSortLimit(sort0=[$2], sort1=[$0], sort2=[$1], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100])
- HiveProject(customer_id=[$0], customer_first_name=[$1], customer_last_name=[$2])
- HiveJoin(condition=[AND(CASE(IS NOT NULL($5), CASE($10, >(/($7, $9), /($3, $5)), false), false), =($0, $8))], joinType=[inner], algorithm=[none], cost=[not available])
- HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_id=[$0], c_first_name=[$1], c_last_name=[$2], $f3=[$3])
- HiveAggregate(group=[{1, 2, 3}], agg#0=[sum($6)])
- HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9])
- HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20])
- HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
- HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
- HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[AND(=($6, 1999), IN($6, 1998, 1999))])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject($f0=[$0], $f1=[$1])
+ HiveProject($f00=[$0], c_first_name=[$2], c_last_name=[$3])
+ HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject($f00=[$0])
+ HiveJoin(condition=[AND(CASE(IS NOT NULL($3), CASE($8, >(/($5, $7), /($1, $3)), false), false), =($0, $6))], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[SMBJoin], cost=[not available])
+ HiveProject(c_customer_id=[$0], $f1=[$1])
+ HiveAggregate(group=[{1}], agg#0=[sum($6)])
+ HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_net_paid=[$2])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[AND(=($1, 1999), IN($1, 1998, 1999))])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject($f0=[$0], $f1=[$1])
+ HiveFilter(condition=[>($1, 0)])
+ HiveAggregate(group=[{1}], agg#0=[sum($4)])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_net_paid=[$2])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[AND(=($1, 1998), IN($1, 1998, 1999))])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(c_customer_id=[$0], $f1=[$1])
+ HiveAggregate(group=[{1}], agg#0=[sum($4)])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], ws_net_paid=[$2])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[AND(=($1, 1999), IN($1, 1998, 1999))])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN])
HiveFilter(condition=[>($1, 0)])
- HiveAggregate(group=[{1}], agg#0=[sum($6)])
- HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9])
+ HiveAggregate(group=[{1}], agg#0=[sum($4)])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[CommonJoin], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_customer_id=[$1])
HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_net_paid=[$20])
- HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))])
- HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[MapJoin], cost=[not available])
+ HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$1], ws_net_paid=[$2])
+ HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($1))])
+ HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[AND(=($6, 1998), IN($6, 1998, 1999))])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(c_customer_id=[$0], $f1=[$1])
- HiveAggregate(group=[{1}], agg#0=[sum($6)])
- HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9])
- HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29])
- HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))])
- HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
- HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[AND(=($6, 1999), IN($6, 1998, 1999))])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
- HiveProject(customer_id=[$0], year_total=[$1], CAST=[CAST(IS NOT NULL($1)):BOOLEAN])
- HiveFilter(condition=[>($1, 0)])
- HiveAggregate(group=[{1}], agg#0=[sum($6)])
- HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9])
- HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(ws_sold_date_sk=[$0], ws_bill_customer_sk=[$4], ws_net_paid=[$29])
- HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($0))])
- HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
- HiveProject(d_date_sk=[$0])
- HiveFilter(condition=[AND(=($6, 1998), IN($6, 1998, 1999))])
- HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveFilter(condition=[AND(=($1, 1998), IN($1, 1998, 1999))])
+ HiveProject(d_date_sk=[$0], d_year=[$6])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(c_customer_id=[$1], c_first_name=[$8], c_last_name=[$9])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out
index 7f9df5e8..8b9488e 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query11.q.out
@@ -171,268 +171,303 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
Plan optimized by CBO.
Vertex dependency in root stage
-Map 1 <- Reducer 10 (BROADCAST_EDGE)
-Map 24 <- Reducer 14 (BROADCAST_EDGE)
-Map 26 <- Reducer 18 (BROADCAST_EDGE)
-Map 27 <- Reducer 22 (BROADCAST_EDGE)
-Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE)
-Reducer 11 <- Map 24 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 12 <- Map 25 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
-Reducer 13 <- Reducer 12 (SIMPLE_EDGE)
-Reducer 14 <- Map 9 (CUSTOM_SIMPLE_EDGE)
-Reducer 15 <- Map 26 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
-Reducer 17 <- Reducer 16 (SIMPLE_EDGE)
-Reducer 18 <- Map 9 (CUSTOM_SIMPLE_EDGE)
-Reducer 19 <- Map 27 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE)
-Reducer 21 <- Reducer 20 (SIMPLE_EDGE)
-Reducer 22 <- Map 9 (CUSTOM_SIMPLE_EDGE)
-Reducer 3 <- Map 23 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Map 1 <- Reducer 11 (BROADCAST_EDGE)
+Map 25 <- Reducer 15 (BROADCAST_EDGE)
+Map 27 <- Reducer 19 (BROADCAST_EDGE)
+Map 28 <- Reducer 23 (BROADCAST_EDGE)
+Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE)
+Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE)
+Reducer 13 <- Map 26 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE)
+Reducer 14 <- Reducer 13 (SIMPLE_EDGE)
+Reducer 15 <- Map 10 (CUSTOM_SIMPLE_EDGE)
+Reducer 16 <- Map 10 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE)
+Reducer 17 <- Map 26 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE)
+Reducer 18 <- Reducer 17 (SIMPLE_EDGE)
+Reducer 19 <- Map 10 (CUSTOM_SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+Reducer 20 <- Map 10 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE)
+Reducer 21 <- Map 26 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE)
+Reducer 22 <- Reducer 21 (SIMPLE_EDGE)
+Reducer 23 <- Map 10 (CUSTOM_SIMPLE_EDGE)
+Reducer 3 <- Map 24 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Reducer 21 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
-Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 18 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 8 <- Map 29 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:100
Stage-1
- Reducer 8 vectorized
- File Output Operator [FS_346]
- Limit [LIM_345] (rows=100 width=372)
+ Reducer 9 vectorized
+ File Output Operator [FS_406]
+ Limit [LIM_405] (rows=100 width=372)
Number of rows:100
- Select Operator [SEL_344] (rows=13333333 width=372)
+ Select Operator [SEL_404] (rows=15944136 width=372)
Output:["_col0","_col1","_col2","_col3"]
- <-Reducer 7 [SIMPLE_EDGE]
- SHUFFLE [RS_89]
- Select Operator [SEL_88] (rows=13333333 width=372)
+ <-Reducer 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_103]
+ Select Operator [SEL_102] (rows=15944136 width=372)
Output:["_col0","_col1","_col2","_col3"]
- Top N Key Operator [TNK_152] (rows=13333333 width=824)
- keys:_col0, _col1, _col2, _col3,top n:100
- Filter Operator [FIL_87] (rows=13333333 width=824)
- predicate:CASE WHEN (_col6 is not null) THEN (CASE WHEN (_col11) THEN (((_col8 / _col10) > (_col4 / _col6))) ELSE ((0 > (_col4 / _col6))) END) ELSE (CASE WHEN (_col11) THEN (((_col8 / _col10) > 0)) ELSE (false) END) END
- Merge Join Operator [MERGEJOIN_282] (rows=26666666 width=824)
- Conds:RS_84._col0=RS_343._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col11"]
- <-Reducer 21 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_343]
- PartitionCols:_col0
- Select Operator [SEL_342] (rows=14325562 width=216)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_341] (rows=14325562 width=212)
- predicate:(_col1 > 0)
- Group By Operator [GBY_340] (rows=42976686 width=212)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 20 [SIMPLE_EDGE]
- SHUFFLE [RS_74]
- PartitionCols:_col0
- Group By Operator [GBY_73] (rows=51391963 width=212)
- Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_279] (rows=51391963 width=212)
- Conds:RS_69._col1=RS_324._col0(Inner),Output:["_col2","_col5"]
- <-Map 25 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_324]
- PartitionCols:_col0
- Select Operator [SEL_321] (rows=80000000 width=104)
- Output:["_col0","_col1"]
- TableScan [TS_25] (rows=80000000 width=104)
- default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"]
- <-Reducer 19 [SIMPLE_EDGE]
- SHUFFLE [RS_69]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_278] (rows=51391963 width=115)
- Conds:RS_339._col0=RS_297._col0(Inner),Output:["_col1","_col2"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_297]
- PartitionCols:_col0
- Select Operator [SEL_290] (rows=652 width=4)
- Output:["_col0"]
- Filter Operator [FIL_286] (rows=652 width=8)
- predicate:(d_year = 1999)
- TableScan [TS_3] (rows=73049 width=8)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"]
- <-Map 27 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_339]
- PartitionCols:_col0
- Select Operator [SEL_338] (rows=143930993 width=119)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_337] (rows=143930993 width=231)
- predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_67_date_dim_d_date_sk_min) AND DynamicValue(RS_67_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_67_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_58] (rows=144002668 width=231)
- default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"]
- <-Reducer 22 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_336]
- Group By Operator [GBY_335] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_306]
- Group By Operator [GBY_302] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_298] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_290]
- <-Reducer 6 [SIMPLE_EDGE]
- SHUFFLE [RS_84]
- PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_281] (rows=26666666 width=708)
- Conds:RS_81._col0=RS_334._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8"]
- <-Reducer 17 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_334]
- PartitionCols:_col0
- Group By Operator [GBY_333] (rows=42976686 width=212)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 16 [SIMPLE_EDGE]
- SHUFFLE [RS_55]
- PartitionCols:_col0
- Group By Operator [GBY_54] (rows=51391963 width=212)
- Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_277] (rows=51391963 width=212)
- Conds:RS_50._col1=RS_323._col0(Inner),Output:["_col2","_col5"]
- <-Map 25 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_323]
- PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_321]
- <-Reducer 15 [SIMPLE_EDGE]
- SHUFFLE [RS_50]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_276] (rows=51391963 width=115)
- Conds:RS_332._col0=RS_295._col0(Inner),Output:["_col1","_col2"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_295]
- PartitionCols:_col0
- Select Operator [SEL_289] (rows=652 width=4)
- Output:["_col0"]
- Filter Operator [FIL_285] (rows=652 width=8)
- predicate:(d_year = 2000)
- Please refer to the previous TableScan [TS_3]
- <-Map 26 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_332]
- PartitionCols:_col0
- Select Operator [SEL_331] (rows=143930993 width=119)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_330] (rows=143930993 width=231)
- predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_48_date_dim_d_date_sk_min) AND DynamicValue(RS_48_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_48_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_39] (rows=144002668 width=231)
- default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"]
- <-Reducer 18 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_329]
- Group By Operator [GBY_328] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_305]
- Group By Operator [GBY_301] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_296] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_289]
- <-Reducer 5 [SIMPLE_EDGE]
- SHUFFLE [RS_81]
- PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_280] (rows=26666666 width=596)
- Conds:RS_315._col0=RS_327._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6"]
- <-Reducer 13 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_327]
- PartitionCols:_col0
- Filter Operator [FIL_326] (rows=22300081 width=212)
+ Top N Key Operator [TNK_180] (rows=15944136 width=372)
+ keys:_col0, _col2, _col3, _col4,top n:100
+ Merge Join Operator [MERGEJOIN_330] (rows=15944136 width=372)
+ Conds:RS_99._col0=RS_403._col0(Inner),Output:["_col0","_col2","_col3","_col4"]
+ <-Map 29 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_403]
+ PartitionCols:_col0
+ Select Operator [SEL_402] (rows=80000000 width=372)
+ Output:["_col0","_col1","_col2","_col3"]
+ TableScan [TS_97] (rows=80000000 width=372)
+ default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_id","c_first_name","c_last_name","c_birth_country"]
+ <-Reducer 7 [SIMPLE_EDGE]
+ SHUFFLE [RS_99]
+ PartitionCols:_col0
+ Select Operator [SEL_96] (rows=13333333 width=100)
+ Output:["_col0"]
+ Filter Operator [FIL_95] (rows=13333333 width=552)
+ predicate:CASE WHEN (_col4 is not null) THEN (CASE WHEN (_col9) THEN (((_col6 / _col8) > (_col2 / _col4))) ELSE ((0 > (_col2 / _col4))) END) ELSE (CASE WHEN (_col9) THEN (((_col6 / _col8) > 0)) ELSE (false) END) END
+ Merge Join Operator [MERGEJOIN_329] (rows=26666666 width=552)
+ Conds:RS_92._col0=RS_401._col0(Inner),Output:["_col0","_col2","_col4","_col6","_col8","_col9"]
+ <-Reducer 22 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_401]
+ PartitionCols:_col0
+ Select Operator [SEL_400] (rows=14325562 width=216)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_399] (rows=14325562 width=212)
predicate:(_col1 > 0)
- Group By Operator [GBY_325] (rows=66900244 width=212)
+ Group By Operator [GBY_398] (rows=42976686 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 21 [SIMPLE_EDGE]
+ SHUFFLE [RS_82]
+ PartitionCols:_col0
+ Group By Operator [GBY_81] (rows=51391963 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
+ Merge Join Operator [MERGEJOIN_326] (rows=51391963 width=212)
+ Conds:RS_77._col1=RS_378._col0(Inner),Output:["_col2","_col5"]
+ <-Map 26 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_378]
+ PartitionCols:_col0
+ Select Operator [SEL_375] (rows=80000000 width=104)
+ Output:["_col0","_col1"]
+ TableScan [TS_29] (rows=80000000 width=104)
+ default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"]
+ <-Reducer 20 [SIMPLE_EDGE]
+ SHUFFLE [RS_77]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_325] (rows=51391963 width=115)
+ Conds:RS_397._col0=RS_346._col0(Inner),Output:["_col1","_col2"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_346]
+ PartitionCols:_col0
+ Select Operator [SEL_339] (rows=652 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_335] (rows=652 width=8)
+ predicate:(_col1 = 1999)
+ Select Operator [SEL_331] (rows=73049 width=8)
+ Output:["_col0","_col1"]
+ TableScan [TS_4] (rows=73049 width=8)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"]
+ <-Map 28 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_397]
+ PartitionCols:_col0
+ Select Operator [SEL_396] (rows=143930993 width=119)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_395] (rows=143930993 width=231)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_394] (rows=144002668 width=231)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_393] (rows=144002668 width=231)
+ predicate:(ws_sold_date_sk BETWEEN DynamicValue(RS_75_date_dim_d_date_sk_min) AND DynamicValue(RS_75_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_75_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_64] (rows=144002668 width=231)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"]
+ <-Reducer 23 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_392]
+ Group By Operator [GBY_391] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_355]
+ Group By Operator [GBY_351] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_347] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_339]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_92]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_328] (rows=26666666 width=436)
+ Conds:RS_89._col0=RS_390._col0(Inner),Output:["_col0","_col2","_col4","_col6"]
+ <-Reducer 18 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_390]
+ PartitionCols:_col0
+ Group By Operator [GBY_389] (rows=42976686 width=212)
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 12 [SIMPLE_EDGE]
- SHUFFLE [RS_35]
+ <-Reducer 17 [SIMPLE_EDGE]
+ SHUFFLE [RS_61]
PartitionCols:_col0
- Group By Operator [GBY_34] (rows=80000000 width=212)
+ Group By Operator [GBY_60] (rows=51391963 width=212)
Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_275] (rows=187573258 width=212)
- Conds:RS_30._col1=RS_322._col0(Inner),Output:["_col2","_col5"]
- <-Map 25 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_322]
+ Merge Join Operator [MERGEJOIN_324] (rows=51391963 width=212)
+ Conds:RS_56._col1=RS_377._col0(Inner),Output:["_col2","_col5"]
+ <-Map 26 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_377]
PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_321]
- <-Reducer 11 [SIMPLE_EDGE]
- SHUFFLE [RS_30]
+ Please refer to the previous Select Operator [SEL_375]
+ <-Reducer 16 [SIMPLE_EDGE]
+ SHUFFLE [RS_56]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_274] (rows=187573258 width=115)
- Conds:RS_320._col0=RS_293._col0(Inner),Output:["_col1","_col2"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_293]
+ Merge Join Operator [MERGEJOIN_323] (rows=51391963 width=115)
+ Conds:RS_388._col0=RS_344._col0(Inner),Output:["_col1","_col2"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_344]
PartitionCols:_col0
- Select Operator [SEL_288] (rows=652 width=4)
+ Select Operator [SEL_338] (rows=652 width=4)
Output:["_col0"]
- Filter Operator [FIL_284] (rows=652 width=8)
- predicate:(d_year = 1999)
- Please refer to the previous TableScan [TS_3]
- <-Map 24 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_320]
+ Filter Operator [FIL_334] (rows=652 width=8)
+ predicate:(_col1 = 2000)
+ Please refer to the previous Select Operator [SEL_331]
+ <-Map 27 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_388]
PartitionCols:_col0
- Select Operator [SEL_319] (rows=525327388 width=119)
+ Select Operator [SEL_387] (rows=143930993 width=119)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_318] (rows=525327388 width=221)
- predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_19] (rows=575995635 width=221)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"]
- <-Reducer 14 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_317]
- Group By Operator [GBY_316] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_304]
- Group By Operator [GBY_300] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_294] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_288]
- <-Reducer 4 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_315]
- PartitionCols:_col0
- Group By Operator [GBY_314] (rows=80000000 width=484)
- Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_16]
- PartitionCols:_col0, _col1, _col2, _col3
- Group By Operator [GBY_15] (rows=80000000 width=484)
- Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col9
- Merge Join Operator [MERGEJOIN_273] (rows=187573258 width=484)
- Conds:RS_11._col1=RS_313._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col9"]
- <-Map 23 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_313]
- PartitionCols:_col0
- Select Operator [SEL_312] (rows=80000000 width=376)
- Output:["_col0","_col1","_col2","_col3","_col5"]
- TableScan [TS_6] (rows=80000000 width=376)
- default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_birth_country"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_11]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_272] (rows=187573258 width=115)
- Conds:RS_311._col0=RS_291._col0(Inner),Output:["_col1","_col2"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_291]
- PartitionCols:_col0
- Select Operator [SEL_287] (rows=652 width=4)
- Output:["_col0"]
- Filter Operator [FIL_283] (rows=652 width=8)
- predicate:(d_year = 2000)
- Please refer to the previous TableScan [TS_3]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_311]
- PartitionCols:_col0
- Select Operator [SEL_310] (rows=525327388 width=119)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_309] (rows=525327388 width=221)
- predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_0] (rows=575995635 width=221)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"]
- <-Reducer 10 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_308]
- Group By Operator [GBY_307] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_303]
- Group By Operator [GBY_299] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_292] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_287]
+ Filter Operator [FIL_386] (rows=143930993 width=231)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_385] (rows=144002668 width=231)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_384] (rows=144002668 width=231)
+ predicate:(ws_sold_date_sk BETWEEN DynamicValue(RS_54_date_dim_d_date_sk_min) AND DynamicValue(RS_54_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_54_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_43] (rows=144002668 width=231)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_list_price"]
+ <-Reducer 19 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_383]
+ Group By Operator [GBY_382] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_354]
+ Group By Operator [GBY_350] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_345] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_338]
+ <-Reducer 5 [SIMPLE_EDGE]
+ SHUFFLE [RS_89]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_327] (rows=26666666 width=324)
+ Conds:RS_367._col0=RS_381._col0(Inner),Output:["_col0","_col2","_col4"]
+ <-Reducer 14 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_381]
+ PartitionCols:_col0
+ Filter Operator [FIL_380] (rows=22300081 width=212)
+ predicate:(_col1 > 0)
+ Group By Operator [GBY_379] (rows=66900244 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_39]
+ PartitionCols:_col0
+ Group By Operator [GBY_38] (rows=80000000 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
+ Merge Join Operator [MERGEJOIN_322] (rows=187573258 width=212)
+ Conds:RS_34._col1=RS_376._col0(Inner),Output:["_col2","_col5"]
+ <-Map 26 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_376]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_375]
+ <-Reducer 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_34]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_321] (rows=187573258 width=115)
+ Conds:RS_374._col0=RS_342._col0(Inner),Output:["_col1","_col2"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_342]
+ PartitionCols:_col0
+ Select Operator [SEL_337] (rows=652 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_333] (rows=652 width=8)
+ predicate:(_col1 = 1999)
+ Please refer to the previous Select Operator [SEL_331]
+ <-Map 25 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_374]
+ PartitionCols:_col0
+ Select Operator [SEL_373] (rows=525327388 width=119)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_372] (rows=525327388 width=221)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_371] (rows=575995635 width=221)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_370] (rows=575995635 width=221)
+ predicate:(ss_sold_date_sk BETWEEN DynamicValue(RS_32_date_dim_d_date_sk_min) AND DynamicValue(RS_32_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_32_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_21] (rows=575995635 width=221)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"]
+ <-Reducer 15 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_369]
+ Group By Operator [GBY_368] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_353]
+ Group By Operator [GBY_349] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_343] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_337]
+ <-Reducer 4 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_367]
+ PartitionCols:_col0
+ Select Operator [SEL_366] (rows=80000000 width=304)
+ Output:["_col0","_col2"]
+ Group By Operator [GBY_365] (rows=80000000 width=304)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_17] (rows=80000000 width=304)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col5, _col8
+ Merge Join Operator [MERGEJOIN_320] (rows=187573258 width=304)
+ Conds:RS_13._col1=RS_364._col0(Inner),Output:["_col2","_col5","_col8"]
+ <-Map 24 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_364]
+ PartitionCols:_col0
+ Select Operator [SEL_363] (rows=80000000 width=196)
+ Output:["_col0","_col1","_col4"]
+ TableScan [TS_8] (rows=80000000 width=196)
+ default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_birth_country"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_13]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_319] (rows=187573258 width=115)
+ Conds:RS_362._col0=RS_340._col0(Inner),Output:["_col1","_col2"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_340]
+ PartitionCols:_col0
+ Select Operator [SEL_336] (rows=652 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_332] (rows=652 width=8)
+ predicate:(_col1 = 2000)
+ Please refer to the previous Select Operator [SEL_331]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_362]
+ PartitionCols:_col0
+ Select Operator [SEL_361] (rows=525327388 width=119)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_360] (rows=525327388 width=221)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_359] (rows=575995635 width=221)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_358] (rows=575995635 width=221)
+ predicate:(ss_sold_date_sk BETWEEN DynamicValue(RS_11_date_dim_d_date_sk_min) AND DynamicValue(RS_11_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_11_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_0] (rows=575995635 width=221)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_list_price"]
+ <-Reducer 11 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_357]
+ Group By Operator [GBY_356] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_352]
+ Group By Operator [GBY_348] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_341] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_336]
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out
index 585f4d6..c8c58a4 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query4.q.out
@@ -243,390 +243,433 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
Plan optimized by CBO.
Vertex dependency in root stage
-Map 1 <- Reducer 30 (BROADCAST_EDGE)
-Map 12 <- Reducer 31 (BROADCAST_EDGE)
-Map 16 <- Reducer 32 (BROADCAST_EDGE)
-Map 20 <- Reducer 33 (BROADCAST_EDGE)
-Map 24 <- Reducer 29 (BROADCAST_EDGE)
-Map 39 <- Reducer 37 (BROADCAST_EDGE)
-Reducer 10 <- Reducer 9 (SIMPLE_EDGE)
-Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE)
-Reducer 14 <- Map 38 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE)
-Reducer 15 <- Reducer 14 (SIMPLE_EDGE)
-Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE)
-Reducer 18 <- Map 38 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE)
-Reducer 19 <- Reducer 18 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE)
-Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE)
-Reducer 22 <- Map 38 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE)
-Reducer 23 <- Reducer 22 (SIMPLE_EDGE)
-Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 28 (SIMPLE_EDGE)
-Reducer 26 <- Map 38 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE)
-Reducer 27 <- Reducer 26 (SIMPLE_EDGE)
-Reducer 29 <- Map 28 (CUSTOM_SIMPLE_EDGE)
-Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 30 <- Map 28 (CUSTOM_SIMPLE_EDGE)
-Reducer 31 <- Map 28 (CUSTOM_SIMPLE_EDGE)
-Reducer 32 <- Map 28 (CUSTOM_SIMPLE_EDGE)
-Reducer 33 <- Map 28 (CUSTOM_SIMPLE_EDGE)
-Reducer 34 <- Map 28 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE)
-Reducer 35 <- Map 38 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE)
-Reducer 36 <- Reducer 35 (SIMPLE_EDGE)
-Reducer 37 <- Map 28 (CUSTOM_SIMPLE_EDGE)
+Map 1 <- Reducer 31 (BROADCAST_EDGE)
+Map 13 <- Reducer 32 (BROADCAST_EDGE)
+Map 17 <- Reducer 33 (BROADCAST_EDGE)
+Map 21 <- Reducer 34 (BROADCAST_EDGE)
+Map 25 <- Reducer 30 (BROADCAST_EDGE)
+Map 40 <- Reducer 38 (BROADCAST_EDGE)
+Reducer 10 <- Map 41 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 11 <- Reducer 10 (SIMPLE_EDGE)
+Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE)
+Reducer 15 <- Map 39 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE)
+Reducer 16 <- Reducer 15 (SIMPLE_EDGE)
+Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE)
+Reducer 19 <- Map 39 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE)
+Reducer 20 <- Reducer 19 (SIMPLE_EDGE)
+Reducer 22 <- Map 21 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE)
+Reducer 23 <- Map 39 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE)
+Reducer 24 <- Reducer 23 (SIMPLE_EDGE)
+Reducer 26 <- Map 25 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE)
+Reducer 27 <- Map 39 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE)
+Reducer 28 <- Reducer 27 (SIMPLE_EDGE)
+Reducer 3 <- Map 12 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 30 <- Map 29 (CUSTOM_SIMPLE_EDGE)
+Reducer 31 <- Map 29 (CUSTOM_SIMPLE_EDGE)
+Reducer 32 <- Map 29 (CUSTOM_SIMPLE_EDGE)
+Reducer 33 <- Map 29 (CUSTOM_SIMPLE_EDGE)
+Reducer 34 <- Map 29 (CUSTOM_SIMPLE_EDGE)
+Reducer 35 <- Map 29 (SIMPLE_EDGE), Map 40 (SIMPLE_EDGE)
+Reducer 36 <- Map 39 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE)
+Reducer 37 <- Reducer 36 (SIMPLE_EDGE)
+Reducer 38 <- Map 29 (CUSTOM_SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 19 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Reducer 23 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
-Reducer 8 <- Reducer 27 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
-Reducer 9 <- Reducer 36 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 16 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 20 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 24 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 8 <- Reducer 28 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+Reducer 9 <- Reducer 37 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:100
Stage-1
- Reducer 10 vectorized
- File Output Operator [FS_556]
- Limit [LIM_555] (rows=100 width=372)
+ Reducer 11 vectorized
+ File Output Operator [FS_636]
+ Limit [LIM_635] (rows=100 width=372)
Number of rows:100
- Select Operator [SEL_554] (rows=6666666 width=372)
+ Select Operator [SEL_634] (rows=7972067 width=372)
Output:["_col0","_col1","_col2","_col3"]
- <-Reducer 9 [SIMPLE_EDGE]
- SHUFFLE [RS_135]
- Select Operator [SEL_134] (rows=6666666 width=372)
+ <-Reducer 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_153]
+ Select Operator [SEL_152] (rows=7972067 width=372)
Output:["_col0","_col1","_col2","_col3"]
- Top N Key Operator [TNK_247] (rows=6666666 width=828)
- keys:_col0, _col1, _col2, _col3,top n:100
- Filter Operator [FIL_133] (rows=6666666 width=828)
- predicate:CASE WHEN (_col16) THEN (CASE WHEN (_col11) THEN (((_col8 / _col10) > (_col13 / _col15))) ELSE (false) END) ELSE (false) END
- Merge Join Operator [MERGEJOIN_464] (rows=13333333 width=828)
- Conds:RS_130._col0=RS_553._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col8","_col10","_col11","_col13","_col15","_col16"]
- <-Reducer 36 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_553]
- PartitionCols:_col0
- Select Operator [SEL_552] (rows=14325562 width=216)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_551] (rows=14325562 width=212)
- predicate:(_col1 > 0)
- Group By Operator [GBY_550] (rows=42976686 width=212)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 35 [SIMPLE_EDGE]
- SHUFFLE [RS_113]
- PartitionCols:_col0
- Group By Operator [GBY_112] (rows=51391963 width=212)
- Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_459] (rows=51391963 width=212)
- Conds:RS_108._col1=RS_518._col0(Inner),Output:["_col2","_col5"]
- <-Map 38 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_518]
+ Top N Key Operator [TNK_285] (rows=7972067 width=372)
+ keys:_col0, _col2, _col3, _col4,top n:100
+ Merge Join Operator [MERGEJOIN_528] (rows=7972067 width=372)
+ Conds:RS_149._col0=RS_633._col0(Inner),Output:["_col0","_col2","_col3","_col4"]
+ <-Map 41 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_633]
+ PartitionCols:_col0
+ Select Operator [SEL_632] (rows=80000000 width=372)
+ Output:["_col0","_col1","_col2","_col3"]
+ TableScan [TS_147] (rows=80000000 width=372)
+ default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_id","c_first_name","c_last_name","c_birth_country"]
+ <-Reducer 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_149]
+ PartitionCols:_col0
+ Select Operator [SEL_146] (rows=6666666 width=100)
+ Output:["_col0"]
+ Filter Operator [FIL_145] (rows=6666666 width=556)
+ predicate:CASE WHEN (_col14) THEN (CASE WHEN (_col9) THEN (((_col6 / _col8) > (_col11 / _col13))) ELSE (false) END) ELSE (false) END
+ Merge Join Operator [MERGEJOIN_527] (rows=13333333 width=556)
+ Conds:RS_142._col0=RS_631._col0(Inner),Output:["_col0","_col6","_col8","_col9","_col11","_col13","_col14"]
+ <-Reducer 37 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_631]
+ PartitionCols:_col0
+ Select Operator [SEL_630] (rows=14325562 width=216)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_629] (rows=14325562 width=212)
+ predicate:(_col1 > 0)
+ Group By Operator [GBY_628] (rows=42976686 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 36 [SIMPLE_EDGE]
+ SHUFFLE [RS_125]
PartitionCols:_col0
- Select Operator [SEL_513] (rows=80000000 width=104)
- Output:["_col0","_col1"]
- TableScan [TS_84] (rows=80000000 width=104)
- default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"]
- <-Reducer 34 [SIMPLE_EDGE]
- SHUFFLE [RS_108]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_458] (rows=51391963 width=115)
- Conds:RS_549._col0=RS_485._col0(Inner),Output:["_col1","_col2"]
- <-Map 28 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_485]
- PartitionCols:_col0
- Select Operator [SEL_474] (rows=652 width=4)
- Output:["_col0"]
- Filter Operator [FIL_469] (rows=652 width=8)
- predicate:(d_year = 1999)
- TableScan [TS_81] (rows=73049 width=8)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"]
- <-Map 39 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_549]
- PartitionCols:_col0
- Select Operator [SEL_548] (rows=143930993 width=119)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_547] (rows=143930993 width=455)
- predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_106_date_dim_d_date_sk_min) AND DynamicValue(RS_106_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_106_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_97] (rows=144002668 width=455)
- default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"]
- <-Reducer 37 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_546]
- Group By Operator [GBY_545] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_498]
- Group By Operator [GBY_492] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_486] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_474]
- <-Reducer 8 [SIMPLE_EDGE]
- SHUFFLE [RS_130]
- PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_463] (rows=13333333 width=712)
- Conds:RS_127._col0=RS_544._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col8","_col10","_col11","_col13"]
- <-Reducer 27 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_544]
- PartitionCols:_col0
- Group By Operator [GBY_543] (rows=42976686 width=212)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 26 [SIMPLE_EDGE]
- SHUFFLE [RS_94]
- PartitionCols:_col0
- Group By Operator [GBY_93] (rows=51391963 width=212)
- Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_457] (rows=51391963 width=212)
- Conds:RS_89._col1=RS_514._col0(Inner),Output:["_col2","_col5"]
- <-Map 38 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_514]
- PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_513]
- <-Reducer 25 [SIMPLE_EDGE]
- SHUFFLE [RS_89]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_456] (rows=51391963 width=115)
- Conds:RS_542._col0=RS_475._col0(Inner),Output:["_col1","_col2"]
- <-Map 28 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_475]
- PartitionCols:_col0
- Select Operator [SEL_470] (rows=652 width=4)
- Output:["_col0"]
- Filter Operator [FIL_465] (rows=652 width=8)
- predicate:(d_year = 2000)
- Please refer to the previous TableScan [TS_81]
- <-Map 24 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_542]
- PartitionCols:_col0
- Select Operator [SEL_541] (rows=143930993 width=119)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_540] (rows=143930993 width=455)
- predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_87_date_dim_d_date_sk_min) AND DynamicValue(RS_87_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_87_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_78] (rows=144002668 width=455)
- default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"]
- <-Reducer 29 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_539]
- Group By Operator [GBY_538] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_493]
- Group By Operator [GBY_487] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_476] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_470]
- <-Reducer 7 [SIMPLE_EDGE]
- SHUFFLE [RS_127]
- PartitionCols:_col0
- Filter Operator [FIL_126] (rows=13333333 width=824)
- predicate:CASE WHEN (_col6 is not null) THEN (CASE WHEN (_col11) THEN (((_col8 / _col10) > (_col4 / _col6))) ELSE (false) END) ELSE (false) END
- Merge Join Operator [MERGEJOIN_462] (rows=26666666 width=824)
- Conds:RS_123._col0=RS_537._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col11"]
- <-Reducer 23 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_537]
- PartitionCols:_col0
- Select Operator [SEL_536] (rows=22300081 width=216)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_535] (rows=22300081 width=212)
- predicate:(_col1 > 0)
- Group By Operator [GBY_534] (rows=66900244 width=212)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 22 [SIMPLE_EDGE]
- SHUFFLE [RS_74]
- PartitionCols:_col0
- Group By Operator [GBY_73] (rows=80000000 width=212)
- Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_455] (rows=101084444 width=212)
- Conds:RS_69._col1=RS_517._col0(Inner),Output:["_col2","_col5"]
- <-Map 38 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_517]
+ Group By Operator [GBY_124] (rows=51391963 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
+ Merge Join Operator [MERGEJOIN_522] (rows=51391963 width=212)
+ Conds:RS_120._col1=RS_588._col0(Inner),Output:["_col2","_col5"]
+ <-Map 39 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_588]
+ PartitionCols:_col0
+ Select Operator [SEL_583] (rows=80000000 width=104)
+ Output:["_col0","_col1"]
+ TableScan [TS_94] (rows=80000000 width=104)
+ default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"]
+ <-Reducer 35 [SIMPLE_EDGE]
+ SHUFFLE [RS_120]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_521] (rows=51391963 width=115)
+ Conds:RS_627._col0=RS_550._col0(Inner),Output:["_col1","_col2"]
+ <-Map 29 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_550]
+ PartitionCols:_col0
+ Select Operator [SEL_539] (rows=652 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_534] (rows=652 width=8)
+ predicate:(_col1 = 1999)
+ Select Operator [SEL_529] (rows=73049 width=8)
+ Output:["_col0","_col1"]
+ TableScan [TS_90] (rows=73049 width=8)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"]
+ <-Map 40 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_627]
PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_513]
- <-Reducer 21 [SIMPLE_EDGE]
- SHUFFLE [RS_69]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_454] (rows=101084444 width=115)
- Conds:RS_533._col0=RS_483._col0(Inner),Output:["_col1","_col2"]
- <-Map 28 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_483]
- PartitionCols:_col0
- Select Operator [SEL_473] (rows=652 width=4)
- Output:["_col0"]
- Filter Operator [FIL_468] (rows=652 width=8)
- predicate:(d_year = 1999)
- Please refer to the previous TableScan [TS_81]
- <-Map 20 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_533]
- PartitionCols:_col0
- Select Operator [SEL_532] (rows=285117831 width=119)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_531] (rows=285117831 width=453)
- predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_67_date_dim_d_date_sk_min) AND DynamicValue(RS_67_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_67_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_58] (rows=287989836 width=453)
- default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"]
- <-Reducer 33 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_530]
- Group By Operator [GBY_529] (rows=1 width=12)
+ Select Operator [SEL_626] (rows=143930993 width=119)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_625] (rows=143930993 width=455)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_624] (rows=144002668 width=455)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+ Filter Operator [FIL_623] (rows=144002668 width=455)
+ predicate:(ws_sold_date_sk BETWEEN DynamicValue(RS_118_date_dim_d_date_sk_min) AND DynamicValue(RS_118_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_118_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_107] (rows=144002668 width=455)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"]
+ <-Reducer 38 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_622]
+ Group By Operator [GBY_621] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_497]
- Group By Operator [GBY_491] (rows=1 width=12)
+ <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_563]
+ Group By Operator [GBY_557] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_484] (rows=652 width=4)
+ Select Operator [SEL_551] (rows=652 width=4)
Output:["_col0"]
- Please refer to the previous Select Operator [SEL_473]
- <-Reducer 6 [SIMPLE_EDGE]
- SHUFFLE [RS_123]
+ Please refer to the previous Select Operator [SEL_539]
+ <-Reducer 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_142]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_526] (rows=13333333 width=440)
+ Conds:RS_139._col0=RS_620._col0(Inner),Output:["_col0","_col6","_col8","_col9","_col11"]
+ <-Reducer 28 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_620]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_461] (rows=26666666 width=708)
- Conds:RS_120._col0=RS_528._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8"]
- <-Reducer 19 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_528]
+ Group By Operator [GBY_619] (rows=42976686 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 27 [SIMPLE_EDGE]
+ SHUFFLE [RS_104]
PartitionCols:_col0
- Group By Operator [GBY_527] (rows=66900244 width=212)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 18 [SIMPLE_EDGE]
- SHUFFLE [RS_55]
- PartitionCols:_col0
- Group By Operator [GBY_54] (rows=80000000 width=212)
- Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_453] (rows=101084444 width=212)
- Conds:RS_50._col1=RS_516._col0(Inner),Output:["_col2","_col5"]
- <-Map 38 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_516]
+ Group By Operator [GBY_103] (rows=51391963 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
+ Merge Join Operator [MERGEJOIN_520] (rows=51391963 width=212)
+ Conds:RS_99._col1=RS_584._col0(Inner),Output:["_col2","_col5"]
+ <-Map 39 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_584]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_583]
+ <-Reducer 26 [SIMPLE_EDGE]
+ SHUFFLE [RS_99]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_519] (rows=51391963 width=115)
+ Conds:RS_618._col0=RS_540._col0(Inner),Output:["_col1","_col2"]
+ <-Map 29 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_540]
PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_513]
- <-Reducer 17 [SIMPLE_EDGE]
- SHUFFLE [RS_50]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_452] (rows=101084444 width=115)
- Conds:RS_526._col0=RS_481._col0(Inner),Output:["_col1","_col2"]
- <-Map 28 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_481]
- PartitionCols:_col0
- Select Operator [SEL_472] (rows=652 width=4)
- Output:["_col0"]
- Filter Operator [FIL_467] (rows=652 width=8)
- predicate:(d_year = 2000)
- Please refer to the previous TableScan [TS_81]
- <-Map 16 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_526]
- PartitionCols:_col0
- Select Operator [SEL_525] (rows=285117831 width=119)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_524] (rows=285117831 width=453)
- predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_48_date_dim_d_date_sk_min) AND DynamicValue(RS_48_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_48_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_39] (rows=287989836 width=453)
- default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"]
- <-Reducer 32 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_523]
- Group By Operator [GBY_522] (rows=1 width=12)
+ Select Operator [SEL_535] (rows=652 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_530] (rows=652 width=8)
+ predicate:(_col1 = 2000)
+ Please refer to the previous Select Operator [SEL_529]
+ <-Map 25 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_618]
+ PartitionCols:_col0
+ Select Operator [SEL_617] (rows=143930993 width=119)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_616] (rows=143930993 width=455)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_615] (rows=144002668 width=455)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+ Filter Operator [FIL_614] (rows=144002668 width=455)
+ predicate:(ws_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_86] (rows=144002668 width=455)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_ext_discount_amt","ws_ext_sales_price","ws_ext_wholesale_cost","ws_ext_list_price"]
+ <-Reducer 30 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_613]
+ Group By Operator [GBY_612] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_496]
- Group By Operator [GBY_490] (rows=1 width=12)
+ <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_558]
+ Group By Operator [GBY_552] (rows=1 width=12)
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_482] (rows=652 width=4)
+ Select Operator [SEL_541] (rows=652 width=4)
Output:["_col0"]
- Please refer to the previous Select Operator [SEL_472]
- <-Reducer 5 [SIMPLE_EDGE]
- SHUFFLE [RS_120]
- PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_460] (rows=26666666 width=596)
- Conds:RS_507._col0=RS_521._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6"]
- <-Reducer 15 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_521]
- PartitionCols:_col0
- Filter Operator [FIL_520] (rows=22300081 width=212)
+ Please refer to the previous Select Operator [SEL_535]
+ <-Reducer 7 [SIMPLE_EDGE]
+ SHUFFLE [RS_139]
+ PartitionCols:_col0
+ Filter Operator [FIL_138] (rows=13333333 width=552)
+ predicate:CASE WHEN (_col4 is not null) THEN (CASE WHEN (_col9) THEN (((_col6 / _col8) > (_col2 / _col4))) ELSE (false) END) ELSE (false) END
+ Merge Join Operator [MERGEJOIN_525] (rows=26666666 width=552)
+ Conds:RS_135._col0=RS_611._col0(Inner),Output:["_col0","_col2","_col4","_col6","_col8","_col9"]
+ <-Reducer 24 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_611]
+ PartitionCols:_col0
+ Select Operator [SEL_610] (rows=22300081 width=216)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_609] (rows=22300081 width=212)
predicate:(_col1 > 0)
- Group By Operator [GBY_519] (rows=66900244 width=212)
+ Group By Operator [GBY_608] (rows=66900244 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 23 [SIMPLE_EDGE]
+ SHUFFLE [RS_82]
+ PartitionCols:_col0
+ Group By Operator [GBY_81] (rows=80000000 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
+ Merge Join Operator [MERGEJOIN_518] (rows=101084444 width=212)
+ Conds:RS_77._col1=RS_587._col0(Inner),Output:["_col2","_col5"]
+ <-Map 39 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_587]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_583]
+ <-Reducer 22 [SIMPLE_EDGE]
+ SHUFFLE [RS_77]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_517] (rows=101084444 width=115)
+ Conds:RS_607._col0=RS_548._col0(Inner),Output:["_col1","_col2"]
+ <-Map 29 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_548]
+ PartitionCols:_col0
+ Select Operator [SEL_538] (rows=652 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_533] (rows=652 width=8)
+ predicate:(_col1 = 1999)
+ Please refer to the previous Select Operator [SEL_529]
+ <-Map 21 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_607]
+ PartitionCols:_col0
+ Select Operator [SEL_606] (rows=285117831 width=119)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_605] (rows=285117831 width=453)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_604] (rows=287989836 width=453)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+ Filter Operator [FIL_603] (rows=287989836 width=453)
+ predicate:(cs_sold_date_sk BETWEEN DynamicValue(RS_75_date_dim_d_date_sk_min) AND DynamicValue(RS_75_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_75_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_64] (rows=287989836 width=453)
+ default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"]
+ <-Reducer 34 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_602]
+ Group By Operator [GBY_601] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_562]
+ Group By Operator [GBY_556] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_549] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_538]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_135]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_524] (rows=26666666 width=436)
+ Conds:RS_132._col0=RS_600._col0(Inner),Output:["_col0","_col2","_col4","_col6"]
+ <-Reducer 20 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_600]
+ PartitionCols:_col0
+ Group By Operator [GBY_599] (rows=66900244 width=212)
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 14 [SIMPLE_EDGE]
- SHUFFLE [RS_35]
+ <-Reducer 19 [SIMPLE_EDGE]
+ SHUFFLE [RS_61]
PartitionCols:_col0
- Group By Operator [GBY_34] (rows=80000000 width=212)
+ Group By Operator [GBY_60] (rows=80000000 width=212)
Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_451] (rows=187573258 width=212)
- Conds:RS_30._col1=RS_515._col0(Inner),Output:["_col2","_col5"]
- <-Map 38 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_515]
+ Merge Join Operator [MERGEJOIN_516] (rows=101084444 width=212)
+ Conds:RS_56._col1=RS_586._col0(Inner),Output:["_col2","_col5"]
+ <-Map 39 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_586]
PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_513]
- <-Reducer 13 [SIMPLE_EDGE]
- SHUFFLE [RS_30]
+ Please refer to the previous Select Operator [SEL_583]
+ <-Reducer 18 [SIMPLE_EDGE]
+ SHUFFLE [RS_56]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_450] (rows=187573258 width=115)
- Conds:RS_512._col0=RS_479._col0(Inner),Output:["_col1","_col2"]
- <-Map 28 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_479]
+ Merge Join Operator [MERGEJOIN_515] (rows=101084444 width=115)
+ Conds:RS_598._col0=RS_546._col0(Inner),Output:["_col1","_col2"]
+ <-Map 29 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_546]
PartitionCols:_col0
- Select Operator [SEL_471] (rows=652 width=4)
+ Select Operator [SEL_537] (rows=652 width=4)
Output:["_col0"]
- Filter Operator [FIL_466] (rows=652 width=8)
- predicate:(d_year = 1999)
- Please refer to the previous TableScan [TS_81]
- <-Map 12 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_512]
+ Filter Operator [FIL_532] (rows=652 width=8)
+ predicate:(_col1 = 2000)
+ Please refer to the previous Select Operator [SEL_529]
+ <-Map 17 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_598]
PartitionCols:_col0
- Select Operator [SEL_511] (rows=525327388 width=119)
+ Select Operator [SEL_597] (rows=285117831 width=119)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_510] (rows=525327388 width=435)
- predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_19] (rows=575995635 width=435)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"]
- <-Reducer 31 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_509]
- Group By Operator [GBY_508] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_495]
- Group By Operator [GBY_489] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_480] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_471]
- <-Reducer 4 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_507]
- PartitionCols:_col0
- Group By Operator [GBY_506] (rows=80000000 width=484)
- Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_16]
- PartitionCols:_col0, _col1, _col2, _col3
- Group By Operator [GBY_15] (rows=80000000 width=484)
- Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7, _col9
- Merge Join Operator [MERGEJOIN_449] (rows=187573258 width=484)
- Conds:RS_11._col1=RS_505._col0(Inner),Output:["_col2","_col5","_col6","_col7","_col9"]
- <-Map 11 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_505]
- PartitionCols:_col0
- Select Operator [SEL_504] (rows=80000000 width=376)
- Output:["_col0","_col1","_col2","_col3","_col5"]
- TableScan [TS_6] (rows=80000000 width=376)
- default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name","c_birth_country"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_11]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_448] (rows=187573258 width=115)
- Conds:RS_503._col0=RS_477._col0(Inner),Output:["_col1","_col2"]
- <-Map 28 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_477]
- PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_470]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_503]
- PartitionCols:_col0
- Select Operator [SEL_502] (rows=525327388 width=119)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_501] (rows=525327388 width=435)
- predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_0] (rows=575995635 width=435)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"]
- <-Reducer 30 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_500]
- Group By Operator [GBY_499] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 28 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_494]
- Group By Operator [GBY_488] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_478] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_470]
+ Filter Operator [FIL_596] (rows=285117831 width=453)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_595] (rows=287989836 width=453)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+ Filter Operator [FIL_594] (rows=287989836 width=453)
+ predicate:(cs_sold_date_sk BETWEEN DynamicValue(RS_54_date_dim_d_date_sk_min) AND DynamicValue(RS_54_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_54_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_43] (rows=287989836 width=453)
+ default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_ext_discount_amt","cs_ext_sales_price","cs_ext_wholesale_cost","cs_ext_list_price"]
+ <-Reducer 33 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_593]
+ Group By Operator [GBY_592] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_561]
+ Group By Operator [GBY_555] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_547] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_537]
+ <-Reducer 5 [SIMPLE_EDGE]
+ SHUFFLE [RS_132]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_523] (rows=26666666 width=324)
+ Conds:RS_575._col0=RS_591._col0(Inner),Output:["_col0","_col2","_col4"]
+ <-Reducer 16 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_591]
+ PartitionCols:_col0
+ Filter Operator [FIL_590] (rows=22300081 width=212)
+ predicate:(_col1 > 0)
+ Group By Operator [GBY_589] (rows=66900244 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 15 [SIMPLE_EDGE]
+ SHUFFLE [RS_39]
+ PartitionCols:_col0
+ Group By Operator [GBY_38] (rows=80000000 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
+ Merge Join Operator [MERGEJOIN_514] (rows=187573258 width=212)
+ Conds:RS_34._col1=RS_585._col0(Inner),Output:["_col2","_col5"]
+ <-Map 39 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_585]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_583]
+ <-Reducer 14 [SIMPLE_EDGE]
+ SHUFFLE [RS_34]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_513] (rows=187573258 width=115)
+ Conds:RS_582._col0=RS_544._col0(Inner),Output:["_col1","_col2"]
+ <-Map 29 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_544]
+ PartitionCols:_col0
+ Select Operator [SEL_536] (rows=652 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_531] (rows=652 width=8)
+ predicate:(_col1 = 1999)
+ Please refer to the previous Select Operator [SEL_529]
+ <-Map 13 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_582]
+ PartitionCols:_col0
+ Select Operator [SEL_581] (rows=525327388 width=119)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_580] (rows=525327388 width=435)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_579] (rows=575995635 width=435)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+ Filter Operator [FIL_578] (rows=575995635 width=435)
+ predicate:(ss_sold_date_sk BETWEEN DynamicValue(RS_32_date_dim_d_date_sk_min) AND DynamicValue(RS_32_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_32_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_21] (rows=575995635 width=435)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"]
+ <-Reducer 32 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_577]
+ Group By Operator [GBY_576] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_560]
+ Group By Operator [GBY_554] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_545] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_536]
+ <-Reducer 4 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_575]
+ PartitionCols:_col0
+ Select Operator [SEL_574] (rows=80000000 width=304)
+ Output:["_col0","_col2"]
+ Group By Operator [GBY_573] (rows=80000000 width=304)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_17] (rows=80000000 width=304)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)"],keys:_col5, _col8
+ Merge Join Operator [MERGEJOIN_512] (rows=187573258 width=304)
+ Conds:RS_13._col1=RS_572._col0(Inner),Output:["_col2","_col5","_col8"]
+ <-Map 12 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_572]
+ PartitionCols:_col0
+ Select Operator [SEL_571] (rows=80000000 width=196)
+ Output:["_col0","_col1","_col4"]
+ TableScan [TS_8] (rows=80000000 width=196)
+ default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_birth_country"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_13]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_511] (rows=187573258 width=115)
+ Conds:RS_570._col0=RS_542._col0(Inner),Output:["_col1","_col2"]
+ <-Map 29 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_542]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_535]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_570]
+ PartitionCols:_col0
+ Select Operator [SEL_569] (rows=525327388 width=119)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_568] (rows=525327388 width=435)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_567] (rows=575995635 width=435)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+ Filter Operator [FIL_566] (rows=575995635 width=435)
+ predicate:(ss_sold_date_sk BETWEEN DynamicValue(RS_11_date_dim_d_date_sk_min) AND DynamicValue(RS_11_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_11_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_0] (rows=575995635 width=435)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_discount_amt","ss_ext_sales_price","ss_ext_wholesale_cost","ss_ext_list_price"]
+ <-Reducer 31 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_565]
+ Group By Operator [GBY_564] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_559]
+ Group By Operator [GBY_553] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_543] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_535]
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out
index 39c76fc..af7b8b0 100644
--- a/ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query74.q.out
@@ -131,268 +131,290 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
Plan optimized by CBO.
Vertex dependency in root stage
-Map 1 <- Reducer 10 (BROADCAST_EDGE)
-Map 24 <- Reducer 14 (BROADCAST_EDGE)
-Map 26 <- Reducer 18 (BROADCAST_EDGE)
-Map 27 <- Reducer 22 (BROADCAST_EDGE)
-Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE)
-Reducer 11 <- Map 24 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 12 <- Map 25 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE)
-Reducer 13 <- Reducer 12 (SIMPLE_EDGE)
-Reducer 14 <- Map 9 (CUSTOM_SIMPLE_EDGE)
-Reducer 15 <- Map 26 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE)
-Reducer 17 <- Reducer 16 (SIMPLE_EDGE)
-Reducer 18 <- Map 9 (CUSTOM_SIMPLE_EDGE)
-Reducer 19 <- Map 27 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 20 <- Map 25 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE)
-Reducer 21 <- Reducer 20 (SIMPLE_EDGE)
-Reducer 22 <- Map 9 (CUSTOM_SIMPLE_EDGE)
-Reducer 3 <- Map 23 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Map 1 <- Reducer 11 (BROADCAST_EDGE)
+Map 25 <- Reducer 15 (BROADCAST_EDGE)
+Map 26 <- Reducer 19 (BROADCAST_EDGE)
+Map 27 <- Reducer 23 (BROADCAST_EDGE)
+Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE)
+Reducer 12 <- Map 10 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE)
+Reducer 13 <- Map 24 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE)
+Reducer 14 <- Reducer 13 (SIMPLE_EDGE)
+Reducer 15 <- Map 10 (CUSTOM_SIMPLE_EDGE)
+Reducer 16 <- Map 10 (SIMPLE_EDGE), Map 26 (SIMPLE_EDGE)
+Reducer 17 <- Map 24 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE)
+Reducer 18 <- Reducer 17 (SIMPLE_EDGE)
+Reducer 19 <- Map 10 (CUSTOM_SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+Reducer 20 <- Map 10 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE)
+Reducer 21 <- Map 24 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE)
+Reducer 22 <- Reducer 21 (SIMPLE_EDGE)
+Reducer 23 <- Map 10 (CUSTOM_SIMPLE_EDGE)
+Reducer 3 <- Map 24 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Reducer 17 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
-Reducer 7 <- Reducer 21 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
-Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 14 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 18 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE)
+Reducer 8 <- Map 28 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
+Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:100
Stage-1
- Reducer 8 vectorized
- File Output Operator [FS_346]
- Limit [LIM_345] (rows=100 width=280)
+ Reducer 9 vectorized
+ File Output Operator [FS_400]
+ Limit [LIM_399] (rows=100 width=280)
Number of rows:100
- Select Operator [SEL_344] (rows=13333333 width=280)
+ Select Operator [SEL_398] (rows=13333332 width=280)
Output:["_col0","_col1","_col2"]
- <-Reducer 7 [SIMPLE_EDGE]
- SHUFFLE [RS_89]
- Select Operator [SEL_88] (rows=13333333 width=280)
+ <-Reducer 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_103]
+ Select Operator [SEL_102] (rows=13333332 width=280)
Output:["_col0","_col1","_col2"]
- Top N Key Operator [TNK_152] (rows=13333333 width=732)
- keys:_col2, _col0, _col1,top n:100
- Filter Operator [FIL_87] (rows=13333333 width=732)
- predicate:CASE WHEN (_col5 is not null) THEN (CASE WHEN (_col10) THEN (((_col7 / _col9) > (_col3 / _col5))) ELSE (false) END) ELSE (false) END
- Merge Join Operator [MERGEJOIN_282] (rows=26666666 width=732)
- Conds:RS_84._col0=RS_343._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col7","_col9","_col10"]
- <-Reducer 21 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_343]
- PartitionCols:_col0
- Select Operator [SEL_342] (rows=14325562 width=216)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_341] (rows=14325562 width=212)
- predicate:(_col1 > 0)
- Group By Operator [GBY_340] (rows=42976686 width=212)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 20 [SIMPLE_EDGE]
- SHUFFLE [RS_74]
- PartitionCols:_col0
- Group By Operator [GBY_73] (rows=51391963 width=212)
- Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_279] (rows=51391963 width=211)
- Conds:RS_69._col1=RS_324._col0(Inner),Output:["_col2","_col5"]
- <-Map 25 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_324]
- PartitionCols:_col0
- Select Operator [SEL_321] (rows=80000000 width=104)
- Output:["_col0","_col1"]
- TableScan [TS_25] (rows=80000000 width=104)
- default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"]
- <-Reducer 19 [SIMPLE_EDGE]
- SHUFFLE [RS_69]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_278] (rows=51391963 width=115)
- Conds:RS_339._col0=RS_297._col0(Inner),Output:["_col1","_col2"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_297]
- PartitionCols:_col0
- Select Operator [SEL_290] (rows=652 width=4)
- Output:["_col0"]
- Filter Operator [FIL_286] (rows=652 width=8)
- predicate:((d_year = 1998) and (d_year) IN (1998, 1999))
- TableScan [TS_3] (rows=73049 width=8)
- default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"]
- <-Map 27 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_339]
- PartitionCols:_col0
- Select Operator [SEL_338] (rows=143930993 width=119)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_337] (rows=143930993 width=119)
- predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_67_date_dim_d_date_sk_min) AND DynamicValue(RS_67_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_67_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_58] (rows=144002668 width=119)
- default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"]
- <-Reducer 22 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_336]
- Group By Operator [GBY_335] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_306]
- Group By Operator [GBY_302] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_298] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_290]
- <-Reducer 6 [SIMPLE_EDGE]
- SHUFFLE [RS_84]
- PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_281] (rows=26666666 width=616)
- Conds:RS_81._col0=RS_334._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col7"]
- <-Reducer 17 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_334]
- PartitionCols:_col0
- Group By Operator [GBY_333] (rows=42976686 width=212)
- Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 16 [SIMPLE_EDGE]
- SHUFFLE [RS_55]
- PartitionCols:_col0
- Group By Operator [GBY_54] (rows=51391963 width=212)
- Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_277] (rows=51391963 width=211)
- Conds:RS_50._col1=RS_323._col0(Inner),Output:["_col2","_col5"]
- <-Map 25 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_323]
- PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_321]
- <-Reducer 15 [SIMPLE_EDGE]
- SHUFFLE [RS_50]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_276] (rows=51391963 width=115)
- Conds:RS_332._col0=RS_295._col0(Inner),Output:["_col1","_col2"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_295]
- PartitionCols:_col0
- Select Operator [SEL_289] (rows=652 width=4)
- Output:["_col0"]
- Filter Operator [FIL_285] (rows=652 width=8)
- predicate:((d_year = 1999) and (d_year) IN (1998, 1999))
- Please refer to the previous TableScan [TS_3]
- <-Map 26 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_332]
- PartitionCols:_col0
- Select Operator [SEL_331] (rows=143930993 width=119)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_330] (rows=143930993 width=119)
- predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_48_date_dim_d_date_sk_min) AND DynamicValue(RS_48_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_48_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_39] (rows=144002668 width=119)
- default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"]
- <-Reducer 18 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_329]
- Group By Operator [GBY_328] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_305]
- Group By Operator [GBY_301] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_296] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_289]
- <-Reducer 5 [SIMPLE_EDGE]
- SHUFFLE [RS_81]
- PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_280] (rows=26666666 width=504)
- Conds:RS_315._col0=RS_327._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col5"]
- <-Reducer 13 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_327]
- PartitionCols:_col0
- Filter Operator [FIL_326] (rows=22300081 width=212)
+ Top N Key Operator [TNK_180] (rows=13333332 width=280)
+ keys:_col3, _col0, _col2,top n:100
+ Merge Join Operator [MERGEJOIN_330] (rows=13333332 width=280)
+ Conds:RS_99._col0=RS_397._col0(Inner),Output:["_col0","_col2","_col3"]
+ <-Map 28 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_397]
+ PartitionCols:_col0
+ Select Operator [SEL_396] (rows=80000000 width=280)
+ Output:["_col0","_col1","_col2"]
+ TableScan [TS_97] (rows=80000000 width=280)
+ default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_id","c_first_name","c_last_name"]
+ <-Reducer 7 [SIMPLE_EDGE]
+ SHUFFLE [RS_99]
+ PartitionCols:_col0
+ Select Operator [SEL_96] (rows=11150040 width=100)
+ Output:["_col0"]
+ Filter Operator [FIL_95] (rows=11150040 width=552)
+ predicate:CASE WHEN (_col3 is not null) THEN (CASE WHEN (_col8) THEN (((_col5 / _col7) > (_col1 / _col3))) ELSE (false) END) ELSE (false) END
+ Merge Join Operator [MERGEJOIN_329] (rows=22300081 width=552)
+ Conds:RS_92._col0=RS_395._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col7","_col8"]
+ <-Reducer 22 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_395]
+ PartitionCols:_col0
+ Select Operator [SEL_394] (rows=14325562 width=216)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_393] (rows=14325562 width=212)
predicate:(_col1 > 0)
- Group By Operator [GBY_325] (rows=66900244 width=212)
+ Group By Operator [GBY_392] (rows=42976686 width=212)
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
- <-Reducer 12 [SIMPLE_EDGE]
- SHUFFLE [RS_35]
+ <-Reducer 21 [SIMPLE_EDGE]
+ SHUFFLE [RS_82]
PartitionCols:_col0
- Group By Operator [GBY_34] (rows=80000000 width=212)
+ Group By Operator [GBY_81] (rows=51391963 width=212)
Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
- Merge Join Operator [MERGEJOIN_275] (rows=187573258 width=197)
- Conds:RS_30._col1=RS_322._col0(Inner),Output:["_col2","_col5"]
- <-Map 25 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_322]
+ Merge Join Operator [MERGEJOIN_326] (rows=51391963 width=211)
+ Conds:RS_77._col1=RS_366._col0(Inner),Output:["_col2","_col5"]
+ <-Map 24 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_366]
PartitionCols:_col0
- Please refer to the previous Select Operator [SEL_321]
- <-Reducer 11 [SIMPLE_EDGE]
- SHUFFLE [RS_30]
+ Select Operator [SEL_362] (rows=80000000 width=104)
+ Output:["_col0","_col1"]
+ TableScan [TS_8] (rows=80000000 width=104)
+ default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"]
+ <-Reducer 20 [SIMPLE_EDGE]
+ SHUFFLE [RS_77]
PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_274] (rows=187573258 width=101)
- Conds:RS_320._col0=RS_293._col0(Inner),Output:["_col1","_col2"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_293]
+ Merge Join Operator [MERGEJOIN_325] (rows=51391963 width=115)
+ Conds:RS_391._col0=RS_346._col0(Inner),Output:["_col1","_col2"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_346]
PartitionCols:_col0
- Select Operator [SEL_288] (rows=652 width=4)
+ Select Operator [SEL_339] (rows=652 width=4)
Output:["_col0"]
- Filter Operator [FIL_284] (rows=652 width=8)
- predicate:((d_year = 1998) and (d_year) IN (1998, 1999))
- Please refer to the previous TableScan [TS_3]
- <-Map 24 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_320]
+ Filter Operator [FIL_335] (rows=652 width=8)
+ predicate:((_col1 = 1998) and (_col1) IN (1998, 1999))
+ Select Operator [SEL_331] (rows=73049 width=8)
+ Output:["_col0","_col1"]
+ TableScan [TS_4] (rows=73049 width=8)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"]
+ <-Map 27 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_391]
+ PartitionCols:_col0
+ Filter Operator [FIL_390] (rows=143930993 width=119)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_389] (rows=144002668 width=119)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_388] (rows=144002668 width=119)
+ predicate:(ws_sold_date_sk BETWEEN DynamicValue(RS_75_date_dim_d_date_sk_min) AND DynamicValue(RS_75_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_75_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_64] (rows=144002668 width=119)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"]
+ <-Reducer 23 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_387]
+ Group By Operator [GBY_386] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_355]
+ Group By Operator [GBY_351] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_347] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_339]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_92]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_328] (rows=22300081 width=436)
+ Conds:RS_89._col0=RS_385._col0(Inner),Output:["_col0","_col1","_col3","_col5"]
+ <-Reducer 18 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_385]
+ PartitionCols:_col0
+ Group By Operator [GBY_384] (rows=42976686 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 17 [SIMPLE_EDGE]
+ SHUFFLE [RS_61]
+ PartitionCols:_col0
+ Group By Operator [GBY_60] (rows=51391963 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
+ Merge Join Operator [MERGEJOIN_324] (rows=51391963 width=211)
+ Conds:RS_56._col1=RS_365._col0(Inner),Output:["_col2","_col5"]
+ <-Map 24 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_365]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_362]
+ <-Reducer 16 [SIMPLE_EDGE]
+ SHUFFLE [RS_56]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_323] (rows=51391963 width=115)
+ Conds:RS_383._col0=RS_344._col0(Inner),Output:["_col1","_col2"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_344]
+ PartitionCols:_col0
+ Select Operator [SEL_338] (rows=652 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_334] (rows=652 width=8)
+ predicate:((_col1 = 1999) and (_col1) IN (1998, 1999))
+ Please refer to the previous Select Operator [SEL_331]
+ <-Map 26 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_383]
PartitionCols:_col0
- Select Operator [SEL_319] (rows=525327388 width=114)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_318] (rows=525327388 width=114)
- predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_19] (rows=575995635 width=114)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"]
- <-Reducer 14 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_317]
- Group By Operator [GBY_316] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_304]
- Group By Operator [GBY_300] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_294] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_288]
- <-Reducer 4 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_315]
- PartitionCols:_col0
- Group By Operator [GBY_314] (rows=80000000 width=392)
- Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_16]
- PartitionCols:_col0, _col1, _col2
- Group By Operator [GBY_15] (rows=80000000 width=392)
- Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)"],keys:_col5, _col6, _col7
- Merge Join Operator [MERGEJOIN_273] (rows=187573258 width=377)
- Conds:RS_11._col1=RS_313._col0(Inner),Output:["_col2","_col5","_col6","_col7"]
- <-Map 23 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_313]
+ Filter Operator [FIL_382] (rows=143930993 width=119)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_381] (rows=144002668 width=119)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_380] (rows=144002668 width=119)
+ predicate:(ws_sold_date_sk BETWEEN DynamicValue(RS_54_date_dim_d_date_sk_min) AND DynamicValue(RS_54_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_54_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_43] (rows=144002668 width=119)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk","ws_net_paid"]
+ <-Reducer 19 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_379]
+ Group By Operator [GBY_378] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_354]
+ Group By Operator [GBY_350] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_345] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_338]
+ <-Reducer 5 [SIMPLE_EDGE]
+ SHUFFLE [RS_89]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_327] (rows=22300081 width=324)
+ Conds:RS_368._col0=RS_377._col0(Inner),Output:["_col0","_col1","_col3"]
+ <-Reducer 14 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_377]
+ PartitionCols:_col0
+ Filter Operator [FIL_376] (rows=22300081 width=212)
+ predicate:(_col1 > 0)
+ Group By Operator [GBY_375] (rows=66900244 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_39]
+ PartitionCols:_col0
+ Group By Operator [GBY_38] (rows=80000000 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
+ Merge Join Operator [MERGEJOIN_322] (rows=187573258 width=197)
+ Conds:RS_34._col1=RS_364._col0(Inner),Output:["_col2","_col5"]
+ <-Map 24 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_364]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_362]
+ <-Reducer 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_34]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_321] (rows=187573258 width=101)
+ Conds:RS_374._col0=RS_342._col0(Inner),Output:["_col1","_col2"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_342]
+ PartitionCols:_col0
+ Select Operator [SEL_337] (rows=652 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_333] (rows=652 width=8)
+ predicate:((_col1 = 1998) and (_col1) IN (1998, 1999))
+ Please refer to the previous Select Operator [SEL_331]
+ <-Map 25 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_374]
+ PartitionCols:_col0
+ Filter Operator [FIL_373] (rows=525327388 width=114)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_372] (rows=575995635 width=114)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_371] (rows=575995635 width=114)
+ predicate:(ss_sold_date_sk BETWEEN DynamicValue(RS_32_date_dim_d_date_sk_min) AND DynamicValue(RS_32_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_32_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_21] (rows=575995635 width=114)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"]
+ <-Reducer 15 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_370]
+ Group By Operator [GBY_369] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_353]
+ Group By Operator [GBY_349] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_343] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_337]
+ <-Reducer 4 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_368]
+ PartitionCols:_col0
+ Group By Operator [GBY_367] (rows=66900244 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_18]
PartitionCols:_col0
- Select Operator [SEL_312] (rows=80000000 width=284)
- Output:["_col0","_col1","_col2","_col3"]
- TableScan [TS_6] (rows=80000000 width=284)
- default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id","c_first_name","c_last_name"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_11]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_272] (rows=187573258 width=101)
- Conds:RS_311._col0=RS_291._col0(Inner),Output:["_col1","_col2"]
- <-Map 9 [SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_291]
- PartitionCols:_col0
- Select Operator [SEL_287] (rows=652 width=4)
- Output:["_col0"]
- Filter Operator [FIL_283] (rows=652 width=8)
- predicate:((d_year = 1999) and (d_year) IN (1998, 1999))
- Please refer to the previous TableScan [TS_3]
- <-Map 1 [SIMPLE_EDGE] vectorized
- SHUFFLE [RS_311]
- PartitionCols:_col0
- Select Operator [SEL_310] (rows=525327388 width=114)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_309] (rows=525327388 width=114)
- predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_9_date_dim_d_date_sk_min) AND DynamicValue(RS_9_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_9_date_dim_d_date_sk_bloom_filter)))
- TableScan [TS_0] (rows=575995635 width=114)
- default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"]
- <-Reducer 10 [BROADCAST_EDGE] vectorized
- BROADCAST [RS_308]
- Group By Operator [GBY_307] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
- <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized
- PARTITION_ONLY_SHUFFLE [RS_303]
- Group By Operator [GBY_299] (rows=1 width=12)
- Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
- Select Operator [SEL_292] (rows=652 width=4)
- Output:["_col0"]
- Please refer to the previous Select Operator [SEL_287]
+ Group By Operator [GBY_17] (rows=80000000 width=212)
+ Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col5
+ Merge Join Operator [MERGEJOIN_320] (rows=187573258 width=197)
+ Conds:RS_13._col1=RS_363._col0(Inner),Output:["_col2","_col5"]
+ <-Map 24 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_363]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_362]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_13]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_319] (rows=187573258 width=101)
+ Conds:RS_361._col0=RS_340._col0(Inner),Output:["_col1","_col2"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_340]
+ PartitionCols:_col0
+ Select Operator [SEL_336] (rows=652 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_332] (rows=652 width=8)
+ predicate:((_col1 = 1999) and (_col1) IN (1998, 1999))
+ Please refer to the previous Select Operator [SEL_331]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_361]
+ PartitionCols:_col0
+ Filter Operator [FIL_360] (rows=525327388 width=114)
+ predicate:(_col0 is not null and _col1 is not null)
+ Select Operator [SEL_359] (rows=575995635 width=114)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_358] (rows=575995635 width=114)
+ predicate:(ss_sold_date_sk BETWEEN DynamicValue(RS_11_date_dim_d_date_sk_min) AND DynamicValue(RS_11_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_11_date_dim_d_date_sk_bloom_filter)))
+ TableScan [TS_0] (rows=575995635 width=114)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_net_paid"]
+ <-Reducer 11 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_357]
+ Group By Operator [GBY_356] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_352]
+ Group By Operator [GBY_348] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_341] (rows=652 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_336]