You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kr...@apache.org on 2022/03/23 03:01:26 UTC
[hive] branch master updated: HIVE-26043: Use constraint info when creating RexNodes (Krisztian Kasa, reviewed by Stamatis Zampetakis)
This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 13016e5 HIVE-26043: Use constraint info when creating RexNodes (Krisztian Kasa, reviewed by Stamatis Zampetakis)
13016e5 is described below
commit 13016e514bce3b3e39cd7253d5acab5bf955cbfe
Author: Krisztian Kasa <ka...@gmail.com>
AuthorDate: Wed Mar 23 04:01:10 2022 +0100
HIVE-26043: Use constraint info when creating RexNodes (Krisztian Kasa, reviewed by Stamatis Zampetakis)
---
.../org/apache/hadoop/hive/ql/exec/ColumnInfo.java | 29 ++-
.../calcite/translator/TypeConverter.java | 22 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 82 ++-----
.../hive/ql/parse/type/RexNodeExprFactory.java | 6 +-
.../perf/tpcds30tb/tez/cbo_query45.q.out | 22 +-
.../perf/tpcds30tb/tez/query45.q.out | 244 +++++++++------------
6 files changed, 171 insertions(+), 234 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
index 3f237af..04ff844 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
@@ -61,7 +61,10 @@ public class ColumnInfo implements Serializable {
private String typeName;
+ private final boolean nullable;
+
public ColumnInfo() {
+ nullable = true;
}
public ColumnInfo(String internalName, TypeInfo type, String tabAlias,
@@ -69,6 +72,11 @@ public class ColumnInfo implements Serializable {
this(internalName, type, tabAlias, isVirtualCol, false);
}
+ public ColumnInfo(String internalName, TypeInfo type, boolean nullable, String tabAlias,
+ boolean isVirtualCol) {
+ this(internalName, type, nullable, tabAlias, isVirtualCol, false);
+ }
+
public ColumnInfo(String internalName, Class type, String tabAlias,
boolean isVirtualCol) {
this(internalName, TypeInfoFactory
@@ -80,6 +88,17 @@ public class ColumnInfo implements Serializable {
boolean isVirtualCol, boolean isHiddenVirtualCol) {
this(internalName,
TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(type),
+ true,
+ tabAlias,
+ isVirtualCol,
+ isHiddenVirtualCol);
+ }
+
+ public ColumnInfo(String internalName, TypeInfo type, boolean nullable, String tabAlias,
+ boolean isVirtualCol, boolean isHiddenVirtualCol) {
+ this(internalName,
+ TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(type),
+ nullable,
tabAlias,
isVirtualCol,
isHiddenVirtualCol);
@@ -87,16 +106,17 @@ public class ColumnInfo implements Serializable {
public ColumnInfo(String internalName, ObjectInspector objectInspector,
String tabAlias, boolean isVirtualCol) {
- this(internalName, objectInspector, tabAlias, isVirtualCol, false);
+ this(internalName, objectInspector, true, tabAlias, isVirtualCol, false);
}
- public ColumnInfo(String internalName, ObjectInspector objectInspector,
+ public ColumnInfo(String internalName, ObjectInspector objectInspector, boolean nullable,
String tabAlias, boolean isVirtualCol, boolean isHiddenVirtualCol) {
this.internalName = internalName;
this.objectInspector = objectInspector;
this.tabAlias = tabAlias;
this.isVirtualCol = isVirtualCol;
this.isHiddenVirtualCol = isHiddenVirtualCol;
+ this.nullable = nullable;
setTypeName(getType().getTypeName());
}
@@ -107,6 +127,7 @@ public class ColumnInfo implements Serializable {
this.tabAlias = columnInfo.getTabAlias();
this.isVirtualCol = columnInfo.getIsVirtualCol();
this.isHiddenVirtualCol = columnInfo.isHiddenVirtualCol();
+ this.nullable = columnInfo.nullable;
this.setType(columnInfo.getType());
}
@@ -254,4 +275,8 @@ public class ColumnInfo implements Serializable {
public void setObjectinspector(ObjectInspector writableObjectInspector) {
this.objectInspector = writableObjectInspector;
}
+
+ public boolean isNullable() {
+ return nullable;
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java
index e95ff18..7d22797 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java
@@ -128,25 +128,29 @@ public class TypeConverter {
RexBuilder rexBuilder = cluster.getRexBuilder();
RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
RowSchema rs = rr.getRowSchema();
- List<RelDataType> fieldTypes = new LinkedList<RelDataType>();
- List<String> fieldNames = new LinkedList<String>();
+ List<RelDataType> fieldTypes = new LinkedList<>();
+ List<String> fieldNames = new LinkedList<>();
for (ColumnInfo ci : rs.getSignature()) {
if (neededCols == null || neededCols.contains(ci.getInternalName())) {
- fieldTypes.add(convert(ci.getType(), dtFactory));
+ fieldTypes.add(convert(ci.getType(), ci.isNullable(), dtFactory));
fieldNames.add(ci.getInternalName());
}
}
return dtFactory.createStructType(fieldTypes, fieldNames);
}
- public static RelDataType convert(TypeInfo type, RelDataTypeFactory dtFactory)
+ public static RelDataType convert(TypeInfo type, RelDataTypeFactory dtFactory) throws CalciteSemanticException {
+ return convert(type, true, dtFactory);
+ }
+
+ public static RelDataType convert(TypeInfo type, boolean nullable, RelDataTypeFactory dtFactory)
throws CalciteSemanticException {
RelDataType convertedType = null;
switch (type.getCategory()) {
case PRIMITIVE:
- convertedType = convert((PrimitiveTypeInfo) type, dtFactory);
+ convertedType = convert((PrimitiveTypeInfo) type, nullable, dtFactory);
break;
case LIST:
convertedType = convert((ListTypeInfo) type, dtFactory);
@@ -162,10 +166,14 @@ public class TypeConverter {
break;
}
// hive does not have concept of not nullable types
- return dtFactory.createTypeWithNullability(convertedType, true);
+ return dtFactory.createTypeWithNullability(convertedType, nullable);
}
public static RelDataType convert(PrimitiveTypeInfo type, RelDataTypeFactory dtFactory) {
+ return convert(type, true, dtFactory);
+ }
+
+ public static RelDataType convert(PrimitiveTypeInfo type, boolean nullable, RelDataTypeFactory dtFactory) {
RelDataType convertedType = null;
switch (type.getPrimitiveCategory()) {
@@ -242,7 +250,7 @@ public class TypeConverter {
throw new RuntimeException("Unsupported Type : " + type.getTypeName());
}
- return dtFactory.createTypeWithNullability(convertedType, true);
+ return dtFactory.createTypeWithNullability(convertedType, nullable);
}
public static RelDataType convert(ListTypeInfo lstType,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index bc55d0e..a76ce37 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2913,13 +2913,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
ColumnInfo colInfo;
String colName;
- ArrayList<ColumnInfo> cInfoLst = new ArrayList<ColumnInfo>();
- for (int i = 0; i < fields.size(); i++) {
- colName = fields.get(i).getFieldName();
+ ArrayList<ColumnInfo> cInfoLst = new ArrayList<>();
+
+ final NotNullConstraint nnc = tabMetaData.getNotNullConstraint();
+ final PrimaryKeyInfo pkc = tabMetaData.getPrimaryKeyInfo();
+
+ for (StructField structField : fields) {
+ colName = structField.getFieldName();
colInfo = new ColumnInfo(
- fields.get(i).getFieldName(),
- TypeInfoUtils.getTypeInfoFromObjectInspector(fields.get(i).getFieldObjectInspector()),
- tableAlias, false);
+ structField.getFieldName(),
+ TypeInfoUtils.getTypeInfoFromObjectInspector(structField.getFieldObjectInspector()),
+ isNullable(colName, nnc, pkc), tableAlias, false);
colInfo.setSkewedCol(isSkewedCol(tableAlias, qb, colName));
rr.put(tableAlias, colName, colInfo);
cInfoLst.add(colInfo);
@@ -2932,7 +2936,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
for (FieldSchema part_col : tabMetaData.getPartCols()) {
colName = part_col.getName();
colInfo = new ColumnInfo(colName,
- TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), tableAlias, true);
+ TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()),
+ isNullable(colName, nnc, pkc), tableAlias, true);
rr.put(tableAlias, colName, colInfo);
cInfoLst.add(colInfo);
partitionColumns.add(colInfo);
@@ -3082,7 +3087,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
} else {
// Build row type from field <type, name>
- RelDataType rowType = inferNotNullableColumns(tabMetaData, TypeConverter.getType(cluster, rr, null));
+ RelDataType rowType = TypeConverter.getType(cluster, rr, null);
// Build RelOptAbstractTable
List<String> fullyQualifiedTabName = new ArrayList<>();
if (tabMetaData.getDbName() != null && !tabMetaData.getDbName().isEmpty()) {
@@ -3119,65 +3124,16 @@ public class CalcitePlanner extends SemanticAnalyzer {
return tableRel;
}
- private RelDataType inferNotNullableColumns(Table tabMetaData, RelDataType rowType)
- throws HiveException {
- final NotNullConstraint nnc = tabMetaData.getNotNullConstraint();
- final PrimaryKeyInfo pkc = tabMetaData.getPrimaryKeyInfo();
- if ((nnc == null || nnc.getNotNullConstraints().isEmpty()) &&
- (pkc == null || pkc.getColNames().isEmpty())) {
- return rowType;
+ private boolean isNullable(String colName, NotNullConstraint notNullConstraints, PrimaryKeyInfo primaryKeyInfo) {
+ if (notNullConstraints != null && notNullConstraints.getNotNullConstraints().containsValue(colName)) {
+ return false;
}
- // Build the bitset with not null columns
- ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
- if (nnc != null) {
- for (String nnCol : nnc.getNotNullConstraints().values()) {
- int nnPos = -1;
- for (int i = 0; i < rowType.getFieldNames().size(); i++) {
- if (rowType.getFieldNames().get(i).equals(nnCol)) {
- nnPos = i;
- break;
- }
- }
- if (nnPos == -1) {
- LOG.error("Column for not null constraint definition " + nnCol + " not found");
- return rowType;
- }
- builder.set(nnPos);
- }
- }
- if (pkc != null) {
- for (String pkCol : pkc.getColNames().values()) {
- int pkPos = -1;
- for (int i = 0; i < rowType.getFieldNames().size(); i++) {
- if (rowType.getFieldNames().get(i).equals(pkCol)) {
- pkPos = i;
- break;
- }
- }
- if (pkPos == -1) {
- LOG.error("Column for not null constraint definition " + pkCol + " not found");
- return rowType;
- }
- builder.set(pkPos);
- }
+ if (primaryKeyInfo != null && primaryKeyInfo.getColNames().containsValue(colName)) {
+ return false;
}
- ImmutableBitSet bitSet = builder.build();
- RexBuilder rexBuilder = cluster.getRexBuilder();
- RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
-
- List<RelDataType> fieldTypes = new LinkedList<RelDataType>();
- List<String> fieldNames = new LinkedList<String>();
- for (RelDataTypeField rdtf : rowType.getFieldList()) {
- if (bitSet.indexOf(rdtf.getIndex()) != -1) {
- fieldTypes.add(dtFactory.createTypeWithNullability(rdtf.getType(), false));
- } else {
- fieldTypes.add(rdtf.getType());
- }
- fieldNames.add(rdtf.getName());
- }
- return dtFactory.createStructType(fieldTypes, fieldNames);
+ return true;
}
private TableType obtainTableType(Table tabMetaData) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java
index 979b4f6..a572038 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/type/RexNodeExprFactory.java
@@ -31,7 +31,6 @@ import org.apache.calcite.avatica.util.TimeUnit;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.type.RelDataType;
-import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
@@ -62,7 +61,6 @@ import org.apache.hadoop.hive.common.type.TimestampTZ;
import org.apache.hadoop.hive.common.type.TimestampTZUtil;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
@@ -76,8 +74,6 @@ import org.apache.hadoop.hive.ql.parse.QBSubQueryParseInfo;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.SubqueryType;
-import org.apache.hadoop.hive.ql.udf.SettableUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -133,7 +129,7 @@ public class RexNodeExprFactory extends ExprFactory<RexNode> {
throw new CalciteSemanticException("Unexpected error: Cannot find column");
}
return rexBuilder.makeInputRef(
- TypeConverter.convert(colInfo.getType(), rexBuilder.getTypeFactory()), index + offset);
+ TypeConverter.convert(colInfo.getType(), colInfo.isNullable(), rexBuilder.getTypeFactory()), index + offset);
}
private static RexNode toPrimitiveConstDesc(
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query45.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query45.q.out
index 47a3c35..b2de1c3 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query45.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query45.q.out
@@ -1,21 +1,15 @@
-Warning: Map Join MAPJOIN[127][bigTable=?] in task 'Map 1' is a cross product
CBO PLAN:
HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100])
HiveProject(ca_zip=[$1], ca_county=[$0], $f2=[$2])
HiveAggregate(group=[{7, 8}], agg#0=[sum($2)])
- HiveFilter(condition=[OR(AND(<>($14, 0), IS NOT NULL($16)), IN(substr($8, 1, 5), _UTF-16LE'85669':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86197':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88274':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83405':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86475':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'85392':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'85460':VARCHAR(21 [...]
- HiveProject(ws_item_sk=[$6], ws_bill_customer_sk=[$7], ws_sales_price=[$8], ws_sold_date_sk=[$9], c_customer_sk=[$0], c_current_addr_sk=[$1], ca_address_sk=[$3], ca_county=[$4], ca_zip=[$5], d_date_sk=[$10], d_year=[$11], d_qoy=[$12], i_item_sk=[$13], i_item_id=[$14], c=[$2], i_item_id0=[$15], literalTrue=[$16])
- HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available])
- HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
- HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4])
- HiveFilter(condition=[IS NOT NULL($4)])
- HiveTableScan(table=[[default, customer]], table:alias=[customer])
- HiveProject(c=[$0])
- HiveAggregate(group=[{}], c=[COUNT()])
- HiveFilter(condition=[IN($0, 2:BIGINT, 3:BIGINT, 5:BIGINT, 7:BIGINT, 11:BIGINT, 13:BIGINT, 17:BIGINT, 19:BIGINT, 23:BIGINT, 29:BIGINT)])
- HiveTableScan(table=[[default, item]], table:alias=[item])
+ HiveFilter(condition=[OR(IS NOT NULL($15), IN(substr($8, 1, 5), _UTF-16LE'85669':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86197':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'88274':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'83405':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'86475':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'85392':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'85460':VARCHAR(2147483647) CHARACT [...]
+ HiveProject(ws_item_sk=[$5], ws_bill_customer_sk=[$6], ws_sales_price=[$7], ws_sold_date_sk=[$8], c_customer_sk=[$0], c_current_addr_sk=[$1], ca_address_sk=[$2], ca_county=[$3], ca_zip=[$4], d_date_sk=[$9], d_year=[$10], d_qoy=[$11], i_item_sk=[$12], i_item_id=[$13], i_item_id0=[$14], literalTrue=[$15])
+ HiveJoin(condition=[=($5, $12)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4])
+ HiveFilter(condition=[IS NOT NULL($4)])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_zip=[$9])
HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
HiveProject(ws_item_sk=[$0], ws_bill_customer_sk=[$1], ws_sales_price=[$2], ws_sold_date_sk=[$3], d_date_sk=[$4], d_year=[$5], d_qoy=[$6])
diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query45.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query45.q.out
index e75a25d..90e29e7 100644
--- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query45.q.out
+++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query45.q.out
@@ -1,4 +1,3 @@
-Warning: Map Join MAPJOIN[127][bigTable=?] in task 'Map 1' is a cross product
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
@@ -8,15 +7,13 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Reducer 4 (BROADCAST_EDGE)
- Map 3 <- Reducer 12 (BROADCAST_EDGE)
- Map 6 <- Map 10 (BROADCAST_EDGE)
- Reducer 12 <- Map 11 (SIMPLE_EDGE)
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE)
- Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
- Reducer 7 <- Map 3 (BROADCAST_EDGE), Map 6 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
- Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
+ Map 4 <- Map 8 (BROADCAST_EDGE)
+ Map 9 <- Reducer 11 (BROADCAST_EDGE)
+ Reducer 11 <- Map 10 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Map 9 (BROADCAST_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -32,63 +29,16 @@ STAGE PLANS:
expressions: c_customer_sk (type: bigint), c_current_addr_sk (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 80000000 Data size: 1280000000 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0
- 1
- outputColumnNames: _col0, _col1, _col2
- input vertices:
- 1 Reducer 4
- Statistics: Num rows: 80000000 Data size: 1920000000 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col1 (type: bigint)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col1 (type: bigint)
- Statistics: Num rows: 80000000 Data size: 1920000000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col2 (type: bigint)
- Execution mode: vectorized, llap
- LLAP IO: may be used (ACID table)
- Map 10
- Map Operator Tree:
- TableScan
- alias: date_dim
- filterExpr: ((d_year = 2000) and (d_qoy = 2)) (type: boolean)
- Statistics: Num rows: 73049 Data size: 1168784 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: ((d_year = 2000) and (d_qoy = 2)) (type: boolean)
- Statistics: Num rows: 92 Data size: 1472 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: d_date_sk (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 92 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: bigint)
+ key expressions: _col1 (type: bigint)
null sort order: z
sort order: +
- Map-reduce partition columns: _col0 (type: bigint)
- Statistics: Num rows: 92 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col0 (type: bigint)
- outputColumnNames: _col0
- Statistics: Num rows: 92 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: bigint)
- minReductionHashAggr: 0.4
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 92 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
- Dynamic Partitioning Event Operator
- Target column: ws_sold_date_sk (bigint)
- Target Input: web_sales
- Partition key expr: ws_sold_date_sk
- Statistics: Num rows: 92 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
- Target Vertex: Map 6
+ Map-reduce partition columns: _col1 (type: bigint)
+ Statistics: Num rows: 80000000 Data size: 1280000000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 11
+ Map 10
Map Operator Tree:
TableScan
alias: item
@@ -118,50 +68,6 @@ STAGE PLANS:
Map 3
Map Operator Tree:
TableScan
- alias: item
- Statistics: Num rows: 462000 Data size: 3696000 Basic stats: COMPLETE Column stats: COMPLETE
- Filter Operator
- predicate: (i_item_sk) IN (2L, 3L, 5L, 7L, 11L, 13L, 17L, 19L, 23L, 29L) (type: boolean)
- Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count()
- minReductionHashAggr: 0.9
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Select Operator
- expressions: i_item_sk (type: bigint), i_item_id (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE
- Map Join Operator
- condition map:
- Left Outer Join 0 to 1
- keys:
- 0 _col1 (type: string)
- 1 _col0 (type: string)
- outputColumnNames: _col0, _col3
- input vertices:
- 1 Reducer 12
- Statistics: Num rows: 462018 Data size: 3696220 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: bigint)
- null sort order: z
- sort order: +
- Map-reduce partition columns: _col0 (type: bigint)
- Statistics: Num rows: 462018 Data size: 3696220 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col3 (type: boolean)
- Execution mode: vectorized, llap
- LLAP IO: may be used (ACID table)
- Map 5
- Map Operator Tree:
- TableScan
alias: customer_address
Statistics: Num rows: 40000000 Data size: 7800000000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
@@ -177,7 +83,7 @@ STAGE PLANS:
value expressions: _col1 (type: varchar(30)), _col2 (type: char(10))
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Map 6
+ Map 4
Map Operator Tree:
TableScan
alias: web_sales
@@ -198,7 +104,7 @@ STAGE PLANS:
1 _col0 (type: bigint)
outputColumnNames: _col0, _col1, _col2
input vertices:
- 1 Map 10
+ 1 Map 8
Statistics: Num rows: 1087859571 Data size: 138922052728 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: bigint)
@@ -209,7 +115,72 @@ STAGE PLANS:
value expressions: _col0 (type: bigint), _col2 (type: decimal(7,2))
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
- Reducer 12
+ Map 8
+ Map Operator Tree:
+ TableScan
+ alias: date_dim
+ filterExpr: ((d_year = 2000) and (d_qoy = 2)) (type: boolean)
+ Statistics: Num rows: 73049 Data size: 1168784 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((d_year = 2000) and (d_qoy = 2)) (type: boolean)
+ Statistics: Num rows: 92 Data size: 1472 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: d_date_sk (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 92 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 92 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 92 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: bigint)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 92 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
+ Dynamic Partitioning Event Operator
+ Target column: ws_sold_date_sk (bigint)
+ Target Input: web_sales
+ Partition key expr: ws_sold_date_sk
+ Statistics: Num rows: 92 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE
+ Target Vertex: Map 4
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 9
+ Map Operator Tree:
+ TableScan
+ alias: item
+ Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i_item_sk (type: bigint), i_item_id (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Left Outer Join 0 to 1
+ keys:
+ 0 _col1 (type: string)
+ 1 _col0 (type: string)
+ outputColumnNames: _col0, _col3
+ input vertices:
+ 1 Reducer 11
+ Statistics: Num rows: 462018 Data size: 3696220 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 462018 Data size: 3696220 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col3 (type: boolean)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Reducer 11
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -237,32 +208,19 @@ STAGE PLANS:
keys:
0 KEY.reducesinkkey0 (type: bigint)
1 KEY.reducesinkkey0 (type: bigint)
- outputColumnNames: _col0, _col2, _col4, _col5
+ outputColumnNames: _col0, _col3, _col4
input vertices:
- 1 Map 5
- Statistics: Num rows: 80000000 Data size: 16240000000 Basic stats: COMPLETE Column stats: COMPLETE
+ 1 Map 3
+ Statistics: Num rows: 80000000 Data size: 15600000000 Basic stats: COMPLETE Column stats: COMPLETE
DynamicPartitionHashJoin: true
Reduce Output Operator
key expressions: _col0 (type: bigint)
null sort order: z
sort order: +
Map-reduce partition columns: _col0 (type: bigint)
- Statistics: Num rows: 80000000 Data size: 16240000000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: bigint), _col4 (type: varchar(30)), _col5 (type: char(10))
- Reducer 4
- Execution mode: vectorized, llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: count(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- null sort order:
- sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint)
- Reducer 7
+ Statistics: Num rows: 80000000 Data size: 15600000000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col3 (type: varchar(30)), _col4 (type: char(10))
+ Reducer 5
Execution mode: vectorized, llap
Reduce Operator Tree:
Map Join Operator
@@ -271,38 +229,38 @@ STAGE PLANS:
keys:
0 KEY.reducesinkkey0 (type: bigint)
1 KEY.reducesinkkey0 (type: bigint)
- outputColumnNames: _col2, _col4, _col5, _col6, _col8
+ outputColumnNames: _col3, _col4, _col5, _col7
input vertices:
0 Reducer 2
- Statistics: Num rows: 1087859571 Data size: 342373338881 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1087859571 Data size: 333670462313 Basic stats: COMPLETE Column stats: COMPLETE
DynamicPartitionHashJoin: true
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col6 (type: bigint)
+ 0 _col5 (type: bigint)
1 _col0 (type: bigint)
- outputColumnNames: _col2, _col4, _col5, _col8, _col16
+ outputColumnNames: _col3, _col4, _col7, _col15
input vertices:
- 1 Map 3
- Statistics: Num rows: 1087859571 Data size: 338020052601 Basic stats: COMPLETE Column stats: COMPLETE
+ 1 Map 9
+ Statistics: Num rows: 1087859571 Data size: 329317176033 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col8 (type: decimal(7,2)), _col4 (type: varchar(30)), _col5 (type: char(10)), _col2 (type: bigint), _col16 (type: boolean)
- outputColumnNames: _col2, _col7, _col8, _col14, _col16
- Statistics: Num rows: 1087859571 Data size: 338020052601 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: _col7 (type: decimal(7,2)), _col3 (type: varchar(30)), _col4 (type: char(10)), _col15 (type: boolean)
+ outputColumnNames: _col2, _col7, _col8, _col15
+ Statistics: Num rows: 1087859571 Data size: 329317176033 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (((_col14 <> 0L) and _col16 is not null) or (substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean)
- Statistics: Num rows: 1087859571 Data size: 338020052601 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (_col15 is not null or (substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) (type: boolean)
+ Statistics: Num rows: 1087859571 Data size: 329317176033 Basic stats: COMPLETE Column stats: COMPLETE
Top N Key Operator
sort order: ++
keys: _col8 (type: char(10)), _col7 (type: varchar(30))
null sort order: zz
- Statistics: Num rows: 1087859571 Data size: 338020052601 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1087859571 Data size: 329317176033 Basic stats: COMPLETE Column stats: COMPLETE
top n: 100
Select Operator
expressions: _col2 (type: decimal(7,2)), _col7 (type: varchar(30)), _col8 (type: char(10))
outputColumnNames: _col2, _col7, _col8
- Statistics: Num rows: 1087859571 Data size: 338020052601 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1087859571 Data size: 329317176033 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col2)
keys: _col8 (type: char(10)), _col7 (type: varchar(30))
@@ -317,7 +275,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: char(10)), _col1 (type: varchar(30))
Statistics: Num rows: 1087859571 Data size: 325270011729 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: decimal(17,2))
- Reducer 8
+ Reducer 6
Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
@@ -332,7 +290,7 @@ STAGE PLANS:
sort order: ++
Statistics: Num rows: 18408340 Data size: 5504093660 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: decimal(17,2))
- Reducer 9
+ Reducer 7
Execution mode: vectorized, llap
Reduce Operator Tree:
Select Operator