You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/02/14 01:09:09 UTC
[6/6] impala git commit: IMPALA-6392: Consistent explain format for parquet predicate statistics
IMPALA-6392: Consistent explain format for parquet predicate statistics
At EXPLAIN_LEVEL=2 and above, change the explain format for parquet
statistics predicates to print one line per tuple descriptor. This makes
the output consistent with that of other predicates (for example, the
parquet dictionary predicates).
Before:
parquet statistics predicates: c_custkey < 10, o_orderkey < 5, l_linenumber < 3
After:
parquet statistics predicates: c_custkey < 10
parquet statistics predicates on o: o_orderkey < 5
parquet statistics predicates on o_lineitems: l_linenumber < 3
Testing:
- Ran existing planner tests and updated the ones that are affected by
this change.
- Ran end-to-end tests in query_test
Change-Id: Ia3d55ab6a1ae551867a9f68b3622844102cc854e
Reviewed-on: http://gerrit.cloudera.org:8080/9223
Tested-by: Impala Public Jenkins
Reviewed-by: Alex Behm <al...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3d7d8209
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3d7d8209
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3d7d8209
Branch: refs/heads/2.x
Commit: 3d7d8209edf77216b8d990ea5b0eb6a16d06fc07
Parents: 1a632e7
Author: Fredy Wijaya <fw...@cloudera.com>
Authored: Tue Feb 6 01:05:14 2018 -0600
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Feb 13 21:10:13 2018 +0000
----------------------------------------------------------------------
.../org/apache/impala/planner/HdfsScanNode.java | 46 +++++++++++++++-----
.../queries/PlannerTest/constant-folding.test | 3 +-
.../queries/PlannerTest/mt-dop-validation.test | 12 +++--
.../queries/PlannerTest/parquet-filtering.test | 8 ++--
4 files changed, 51 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/3d7d8209/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index 45ad8d6..7735f98 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -225,9 +225,10 @@ public class HdfsScanNode extends ScanNode {
// data when scanning Parquet files.
private final List<Expr> minMaxConjuncts_ = Lists.newArrayList();
- // List of PlanNode conjuncts that have been transformed into conjuncts in
- // 'minMaxConjuncts_'.
- private final List<Expr> minMaxOriginalConjuncts_ = Lists.newArrayList();
+ // Map from TupleDescriptor to list of PlanNode conjuncts that have been transformed
+ // into conjuncts in 'minMaxConjuncts_'.
+ private final Map<TupleDescriptor, List<Expr>> minMaxOriginalConjuncts_ =
+ Maps.newLinkedHashMap();
// Tuple that is used to materialize statistics when scanning Parquet files. For each
// column it can contain 0, 1, or 2 slots, depending on whether the column needs to be
@@ -470,10 +471,10 @@ public class HdfsScanNode extends ScanNode {
BinaryPredicate.Operator op = binaryPred.getOp();
if (op == BinaryPredicate.Operator.LT || op == BinaryPredicate.Operator.LE ||
op == BinaryPredicate.Operator.GE || op == BinaryPredicate.Operator.GT) {
- minMaxOriginalConjuncts_.add(binaryPred);
+ addMinMaxOriginalConjunct(slotRef.getDesc().getParent(), binaryPred);
buildStatsPredicate(analyzer, slotRef, binaryPred, op);
} else if (op == BinaryPredicate.Operator.EQ) {
- minMaxOriginalConjuncts_.add(binaryPred);
+ addMinMaxOriginalConjunct(slotRef.getDesc().getParent(), binaryPred);
// TODO: this could be optimized for boolean columns.
buildStatsPredicate(analyzer, slotRef, binaryPred, BinaryPredicate.Operator.LE);
buildStatsPredicate(analyzer, slotRef, binaryPred, BinaryPredicate.Operator.GE);
@@ -513,11 +514,20 @@ public class HdfsScanNode extends ScanNode {
BinaryPredicate maxBound = new BinaryPredicate(BinaryPredicate.Operator.LE,
children.get(0).clone(), max.clone());
- minMaxOriginalConjuncts_.add(inPred);
+ addMinMaxOriginalConjunct(slotRef.getDesc().getParent(), inPred);
buildStatsPredicate(analyzer, slotRef, minBound, minBound.getOp());
buildStatsPredicate(analyzer, slotRef, maxBound, maxBound.getOp());
}
+ private void addMinMaxOriginalConjunct(TupleDescriptor tupleDesc, Expr expr) {
+ List<Expr> exprs = minMaxOriginalConjuncts_.get(tupleDesc);
+ if (exprs == null) {
+ exprs = new ArrayList<Expr>();
+ minMaxOriginalConjuncts_.put(tupleDesc, exprs);
+ }
+ exprs.add(expr);
+ }
+
private void tryComputeMinMaxPredicate(Analyzer analyzer, Expr pred) {
if (pred instanceof BinaryPredicate) {
tryComputeBinaryMinMaxPredicate(analyzer, (BinaryPredicate) pred);
@@ -1080,16 +1090,32 @@ public class HdfsScanNode extends ScanNode {
numPartitionsNoDiskIds_, numPartitions_, numFilesNoDiskIds_,
totalFiles_, numScanRangesNoDiskIds_, scanRanges_.size()));
}
- if (!minMaxOriginalConjuncts_.isEmpty()) {
- output.append(String.format("%sparquet statistics predicates: %s\n",
- detailPrefix, getExplainString(minMaxOriginalConjuncts_)));
- }
+ // Groups the min max original conjuncts by tuple descriptor.
+ output.append(getMinMaxOriginalConjunctsExplainString(detailPrefix));
// Groups the dictionary filterable conjuncts by tuple descriptor.
output.append(getDictionaryConjunctsExplainString(detailPrefix));
}
return output.toString();
}
+ // Helper method that prints min max original conjuncts by tuple descriptor.
+ private String getMinMaxOriginalConjunctsExplainString(String prefix) {
+ StringBuilder output = new StringBuilder();
+ for (Map.Entry<TupleDescriptor, List<Expr>> entry :
+ minMaxOriginalConjuncts_.entrySet()) {
+ TupleDescriptor tupleDesc = entry.getKey();
+ List<Expr> exprs = entry.getValue();
+ if (tupleDesc == getTupleDesc()) {
+ output.append(String.format("%sparquet statistics predicates: %s\n", prefix,
+ getExplainString(exprs)));
+ } else {
+ output.append(String.format("%sparquet statistics predicates on %s: %s\n",
+ prefix, tupleDesc.getAlias(), getExplainString(exprs)));
+ }
+ }
+ return output.toString();
+ }
+
// Helper method that prints the dictionary filterable conjuncts by tuple descriptor.
private String getDictionaryConjunctsExplainString(String prefix) {
StringBuilder output = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/impala/blob/3d7d8209/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
index 2b2d5ef..f25ad0a 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
@@ -54,7 +54,8 @@ PLAN-ROOT SINK
table: rows=150000 size=292.36MB
columns missing stats: c_orders
extrapolated-rows=disabled
- parquet statistics predicates: c_custkey > 10, o_orderkey = 4
+ parquet statistics predicates: c_custkey > 10
+ parquet statistics predicates on o: o_orderkey = 4
parquet dictionary predicates: c_custkey > 10
parquet dictionary predicates on o: o_orderkey = 4
parquet dictionary predicates on o_lineitems: 20 + l_linenumber < 0
http://git-wip-us.apache.org/repos/asf/impala/blob/3d7d8209/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
index f3a46de..61d646b 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
@@ -251,7 +251,9 @@ PLAN-ROOT SINK
table: rows=150000 size=292.36MB
columns missing stats: c_orders
extrapolated-rows=disabled
- parquet statistics predicates: c_custkey < 10, o_orderkey < 5, l_linenumber < 3
+ parquet statistics predicates: c_custkey < 10
+ parquet statistics predicates on o: o_orderkey < 5
+ parquet statistics predicates on o_lineitems: l_linenumber < 3
parquet dictionary predicates: c_custkey < 10
parquet dictionary predicates on o: o_orderkey < 5
parquet dictionary predicates on o_lineitems: l_linenumber < 3
@@ -314,7 +316,9 @@ Per-Host Resources: mem-estimate=264.00MB mem-reservation=0B
table: rows=150000 size=292.36MB
columns missing stats: c_orders
extrapolated-rows=disabled
- parquet statistics predicates: c_custkey < 10, o_orderkey < 5, l_linenumber < 3
+ parquet statistics predicates: c_custkey < 10
+ parquet statistics predicates on o: o_orderkey < 5
+ parquet statistics predicates on o_lineitems: l_linenumber < 3
parquet dictionary predicates: c_custkey < 10
parquet dictionary predicates on o: o_orderkey < 5
parquet dictionary predicates on o_lineitems: l_linenumber < 3
@@ -368,7 +372,7 @@ PLAN-ROOT SINK
table: rows=150000 size=292.36MB
columns missing stats: c_orders, c_orders
extrapolated-rows=disabled
- parquet statistics predicates: o1.o_orderkey < 5
+ parquet statistics predicates on o1: o1.o_orderkey < 5
parquet dictionary predicates on o1: o1.o_orderkey < 5
mem-estimate=88.00MB mem-reservation=0B
tuple-ids=0 row-size=270B cardinality=150000
@@ -421,7 +425,7 @@ Per-Host Resources: mem-estimate=269.81MB mem-reservation=5.81MB
table: rows=150000 size=292.36MB
columns missing stats: c_orders, c_orders
extrapolated-rows=disabled
- parquet statistics predicates: o1.o_orderkey < 5
+ parquet statistics predicates on o1: o1.o_orderkey < 5
parquet dictionary predicates on o1: o1.o_orderkey < 5
mem-estimate=88.00MB mem-reservation=0B
tuple-ids=0 row-size=270B cardinality=150000
http://git-wip-us.apache.org/repos/asf/impala/blob/3d7d8209/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
index e7dee4e..2b602c9 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/parquet-filtering.test
@@ -150,7 +150,7 @@ PLAN-ROOT SINK
table: rows=unavailable size=unavailable
columns missing stats: id
extrapolated-rows=disabled
- parquet statistics predicates: a.item.e < -10
+ parquet statistics predicates on a: a.item.e < -10
parquet dictionary predicates on a: a.item.e < -10
mem-estimate=32.00MB mem-reservation=0B
tuple-ids=0 row-size=24B cardinality=unavailable
@@ -327,7 +327,9 @@ PLAN-ROOT SINK
table: rows=150000 size=292.36MB
columns missing stats: c_orders
extrapolated-rows=disabled
- parquet statistics predicates: c_custkey > 0, o.o_orderkey > 0, l.l_partkey > 0
+ parquet statistics predicates: c_custkey > 0
+ parquet statistics predicates on o: o.o_orderkey > 0
+ parquet statistics predicates on l: l.l_partkey > 0
parquet dictionary predicates: c_custkey > 0
parquet dictionary predicates on o: o.o_orderkey > 0
parquet dictionary predicates on l: l.l_partkey > 0
@@ -435,7 +437,7 @@ PLAN-ROOT SINK
table: rows=150000 size=292.36MB
columns missing stats: c_orders
extrapolated-rows=disabled
- parquet statistics predicates: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R'
+ parquet statistics predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R'
parquet dictionary predicates on l: l.l_shipdate = '1994-08-19', l.l_receiptdate = '1994-08-24', l.l_shipmode = 'RAIL', l.l_returnflag = 'R'
mem-estimate=176.00MB mem-reservation=0B
tuple-ids=0 row-size=50B cardinality=150000