You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by he...@apache.org on 2016/12/09 02:52:57 UTC
[1/3] incubator-impala git commit: IMPALA-4608: Fix fragment completion times for INSERTs
Repository: incubator-impala
Updated Branches:
refs/heads/master 02b5cce84 -> 1762dd1b8
IMPALA-4608: Fix fragment completion times for INSERTs
Fix a bug where completion times weren't computed if the query was an
INSERT, because the code presumed that fragment 0 was always the
coordinator fragment and skipped the completion time computation for it.
It may be possible to remove the special-casing entirely, but that needs
further investigation to make sure it wouldn't trigger any division-by-zero bugs.
Change-Id: I3ce56f70d30c9e398b14b32520c815d87f81f893
Reviewed-on: http://gerrit.cloudera.org:8080/5418
Reviewed-by: Henry Robinson <he...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/cc57a229
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/cc57a229
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/cc57a229
Branch: refs/heads/master
Commit: cc57a229fdc7ad9fc6b3b6f2cfa3a64f7aacbd10
Parents: 02b5cce
Author: Henry Robinson <he...@cloudera.com>
Authored: Tue Dec 6 11:40:42 2016 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Dec 9 01:33:09 2016 +0000
----------------------------------------------------------------------
be/src/runtime/coordinator.cc | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cc57a229/be/src/runtime/coordinator.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.cc b/be/src/runtime/coordinator.cc
index 2b446df..651d349 100644
--- a/be/src/runtime/coordinator.cc
+++ b/be/src/runtime/coordinator.cc
@@ -1668,17 +1668,14 @@ void Coordinator::ReportQuerySummary() {
if (!fragment_instance_states_.empty()) {
// Average all fragment instances for each fragment.
for (FragmentInstanceState* state: fragment_instance_states_) {
- // TODO: make profiles uniform across all fragments so we don't have
- // to keep special-casing the coord fragment
- if (state->fragment_idx() == 0) {
- state->profile()->ComputeTimeInProfile();
- UpdateExecSummary(*state);
- } else {
- state->profile()->ComputeTimeInProfile();
- UpdateAverageProfile(state);
+ state->profile()->ComputeTimeInProfile();
+ UpdateAverageProfile(state);
+ // Skip coordinator fragment, if one exists.
+ // TODO: Can we remove the special casing here?
+ if (executor_ == nullptr || state->fragment_idx() != 0) {
ComputeFragmentSummaryStats(state);
- UpdateExecSummary(*state);
}
+ UpdateExecSummary(*state);
}
InstanceComparator comparator;
[2/3] incubator-impala git commit: IMPALA-3126: Conservative assignment of inner-join On-clause predicates.
Posted by he...@apache.org.
IMPALA-3126: Conservative assignment of inner-join On-clause predicates.
Implements the following conservative but correct policy for assigning
predicates from the On-clause of an inner join (On-clause predicates of
semi joins are handled the same way):
If the predicate references an outer-joined tuple, then evaluate it at
the join that the On-clause belongs to.
Cleans up Analyzer.canEvalPredicate().
Change-Id: Idf45323ed9102ffb45c9d94a130ea3692286f215
Reviewed-on: http://gerrit.cloudera.org:8080/4982
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/80f85179
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/80f85179
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/80f85179
Branch: refs/heads/master
Commit: 80f85179f99ff36d6ecad65b2041b45015ffb716
Parents: cc57a22
Author: Alex Behm <al...@cloudera.com>
Authored: Mon Nov 7 14:15:45 2016 -0800
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Fri Dec 9 02:12:46 2016 +0000
----------------------------------------------------------------------
.../org/apache/impala/analysis/Analyzer.java | 101 ++++++++++---------
.../queries/PlannerTest/outer-joins.test | 72 ++++++++++++-
2 files changed, 121 insertions(+), 52 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/80f85179/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index 1e88862..61d1c20 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -1195,6 +1195,10 @@ public class Analyzer {
return globalState_.ijClauseByConjunct.containsKey(e.getId());
}
+ public boolean isSjConjunct(Expr e) {
+ return globalState_.sjClauseByConjunct.containsKey(e.getId());
+ }
+
public TableRef getFullOuterJoinRef(Expr e) {
return globalState_.fullOuterJoinedConjuncts.get(e.getId());
}
@@ -1353,12 +1357,24 @@ public class Analyzer {
/**
* Returns true if predicate 'e' can be correctly evaluated by a tree materializing
* 'tupleIds', otherwise false:
- * - the predicate needs to be bound by tupleIds
- * - an On clause predicate against the non-nullable side of an Outer Join clause
- * can only be correctly evaluated by the join node that materializes the
- * Outer Join clause
- * - otherwise, a predicate can only be correctly evaluated if for all outer-joined
- * referenced tids the last join to outer-join this tid has been materialized
+ * - The predicate needs to be bound by tupleIds.
+ * - For On-clause predicates:
+ * - If the predicate is from an anti-join On-clause it must be evaluated by the
+ * corresponding anti-join node.
+ * - Predicates from the On-clause of an inner or semi join are evaluated at the
+ * node that materializes the required tuple ids, unless they reference outer
+ * joined tuple ids. In that case, the predicates are evaluated at the join node
+ * of the corresponding On-clause.
+ * - Predicates referencing full-outer joined tuples are assigned at the originating
+ * join if it is a full-outer join, otherwise at the last full-outer join that does
+ * not materialize the table ref ids of the originating join.
+ * - Predicates from the On-clause of a left/right outer join are assigned at
+ * the corresponding outer join node with the exception of simple predicates
+ * that only reference a single tuple id. Those may be assigned below the
+ * outer join node if they are from the same On-clause that makes the tuple id
+ * nullable.
+ * - Otherwise, a predicate can only be correctly evaluated if for all outer-joined
+ * referenced tids the last join to outer-join this tid has been materialized.
*/
public boolean canEvalPredicate(List<TupleId> tupleIds, Expr e) {
if (!e.isBoundByTupleIds(tupleIds)) return false;
@@ -1367,58 +1383,43 @@ public class Analyzer {
if (tids.isEmpty()) return true;
if (e.isOnClauseConjunct()) {
- if (tids.size() > 1) {
- // If the conjunct is from the ON-clause of an anti join, check if we can
- // assign it to this node.
- if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds);
- // bail if this is from an OJ On clause; the join node will pick
- // it up later via getUnassignedOjConjuncts()
- if (globalState_.ojClauseByConjunct.containsKey(e.getId())) return false;
- // If this is not from an OJ On clause (e.g. where clause or On clause of an
- // inner join) and is full-outer joined, we need to make sure it is not
- // assigned below the full outer join node that outer-joined it.
- return canEvalFullOuterJoinedConjunct(e, tupleIds);
+ if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds);
+
+ if (isIjConjunct(e) || isSjConjunct(e)) {
+ if (!containsOuterJoinedTid(tids)) return true;
+ // If the predicate references an outer-joined tuple, then evaluate it at
+ // the join that the On-clause belongs to.
+ TableRef onClauseTableRef = null;
+ if (isIjConjunct(e)) {
+ onClauseTableRef = globalState_.ijClauseByConjunct.get(e.getId());
+ } else {
+ onClauseTableRef = globalState_.sjClauseByConjunct.get(e.getId());
+ }
+ Preconditions.checkNotNull(onClauseTableRef);
+ return tupleIds.containsAll(onClauseTableRef.getAllTableRefIds());
}
- TupleId tid = tids.get(0);
- if (globalState_.ojClauseByConjunct.containsKey(e.getId())) {
- // OJ On-clause predicate: okay if it's from
- // the same On clause that makes tid nullable
- // (otherwise e needn't be true when that tuple is set)
- if (!globalState_.outerJoinedTupleIds.containsKey(tid)) return false;
- if (globalState_.ojClauseByConjunct.get(e.getId())
- != globalState_.outerJoinedTupleIds.get(tid)) {
- return false;
- }
- // Single tuple id conjuncts specified in the FOJ On-clause are not allowed to be
- // assigned below that full outer join in the operator tree.
- TableRef tblRef = globalState_.ojClauseByConjunct.get(e.getId());
- if (tblRef.getJoinOp().isFullOuterJoin()) return false;
- } else {
- // Non-OJ On-clause conjunct.
- if (isOuterJoined(tid)) {
- // If the conjunct references an outer-joined tuple, then evaluate the
- // conjunct at the join that the On-clause belongs to.
- TableRef onClauseTableRef = globalState_.ijClauseByConjunct.get(e.getId());
- Preconditions.checkNotNull(onClauseTableRef);
- return tupleIds.containsAll(onClauseTableRef.getAllTableRefIds());
- }
- // If this single tid conjunct is from the On-clause of an anti-join, check if we
- // can assign it to this node.
- if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds);
+ if (isFullOuterJoined(e)) return canEvalFullOuterJoinedConjunct(e, tupleIds);
+ if (isOjConjunct(e)) {
+ // Force this predicate to be evaluated by the corresponding outer join node.
+ // The join node will pick up the predicate later via getUnassignedOjConjuncts().
+ if (tids.size() > 1) return false;
+ // Optimization for single-tid predicates: Legal to assign below the outer join
+ // if the predicate is from the same On-clause that makes tid nullable
+ // (otherwise e needn't be true when that tuple is set).
+ TupleId tid = tids.get(0);
+ return globalState_.ojClauseByConjunct.get(e.getId()) == getLastOjClause(tid);
}
- // Single tid predicate that is not from an OJ On-clause and is outer-joined by a
- // full outer join cannot be assigned below that full outer join in the
- // operator tree.
- return canEvalFullOuterJoinedConjunct(e, tupleIds);
+
+ // Should have returned in one of the cases above.
+ Preconditions.checkState(false);
}
- if (isAntiJoinedConjunct(e)) return canEvalAntiJoinedConjunct(e, tupleIds);
for (TupleId tid: tids) {
TableRef rhsRef = getLastOjClause(tid);
- // this is not outer-joined; ignore
+ // Ignore 'tid' because it is not outer-joined.
if (rhsRef == null) continue;
- // check whether the last join to outer-join 'tid' is materialized by tupleIds
+ // Check whether the last join to outer-join 'tid' is materialized by tupleIds.
if (!tupleIds.containsAll(rhsRef.getAllTableRefIds())) return false;
}
return true;
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/80f85179/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
index 95d16f8..5b82c2d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
@@ -582,7 +582,7 @@ PLAN-ROOT SINK
|
05:HASH JOIN [INNER JOIN]
| hash predicates: b.smallint_col = c.smallint_col
-| other predicates: b.id < 10
+| other predicates: a.int_col < b.int_col, b.id < 10
| runtime filters: RF000 <- c.smallint_col
|
|--02:SCAN HDFS [functional.alltypes c]
@@ -590,7 +590,6 @@ PLAN-ROOT SINK
|
04:HASH JOIN [FULL OUTER JOIN]
| hash predicates: a.id = b.id
-| other predicates: a.int_col < b.int_col
|
|--01:SCAN HDFS [functional.alltypes b]
| partitions=24/24 files=24 size=478.45KB
@@ -948,3 +947,72 @@ PLAN-ROOT SINK
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
====
+# IMPALA-3126: Test assignment of an inner join On-clause predicate. The predicate
+# may not be assigned below the join materializing 'd'.
+select 1 from functional.alltypes a
+left outer join functional.alltypes b
+ on a.id = b.id
+right outer join functional.alltypes c
+ on b.id = c.id
+inner join functional.alltypes d
+ on a.int_col = b.int_col
+---- PLAN
+PLAN-ROOT SINK
+|
+06:NESTED LOOP JOIN [INNER JOIN]
+| predicates: a.int_col = b.int_col
+|
+|--03:SCAN HDFS [functional.alltypes d]
+| partitions=24/24 files=24 size=478.45KB
+|
+05:HASH JOIN [RIGHT OUTER JOIN]
+| hash predicates: b.id = c.id
+| runtime filters: RF000 <- c.id
+|
+|--02:SCAN HDFS [functional.alltypes c]
+| partitions=24/24 files=24 size=478.45KB
+|
+04:HASH JOIN [LEFT OUTER JOIN]
+| hash predicates: a.id = b.id
+|
+|--01:SCAN HDFS [functional.alltypes b]
+| partitions=24/24 files=24 size=478.45KB
+| runtime filters: RF000 -> b.id
+|
+00:SCAN HDFS [functional.alltypes a]
+ partitions=24/24 files=24 size=478.45KB
+====
+# IMPALA-3126: Same as above but with a semi join at the end.
+select 1 from functional.alltypes a
+left outer join functional.alltypes b
+ on a.id = b.id
+right outer join functional.alltypes c
+ on b.id = c.id
+left semi join functional.alltypes d
+ on a.int_col = b.int_col
+---- PLAN
+PLAN-ROOT SINK
+|
+06:NESTED LOOP JOIN [LEFT SEMI JOIN]
+| join predicates: a.int_col = b.int_col
+|
+|--03:SCAN HDFS [functional.alltypes d]
+| partitions=24/24 files=24 size=478.45KB
+|
+05:HASH JOIN [RIGHT OUTER JOIN]
+| hash predicates: b.id = c.id
+| runtime filters: RF000 <- c.id
+|
+|--02:SCAN HDFS [functional.alltypes c]
+| partitions=24/24 files=24 size=478.45KB
+|
+04:HASH JOIN [LEFT OUTER JOIN]
+| hash predicates: a.id = b.id
+|
+|--01:SCAN HDFS [functional.alltypes b]
+| partitions=24/24 files=24 size=478.45KB
+| runtime filters: RF000 -> b.id
+|
+00:SCAN HDFS [functional.alltypes a]
+ partitions=24/24 files=24 size=478.45KB
+====
[3/3] incubator-impala git commit: IMPALA-4609: prefix thread counters in fragment profile
Posted by he...@apache.org.
IMPALA-4609: prefix thread counters in fragment profile
This adds a "TotalThreads" prefix to the thread counter names so that it's
more obvious that the thread counters are aggregates across all fragment threads.
Example output for "select * from tpch_parquet.lineitem where l_orderkey < 0":
Averaged Fragment F00:(Total: 102.949ms, non-child: 0.000ns, % non-child: 0.00%)
split sizes: min: 55.70 MB, max: 69.10 MB, avg: 64.54 MB, stddev: 6.25 MB
completion times: min:4s629ms max:5s542ms mean: 5s197ms stddev:404.911ms
execution rates: min:12.03 MB/sec max:12.69 MB/sec mean:12.40 MB/sec stddev:281.88 KB/sec
num instances: 3
- AverageThreadTokens: 2.00
- BloomFilterBytes: 0
- PeakMemoryUsage: 88.46 MB (92753209)
- PerHostPeakMemUsage: 88.46 MB (92755940)
- RowsProduced: 0 (0)
- TotalNetworkReceiveTime: 0.000ns
- TotalNetworkSendTime: 307.000ns
- TotalStorageWaitTime: 414.738ms
- TotalThreadsInvoluntaryContextSwitches: 62 (62)
- TotalThreadsTotalWallClockTime: 10s228ms
- TotalThreadsSysTime: 118.666ms
- TotalThreadsUserTime: 4s630ms
- TotalThreadsVoluntaryContextSwitches: 174 (174)
Change-Id: Icb8cfbddc758d06b25a14343310bfd9a932ad1f0
Reviewed-on: http://gerrit.cloudera.org:8080/5392
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1762dd1b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1762dd1b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1762dd1b
Branch: refs/heads/master
Commit: 1762dd1b815b43d4507ce28e1879091dd2e4de57
Parents: 80f8517
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Tue Dec 6 18:27:30 2016 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Dec 9 02:19:41 2016 +0000
----------------------------------------------------------------------
be/src/runtime/runtime-state.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1762dd1b/be/src/runtime/runtime-state.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/runtime-state.cc b/be/src/runtime/runtime-state.cc
index 7954f22..a0c3335 100644
--- a/be/src/runtime/runtime-state.cc
+++ b/be/src/runtime/runtime-state.cc
@@ -134,7 +134,7 @@ Status RuntimeState::Init(ExecEnv* exec_env) {
DCHECK(resource_pool_ != NULL);
}
- total_thread_statistics_ = ADD_THREAD_COUNTERS(runtime_profile(), "");
+ total_thread_statistics_ = ADD_THREAD_COUNTERS(runtime_profile(), "TotalThreads");
total_storage_wait_timer_ = ADD_TIMER(runtime_profile(), "TotalStorageWaitTime");
total_network_send_timer_ = ADD_TIMER(runtime_profile(), "TotalNetworkSendTime");
total_network_receive_timer_ = ADD_TIMER(runtime_profile(), "TotalNetworkReceiveTime");