You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/08/08 22:04:50 UTC
[2/2] impala git commit: Switch a couple of lists to deques in
AnalyticEvalNode
Switch a couple of lists to deques in AnalyticEvalNode
I noticed this inefficiency while looking at this code.
std::deque generally offers better performance because it does fewer
memory allocations and has better memory locality. The main advantages
of std::list are O(1) insert/delete in the middle of the list and
stable iterators that remain valid through list modifications,
but neither property is useful for these result_tuples_ and
window_tuples_ because we only append at the back, remove from the front
and iterate over the whole list at once.
Change-Id: Iaa716dcf241a0a9e4f5221e5cf7a1596c75aecc0
Reviewed-on: http://gerrit.cloudera.org:8080/11159
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3c9fef2a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3c9fef2a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3c9fef2a
Branch: refs/heads/master
Commit: 3c9fef2aeebcf65c8b87f6e9abe744ab89198828
Parents: b9d44af
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Tue Aug 7 11:58:07 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Wed Aug 8 22:03:07 2018 +0000
----------------------------------------------------------------------
be/src/exec/analytic-eval-node.cc | 22 ++++++++++------------
be/src/exec/analytic-eval-node.h | 5 +++--
2 files changed, 13 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/3c9fef2a/be/src/exec/analytic-eval-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/analytic-eval-node.cc b/be/src/exec/analytic-eval-node.cc
index f51a5b5..68170a2 100644
--- a/be/src/exec/analytic-eval-node.cc
+++ b/be/src/exec/analytic-eval-node.cc
@@ -295,18 +295,16 @@ string AnalyticEvalNode::DebugStateString(bool detailed = false) const {
<< " last_result_idx=" << last_result_idx_;
if (detailed) {
ss << " result_tuples idx: [";
- for (list<pair<int64_t, Tuple*>>::const_iterator it = result_tuples_.begin();
- it != result_tuples_.end(); ++it) {
- ss << it->first;
- if (*it != result_tuples_.back()) ss << ", ";
+ for (const pair<int64_t, Tuple*>& result_tuple : result_tuples_) {
+ ss << result_tuple.first;
+ if (&result_tuple != &result_tuples_.back()) ss << ", ";
}
ss << "]";
if (fn_scope_ == ROWS && window_.__isset.window_start) {
ss << " window_tuples idx: [";
- for (list<pair<int64_t, Tuple*>>::const_iterator it = window_tuples_.begin();
- it != window_tuples_.end(); ++it) {
- ss << it->first;
- if (*it != window_tuples_.back()) ss << ", ";
+ for (const pair<int64_t, Tuple*>& window_tuple : window_tuples_) {
+ ss << window_tuple.first;
+ if (&window_tuple != &window_tuples_.back()) ss << ", ";
}
ss << "]";
}
@@ -338,7 +336,7 @@ inline Status AnalyticEvalNode::AddRow(int64_t stream_idx, TupleRow* row) {
VLOG_ROW << id() << " Adding tuple to window at idx=" << stream_idx;
Tuple* tuple = row->GetTuple(0)->DeepCopy(
*child(0)->row_desc()->tuple_descriptors()[0], curr_tuple_pool_.get());
- window_tuples_.push_back(pair<int64_t, Tuple*>(stream_idx, tuple));
+ window_tuples_.emplace_back(stream_idx, tuple);
}
}
@@ -388,7 +386,7 @@ Status AnalyticEvalNode::AddResultTuple(int64_t stream_idx) {
}
DCHECK_GT(stream_idx, last_result_idx_);
- result_tuples_.push_back(pair<int64_t, Tuple*>(stream_idx, result_tuple));
+ result_tuples_.emplace_back(stream_idx, result_tuple);
last_result_idx_ = stream_idx;
VLOG_ROW << id() << " Added result tuple, final state: " << DebugStateString(true);
return Status::OK();
@@ -520,8 +518,8 @@ inline Status AnalyticEvalNode::InitNextPartition(RuntimeState* state,
// prev_partition_last_result_tuple was the last result tuple in the partition, add
// it back with the index of the last row in the partition so that all output rows
// in this partition get the correct value.
- result_tuples_.push_back(pair<int64_t, Tuple*>(curr_partition_idx_ - 1,
- prev_partition_last_result_tuple));
+ result_tuples_.emplace_back(
+ curr_partition_idx_ - 1, prev_partition_last_result_tuple);
}
DCHECK(!result_tuples_.empty());
last_result_idx_ = result_tuples_.back().first;
http://git-wip-us.apache.org/repos/asf/impala/blob/3c9fef2a/be/src/exec/analytic-eval-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/analytic-eval-node.h b/be/src/exec/analytic-eval-node.h
index bf15ebf..696969a 100644
--- a/be/src/exec/analytic-eval-node.h
+++ b/be/src/exec/analytic-eval-node.h
@@ -18,6 +18,7 @@
#ifndef IMPALA_EXEC_ANALYTIC_EVAL_NODE_H
#define IMPALA_EXEC_ANALYTIC_EVAL_NODE_H
+#include <deque>
#include <memory>
#include "exec/exec-node.h"
@@ -274,7 +275,7 @@ class AnalyticEvalNode : public ExecNode {
/// output row and result_tuples_.size() may be one less than the row batch size, in
/// which case we will process another input row batch (inserting one result tuple per
/// input row) before returning a row batch.
- std::list<std::pair<int64_t, Tuple*>> result_tuples_;
+ std::deque<std::pair<int64_t, Tuple*>> result_tuples_;
/// Index in input_stream_ of the most recently added result tuple.
int64_t last_result_idx_;
@@ -284,7 +285,7 @@ class AnalyticEvalNode : public ExecNode {
/// or FOLLOWING. Tuples in this list are deep copied and owned by
/// curr_window_tuple_pool_.
/// TODO: Remove and use BufferedTupleStream (needs support for multiple readers).
- std::list<std::pair<int64_t, Tuple*>> window_tuples_;
+ std::deque<std::pair<int64_t, Tuple*>> window_tuples_;
/// The index of the last row from input_stream_ associated with output row containing
/// resources in prev_tuple_pool_. -1 when the pool is empty. Resources from