You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/08/08 22:04:50 UTC
[2/2] impala git commit: Switch a couple of lists to deques in AnalyticEvalNode

Switch a couple of lists to deques in AnalyticEvalNode

I noticed this inefficiency while looking at this code.

std::deque generally offers better performance because it does fewer
memory allocations and has better memory locality. The main advantages
of std::list are O(1) insert/delete in the middle of the list and
stable iterators that remain valid through list modifications,
but neither property is useful for these result_tuples_ and
window_tuples_ because we only append at the back, remove from the front
and iterate over the whole list at once.

Change-Id: Iaa716dcf241a0a9e4f5221e5cf7a1596c75aecc0
Reviewed-on: http://gerrit.cloudera.org:8080/11159
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3c9fef2a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3c9fef2a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3c9fef2a

Branch: refs/heads/master
Commit: 3c9fef2aeebcf65c8b87f6e9abe744ab89198828
Parents: b9d44af
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Tue Aug 7 11:58:07 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Wed Aug 8 22:03:07 2018 +0000

----------------------------------------------------------------------
 be/src/exec/analytic-eval-node.cc | 22 ++++++++++------------
 be/src/exec/analytic-eval-node.h  |  5 +++--
 2 files changed, 13 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/3c9fef2a/be/src/exec/analytic-eval-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/analytic-eval-node.cc b/be/src/exec/analytic-eval-node.cc
index f51a5b5..68170a2 100644
--- a/be/src/exec/analytic-eval-node.cc
+++ b/be/src/exec/analytic-eval-node.cc
@@ -295,18 +295,16 @@ string AnalyticEvalNode::DebugStateString(bool detailed = false) const {
      << " last_result_idx=" << last_result_idx_;
   if (detailed) {
     ss << " result_tuples idx: [";
-    for (list<pair<int64_t, Tuple*>>::const_iterator it = result_tuples_.begin();
-        it != result_tuples_.end(); ++it) {
-      ss << it->first;
-      if (*it != result_tuples_.back()) ss << ", ";
+    for (const pair<int64_t, Tuple*>& result_tuple : result_tuples_) {
+      ss << result_tuple.first;
+      if (&result_tuple != &result_tuples_.back()) ss << ", ";
     }
     ss << "]";
     if (fn_scope_ == ROWS && window_.__isset.window_start) {
       ss << " window_tuples idx: [";
-      for (list<pair<int64_t, Tuple*>>::const_iterator it = window_tuples_.begin();
-          it != window_tuples_.end(); ++it) {
-        ss << it->first;
-        if (*it != window_tuples_.back()) ss << ", ";
+    for (const pair<int64_t, Tuple*>& window_tuple : window_tuples_) {
+        ss << window_tuple.first;
+        if (&window_tuple != &window_tuples_.back()) ss << ", ";
       }
       ss << "]";
     }
@@ -338,7 +336,7 @@ inline Status AnalyticEvalNode::AddRow(int64_t stream_idx, TupleRow* row) {
       VLOG_ROW << id() << " Adding tuple to window at idx=" << stream_idx;
       Tuple* tuple = row->GetTuple(0)->DeepCopy(
           *child(0)->row_desc()->tuple_descriptors()[0], curr_tuple_pool_.get());
-      window_tuples_.push_back(pair<int64_t, Tuple*>(stream_idx, tuple));
+      window_tuples_.emplace_back(stream_idx, tuple);
     }
   }
 
@@ -388,7 +386,7 @@ Status AnalyticEvalNode::AddResultTuple(int64_t stream_idx) {
   }
 
   DCHECK_GT(stream_idx, last_result_idx_);
-  result_tuples_.push_back(pair<int64_t, Tuple*>(stream_idx, result_tuple));
+  result_tuples_.emplace_back(stream_idx, result_tuple);
   last_result_idx_ = stream_idx;
   VLOG_ROW << id() << " Added result tuple, final state: " << DebugStateString(true);
   return Status::OK();
@@ -520,8 +518,8 @@ inline Status AnalyticEvalNode::InitNextPartition(RuntimeState* state,
       // prev_partition_last_result_tuple was the last result tuple in the partition, add
       // it back with the index of the last row in the partition so that all output rows
       // in this partition get the correct value.
-      result_tuples_.push_back(pair<int64_t, Tuple*>(curr_partition_idx_ - 1,
-          prev_partition_last_result_tuple));
+      result_tuples_.emplace_back(
+          curr_partition_idx_ - 1, prev_partition_last_result_tuple);
     }
     DCHECK(!result_tuples_.empty());
     last_result_idx_ = result_tuples_.back().first;

http://git-wip-us.apache.org/repos/asf/impala/blob/3c9fef2a/be/src/exec/analytic-eval-node.h
----------------------------------------------------------------------
diff --git a/be/src/exec/analytic-eval-node.h b/be/src/exec/analytic-eval-node.h
index bf15ebf..696969a 100644
--- a/be/src/exec/analytic-eval-node.h
+++ b/be/src/exec/analytic-eval-node.h
@@ -18,6 +18,7 @@
 #ifndef IMPALA_EXEC_ANALYTIC_EVAL_NODE_H
 #define IMPALA_EXEC_ANALYTIC_EVAL_NODE_H
 
+#include <deque>
 #include <memory>
 
 #include "exec/exec-node.h"
@@ -274,7 +275,7 @@ class AnalyticEvalNode : public ExecNode {
   /// output row and result_tuples_.size() may be one less than the row batch size, in
   /// which case we will process another input row batch (inserting one result tuple per
   /// input row) before returning a row batch.
-  std::list<std::pair<int64_t, Tuple*>> result_tuples_;
+  std::deque<std::pair<int64_t, Tuple*>> result_tuples_;
 
   /// Index in input_stream_ of the most recently added result tuple.
   int64_t last_result_idx_;
@@ -284,7 +285,7 @@ class AnalyticEvalNode : public ExecNode {
   /// or FOLLOWING. Tuples in this list are deep copied and owned by
   /// curr_window_tuple_pool_.
   /// TODO: Remove and use BufferedTupleStream (needs support for multiple readers).
-  std::list<std::pair<int64_t, Tuple*>> window_tuples_;
+  std::deque<std::pair<int64_t, Tuple*>> window_tuples_;
 
   /// The index of the last row from input_stream_ associated with output row containing
   /// resources in prev_tuple_pool_. -1 when the pool is empty. Resources from