You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2021/01/29 07:24:27 UTC

[GitHub] [incubator-doris] HappenLee commented on a change in pull request #5313: [optimization] use replace top instead of push pop in priority #5312

HappenLee commented on a change in pull request #5313:
URL: https://github.com/apache/incubator-doris/pull/5313#discussion_r566575619



##########
File path: be/src/exec/topn_node.cpp
##########
@@ -180,17 +177,18 @@ Status TopNNode::close(RuntimeState* state) {
 
 // Insert if either not at the limit or it's a new TopN tuple_row
 void TopNNode::insert_tuple_row(TupleRow* input_row) {
-    Tuple* insert_tuple = NULL;
-
     if (_priority_queue->size() < _offset + _limit) {
+        Tuple* insert_tuple = nullptr;
         insert_tuple = reinterpret_cast<Tuple*>(

Review comment:
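       Use `auto* insert_tuple = reinterpret_cast<Tuple*>(...)` here, instead of declaring the pointer as `nullptr` and assigning it on the next line.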

##########
File path: be/src/util/sort_heap.h
##########
@@ -0,0 +1,121 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <algorithm>
+#include <cassert>
+#include <queue>
+#include <utility>
+
+#include "common/compiler_util.h"
+
+namespace doris {
+
+template <typename T, typename _Sequence, typename _Compare>
+class SortingHeap {
+public:
+    SortingHeap(const _Compare& comp) : _comp(comp) {}
+
+    bool is_valid() const { return !_queue.empty(); }
+
+    T& current() { return _queue.front(); }
+
+    size_t size() { return _queue.size(); }
+
+    bool empty() { return _queue.empty(); }
+
+    T& next_child() { return _queue[_next_child_index()]; }
+
+    void replace_top(T new_top) {
+        current() = new_top;
+        updateTop();
+    }
+
+    void remove_top() {
+        std::pop_heap(_queue.begin(), _queue.end(), _comp);
+        _queue.pop_back();
+        next_idx = 0;
+    }
+
+    void push(T cursor) {
+        _queue.emplace_back(cursor);
+        std::push_heap(_queue.begin(), _queue.end(), _comp);
+        next_idx = 0;
+    }
+
+    _Sequence&& sorted_seq() {
+        std::sort_heap(_queue.begin(), _queue.end(), _comp);
+        return std::move(_queue);
+    }
+
+private:
+    _Sequence _queue;
+    _Compare _comp;
+
+    /// Cache comparison between first and second child if the order in queue has not been changed.
+    size_t next_idx = 0;

Review comment:
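       Rename this member to `_next_idx`, so that it carries the same leading underscore as the other private members (`_queue`, `_comp`).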

##########
File path: be/src/util/sort_heap.h
##########
@@ -0,0 +1,121 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <algorithm>
+#include <cassert>
+#include <queue>
+#include <utility>
+
+#include "common/compiler_util.h"
+
+namespace doris {
+
+template <typename T, typename _Sequence, typename _Compare>
+class SortingHeap {
+public:
+    SortingHeap(const _Compare& comp) : _comp(comp) {}
+
+    bool is_valid() const { return !_queue.empty(); }
+
+    T& current() { return _queue.front(); }
+
+    size_t size() { return _queue.size(); }
+
+    bool empty() { return _queue.empty(); }
+
+    T& next_child() { return _queue[_next_child_index()]; }
+
+    void replace_top(T new_top) {
+        current() = new_top;
+        updateTop();
+    }
+
+    void remove_top() {
+        std::pop_heap(_queue.begin(), _queue.end(), _comp);
+        _queue.pop_back();
+        next_idx = 0;
+    }
+
+    void push(T cursor) {
+        _queue.emplace_back(cursor);
+        std::push_heap(_queue.begin(), _queue.end(), _comp);
+        next_idx = 0;
+    }
+
+    _Sequence&& sorted_seq() {
+        std::sort_heap(_queue.begin(), _queue.end(), _comp);
+        return std::move(_queue);
+    }
+
+private:
+    _Sequence _queue;
+    _Compare _comp;
+
+    /// Cache comparison between first and second child if the order in queue has not been changed.
+    size_t next_idx = 0;
+
+    size_t _next_child_index() {
+        if (next_idx == 0) {
+            next_idx = 1;
+            if (_queue.size() > 2 && _comp(_queue[1], _queue[2])) ++next_idx;
+        }
+
+        return next_idx;
+    }
+
+    void updateTop() {

Review comment:
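       Do not use Java-style camelCase names here; use snake_case (e.g. `update_top`), like the other methods of this class (`replace_top`, `remove_top`).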

##########
File path: be/src/exec/topn_node.h
##########
@@ -101,12 +102,8 @@ class TopNNode : public ExecNode {
     // Number of rows skipped. Used for adhering to _offset.
     int64_t _num_rows_skipped;
 
-    // The priority queue will never have more elements in it than the LIMIT.  The stl
-    // priority queue doesn't support a max size, so to get that functionality, the order
-    // of the queue is the opposite of what the ORDER BY clause specifies, such that the top
-    // of the queue is the last sorted element.
-    boost::scoped_ptr<std::priority_queue<Tuple*, std::vector<Tuple*>, TupleRowComparator>>
-            _priority_queue;
+    // The priority queue will never have more elements in it than the LIMIT.      
+    std::unique_ptr<SortingHeap<Tuple*, std::vector<Tuple*>, TupleRowComparator>> _priority_queue;

Review comment:
       Do not keep the old name `_priority_queue` now that the member is a `SortingHeap`; rename it to reflect the new type.

##########
File path: be/src/exec/topn_node.cpp
##########
@@ -180,17 +177,18 @@ Status TopNNode::close(RuntimeState* state) {
 
 // Insert if either not at the limit or it's a new TopN tuple_row
 void TopNNode::insert_tuple_row(TupleRow* input_row) {
-    Tuple* insert_tuple = NULL;
-
     if (_priority_queue->size() < _offset + _limit) {
+        Tuple* insert_tuple = nullptr;
         insert_tuple = reinterpret_cast<Tuple*>(
                 _tuple_pool->allocate(_materialized_tuple_desc->byte_size()));
         insert_tuple->materialize_exprs<false>(input_row, *_materialized_tuple_desc,
                                                _sort_exec_exprs.sort_tuple_slot_expr_ctxs(),
                                                _tuple_pool.get(), NULL, NULL);
+        _priority_queue->push(insert_tuple);
     } else {
+        Tuple* insert_tuple = nullptr;
         DCHECK(!_priority_queue->empty());
-        Tuple* top_tuple = _priority_queue->top();
+        Tuple* top_tuple = _priority_queue->current();

Review comment:
       Same as above.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org