You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2019/12/25 03:27:19 UTC

[GitHub] [incubator-doris] gaodayue commented on a change in pull request #2547: [Segment V2] Support lazy materialization read

gaodayue commented on a change in pull request #2547: [Segment V2] Support lazy materialization read
URL: https://github.com/apache/incubator-doris/pull/2547#discussion_r361248501
 
 

 ##########
 File path: be/src/olap/rowset/segment_v2/segment_iterator.cpp
 ##########
 @@ -34,13 +34,71 @@ using strings::Substitute;
 namespace doris {
 namespace segment_v2 {
 
+// A fast range iterator for roaring bitmap. Output ranges use closed-open form, like [from, to).
+// Example:
+//   input bitmap:  [0 1 4 5 6 7 10 15 16 17 18 19]
+//   output ranges: [0,2), [4,8), [10,11), [15,20) (when max_range_size=10)
+//   output ranges: [0,2), [4,7), [7,8), [10,11), [15,18), [18,20) (when max_range_size=3)
+class SegmentIterator::BitmapRangeIterator {
+public:
+    // Starts iterating over `bitmap`: initializes the roaring iterator,
+    // allocates the batch buffer and prefetches the first batch, so that
+    // has_more_range() reports the correct state right after construction.
+    // NOTE(review): presumably `bitmap` must outlive this object since the
+    // roaring iterator is initialized from it -- confirm against callers.
+    explicit BitmapRangeIterator(const Roaring& bitmap) {
+        roaring_init_iterator(&bitmap.roaring, &_iter);
+        _last_val = 0;
+        // Size the buffer from kBatchSize rather than a literal so it can never
+        // be smaller than the element count requested in _read_next_batch().
+        _buf = new uint32_t[kBatchSize];
+        _read_next_batch();
+    }
+
+    // Frees the batch buffer allocated in the constructor.
+    ~BitmapRangeIterator() { delete[] _buf; }
+
+    // True while the most recent batch read returned at least one value,
+    // i.e. next_range() can still produce a range.
+    bool has_more_range() const {
+        return _eof == false;
+    }
+
+    // Emits the next run of consecutive values as the closed-open range
+    // [*from, *to). The run is cut once it holds max_range_size values.
+    // Returns false (leaving *from and *to untouched) when the bitmap is
+    // exhausted, true otherwise.
+    // Note: at least one value is always consumed per successful call, so a
+    // max_range_size of 0 still produces a range of size 1.
+    bool next_range(uint32_t max_range_size, uint32_t* from, uint32_t* to) {
+        if (_eof) {
+            return false;
+        }
+        *from = _buf[_buf_pos];
+        uint32_t consumed = 0;
+        for (;;) {
+            // take the current value and advance within the buffer
+            _last_val = _buf[_buf_pos];
+            ++_buf_pos;
+            ++consumed;
+            if (_buf_pos == _buf_size) {
+                // buffer drained: refill (may flip _eof)
+                _read_next_batch();
+            }
+            if (consumed >= max_range_size) {
+                break; // range is full
+            }
+            if (_eof) {
+                break; // nothing left to inspect
+            }
+            if (_buf[_buf_pos] != _last_val + 1) {
+                break; // next value is not contiguous
+            }
+        }
+        *to = *from + consumed;
+        return true;
+    }
+
+private:
+    // Refills _buf with up to kBatchSize values from the roaring iterator,
+    // rewinds the read position, and marks end-of-stream when the read
+    // returned no values.
+    void _read_next_batch() {
+        _buf_size = roaring_read_uint32_iterator(&_iter, _buf, kBatchSize);
+        _buf_pos = 0;
+        _eof = (_buf_size == 0);
+    }
+
+    static const uint32_t kBatchSize = 256;
+    roaring_uint32_iterator_t _iter;
+    uint32_t _last_val;
+    uint32_t* _buf = nullptr;
+    uint32_t _buf_pos;
+    uint32_t _buf_size;
+    bool _eof;
 
 Review comment:
   ok

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org