You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/06/08 09:23:54 UTC

[GitHub] [incubator-doris] Gabriel39 commented on a diff in pull request #10016: [enhancement]Cache parsed page data for bitshuffle encoding

Gabriel39 commented on code in PR #10016:
URL: https://github.com/apache/incubator-doris/pull/10016#discussion_r892117835


##########
be/src/olap/rowset/segment_v2/column_reader.h:
##########
@@ -298,10 +298,10 @@ class FileColumnIterator final : public ColumnIterator {
     ParsedPage _page;
 
     // keep dict page decoder
-    std::unique_ptr<PageDecoder> _dict_decoder;
+    std::shared_ptr<BinaryPlainPageDecoder> _dict_decoder;
 
     // keep dict page handle to avoid released
-    PageHandle _dict_page_handle;
+    std::shared_ptr<PageHandle> _dict_page_handle;

Review Comment:
   Could this variable be deleted safely? I notice that this variable will be set to a new value each time so I think it's unnecessary to use as a member variable.



##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -679,22 +702,29 @@ Status FileColumnIterator::_read_data_page(const OrdinalPageIndexIterator& iter)
                 Slice dict_data;
                 PageFooterPB dict_footer;
                 _opts.type = INDEX_PAGE;
+                _dict_page_handle = std::make_shared<PageHandle>();
                 RETURN_IF_ERROR(_reader->read_page(_opts, _reader->get_dict_page_pointer(),
-                                                   &_dict_page_handle, &dict_data, &dict_footer,
-                                                   _compress_codec.get()));
+                                                   _dict_page_handle.get(), &dict_data,
+                                                   &dict_footer, _compress_codec.get()));
                 // ignore dict_footer.dict_page_footer().encoding() due to only
                 // PLAIN_ENCODING is supported for dict page right now
-                _dict_decoder = std::make_unique<BinaryPlainPageDecoder>(dict_data);
+                _dict_decoder = std::make_shared<BinaryPlainPageDecoder>(dict_data);
                 RETURN_IF_ERROR(_dict_decoder->init());
 
-                auto* pd_decoder = (BinaryPlainPageDecoder*)_dict_decoder.get();
+                auto* pd_decoder = _dict_decoder.get();
                 _dict_word_info.reset(new StringRef[pd_decoder->_num_elems]);
                 pd_decoder->get_dict_word_info(_dict_word_info.get());
+                _page.dict_page_handle = _dict_page_handle;
             }
 
-            dict_page_decoder->set_dict_decoder(_dict_decoder.get(), _dict_word_info.get());
+            dict_page_decoder->set_dict_decoder(_dict_decoder, _dict_word_info);
         }
     }
+
+    if (_need_cache_parsed_page) {
+        ParsedPage* cache_page = new ParsedPage(_page);

Review Comment:
   Why we need to clone a cache page? Looks like we have already cached this page and don't need to duplicate another one



##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -468,6 +468,12 @@ FileColumnIterator::FileColumnIterator(ColumnReader* reader) : _reader(reader) {
 Status FileColumnIterator::init(const ColumnIteratorOptions& opts) {
     _opts = opts;
     RETURN_IF_ERROR(get_block_compression_codec(_reader->get_compression(), _compress_codec));
+    auto encoding = _reader->encoding_info()->encoding();
+    _need_cache_parsed_page =

Review Comment:
   should we add a new config to enable or disable this feature here? If budget for page cache is poor, I'm worried that it will lead to serious performance fallback as we cache a totally parsed page.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org