You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2022/06/30 03:27:28 UTC
[doris] branch master updated: [Fix] avoid core dump cause by malformed bitmap type data (#10458)
This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d259770b86 [Fix] avoid core dump cause by malformed bitmap type data (#10458)
d259770b86 is described below
commit d259770b86955a4cb34102d4cc95ae1a731326af
Author: Zhengguo Yang <ya...@gmail.com>
AuthorDate: Thu Jun 30 11:27:22 2022 +0800
[Fix] avoid core dump cause by malformed bitmap type data (#10458)
---
be/src/olap/rowset/segment_v2/binary_dict_page.cpp | 8 ++++----
be/src/olap/rowset/segment_v2/binary_dict_page.h | 4 ++--
be/src/olap/rowset/segment_v2/binary_plain_page.h | 20 +++++++++++++++++++-
be/src/olap/rowset/segment_v2/column_reader.cpp | 6 ++++--
be/src/olap/rowset/segment_v2/encoding_info.cpp | 4 ++--
.../olap/rowset/segment_v2/indexed_column_reader.cpp | 4 +++-
.../olap/rowset/segment_v2/indexed_column_writer.cpp | 6 ++++--
be/src/olap/rowset/segment_v2/options.h | 8 +++++---
be/src/olap/rowset/segment_v2/parsed_page.h | 4 ++--
be/src/tools/meta_tool.cpp | 1 -
be/src/util/bitmap_value.h | 14 +++++++++++++-
.../olap/rowset/segment_v2/binary_dict_page_test.cpp | 10 ++++++----
.../rowset/segment_v2/binary_plain_page_test.cpp | 3 ++-
13 files changed, 66 insertions(+), 26 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
index b53ff52ade..ea9fe66dfc 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
@@ -45,7 +45,7 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const PageBuilderOptions& options)
_data_page_builder.reset(new BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>(options));
PageBuilderOptions dict_builder_options;
dict_builder_options.data_page_size = _options.dict_page_size;
- _dict_builder.reset(new BinaryPlainPageBuilder(dict_builder_options));
+ _dict_builder.reset(new BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>(dict_builder_options));
reset();
}
@@ -135,7 +135,7 @@ void BinaryDictPageBuilder::reset() {
_buffer.resize(BINARY_DICT_PAGE_HEADER_SIZE);
if (_encoding_type == DICT_ENCODING && _dict_builder->is_page_full()) {
- _data_page_builder.reset(new BinaryPlainPageBuilder(_options));
+ _data_page_builder.reset(new BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>(_options));
_encoding_type = PLAIN_ENCODING;
} else {
_data_page_builder->reset();
@@ -206,7 +206,7 @@ Status BinaryDictPageDecoder::init() {
_bit_shuffle_ptr = new BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>(_data, _options));
} else if (_encoding_type == PLAIN_ENCODING) {
DCHECK_EQ(_encoding_type, PLAIN_ENCODING);
- _data_page_decoder.reset(new BinaryPlainPageDecoder(_data, _options));
+ _data_page_decoder.reset(new BinaryPlainPageDecoder<OLAP_FIELD_TYPE_INT>(_data, _options));
} else {
LOG(WARNING) << "invalid encoding type:" << _encoding_type;
return Status::Corruption(strings::Substitute("invalid encoding type:$0", _encoding_type));
@@ -228,7 +228,7 @@ bool BinaryDictPageDecoder::is_dict_encoding() const {
}
void BinaryDictPageDecoder::set_dict_decoder(PageDecoder* dict_decoder, StringRef* dict_word_info) {
- _dict_decoder = (BinaryPlainPageDecoder*)dict_decoder;
+ _dict_decoder = (BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>*)dict_decoder;
_dict_word_info = dict_word_info;
};
diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.h b/be/src/olap/rowset/segment_v2/binary_dict_page.h
index e1dcdba49b..d57f7cf974 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.h
@@ -78,7 +78,7 @@ private:
std::unique_ptr<PageBuilder> _data_page_builder;
- std::unique_ptr<BinaryPlainPageBuilder> _dict_builder;
+ std::unique_ptr<BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>> _dict_builder;
EncodingTypePB _encoding_type;
struct HashOfSlice {
@@ -122,7 +122,7 @@ private:
Slice _data;
PageDecoderOptions _options;
std::unique_ptr<PageDecoder> _data_page_decoder;
- BinaryPlainPageDecoder* _dict_decoder = nullptr;
+ BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>* _dict_decoder = nullptr;
BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>* _bit_shuffle_ptr = nullptr;
bool _parsed;
EncodingTypePB _encoding_type;
diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h b/be/src/olap/rowset/segment_v2/binary_plain_page.h
index cac9da3709..1919efcba8 100644
--- a/be/src/olap/rowset/segment_v2/binary_plain_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h
@@ -44,6 +44,7 @@
namespace doris {
namespace segment_v2 {
+template <FieldType Type>
class BinaryPlainPageBuilder : public PageBuilder {
public:
BinaryPlainPageBuilder(const PageBuilderOptions& options)
@@ -64,6 +65,11 @@ public:
// If the page is full, should stop adding more items.
while (!is_page_full() && i < *count) {
auto src = reinterpret_cast<const Slice*>(vals);
+ if constexpr (Type == OLAP_FIELD_TYPE_OBJECT) {
+ if (_options.need_check_bitmap) {
+ RETURN_IF_ERROR(BitmapTypeCode::validate(*(src->data)));
+ }
+ }
size_t offset = _buffer.size();
_offsets.push_back(offset);
_buffer.append(src->data, src->size);
@@ -154,6 +160,7 @@ private:
faststring _last_value;
};
+template <FieldType Type>
class BinaryPlainPageDecoder : public PageDecoder {
public:
BinaryPlainPageDecoder(Slice data) : BinaryPlainPageDecoder(data, PageDecoderOptions()) {}
@@ -204,6 +211,11 @@ public:
size_t mem_len[max_fetch];
for (size_t i = 0; i < max_fetch; i++, out++, _cur_idx++) {
*out = string_at_index(_cur_idx);
+ if constexpr (Type == OLAP_FIELD_TYPE_OBJECT) {
+ if (_options.need_check_bitmap) {
+ RETURN_IF_ERROR(BitmapTypeCode::validate(*(out->data)));
+ }
+ }
mem_len[i] = out->size;
}
@@ -246,6 +258,11 @@ public:
uint32_t len = offset(_cur_idx + 1) - start_offset;
len_array[i] = len;
start_offset_array[i] = start_offset;
+ if constexpr (Type == OLAP_FIELD_TYPE_OBJECT) {
+ if (_options.need_check_bitmap) {
+ RETURN_IF_ERROR(BitmapTypeCode::validate(*(_data.data + start_offset)));
+ }
+ }
}
dst->insert_many_binary_data(_data.mutable_data(), len_array, start_offset_array,
max_fetch);
@@ -271,8 +288,9 @@ public:
}
void get_dict_word_info(StringRef* dict_word_info) {
- if (_num_elems <= 0) [[unlikely]]
+ if (UNLIKELY(_num_elems <= 0)) {
return;
+ }
char* data_begin = (char*)&_data[0];
char* offset_ptr = (char*)&_data[_offsets_pos];
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 7243736281..a7d8c99899 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -747,10 +747,12 @@ Status FileColumnIterator::_read_data_page(const OrdinalPageIndexIterator& iter)
_compress_codec.get()));
// ignore dict_footer.dict_page_footer().encoding() due to only
// PLAIN_ENCODING is supported for dict page right now
- _dict_decoder = std::make_unique<BinaryPlainPageDecoder>(dict_data);
+ _dict_decoder = std::make_unique<BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>>(
+ dict_data);
RETURN_IF_ERROR(_dict_decoder->init());
- auto* pd_decoder = (BinaryPlainPageDecoder*)_dict_decoder.get();
+ auto* pd_decoder =
+ (BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>*)_dict_decoder.get();
_dict_word_info.reset(new StringRef[pd_decoder->_num_elems]);
pd_decoder->get_dict_word_info(_dict_word_info.get());
}
diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp
index ac3a1a624f..4f9e2d5d14 100644
--- a/be/src/olap/rowset/segment_v2/encoding_info.cpp
+++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp
@@ -58,12 +58,12 @@ struct TypeEncodingTraits<type, PLAIN_ENCODING, CppType> {
template <FieldType type>
struct TypeEncodingTraits<type, PLAIN_ENCODING, Slice> {
static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) {
- *builder = new BinaryPlainPageBuilder(opts);
+ *builder = new BinaryPlainPageBuilder<type>(opts);
return Status::OK();
}
static Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts,
PageDecoder** decoder) {
- *decoder = new BinaryPlainPageDecoder(data, opts);
+ *decoder = new BinaryPlainPageDecoder<type>(data, opts);
return Status::OK();
}
};
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
index 6a0b2ca81e..682f620ea8 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp
@@ -110,8 +110,10 @@ Status IndexedColumnIterator::_read_data_page(const PagePointer& pp) {
_compress_codec.get()));
// parse data page
// note that page_index is not used in IndexedColumnIterator, so we pass 0
+ PageDecoderOptions opts;
+ opts.need_check_bitmap = false;
return ParsedPage::create(std::move(handle), body, footer.data_page_footer(),
- _reader->encoding_info(), pp, 0, &_data_page);
+ _reader->encoding_info(), pp, 0, &_data_page, opts);
}
Status IndexedColumnIterator::seek_to_ordinal(ordinal_t idx) {
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
index 4c7259c90c..979481adbc 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.cpp
@@ -59,8 +59,10 @@ Status IndexedColumnWriter::init() {
// because the default encoding of a data type can be changed in the future
DCHECK_NE(_options.encoding, DEFAULT_ENCODING);
- PageBuilder* data_page_builder;
- RETURN_IF_ERROR(encoding_info->create_page_builder(PageBuilderOptions(), &data_page_builder));
+ PageBuilder* data_page_builder = nullptr;
+ PageBuilderOptions builder_option;
+ builder_option.need_check_bitmap = false;
+ RETURN_IF_ERROR(encoding_info->create_page_builder(builder_option, &data_page_builder));
_data_page_builder.reset(data_page_builder);
if (_options.write_ordinal_index) {
diff --git a/be/src/olap/rowset/segment_v2/options.h b/be/src/olap/rowset/segment_v2/options.h
index 0cb2dafa39..9405eb19cf 100644
--- a/be/src/olap/rowset/segment_v2/options.h
+++ b/be/src/olap/rowset/segment_v2/options.h
@@ -22,17 +22,19 @@
namespace doris {
namespace segment_v2 {
-class BinaryPlainPageDecoder;
-
static constexpr size_t DEFAULT_PAGE_SIZE = 1024 * 1024; // default size: 1M
struct PageBuilderOptions {
size_t data_page_size = DEFAULT_PAGE_SIZE;
size_t dict_page_size = DEFAULT_PAGE_SIZE;
+
+ bool need_check_bitmap = true;
};
-struct PageDecoderOptions {};
+struct PageDecoderOptions {
+ bool need_check_bitmap = true;
+};
} // namespace segment_v2
} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/parsed_page.h b/be/src/olap/rowset/segment_v2/parsed_page.h
index c70ebd62a6..232c347c38 100644
--- a/be/src/olap/rowset/segment_v2/parsed_page.h
+++ b/be/src/olap/rowset/segment_v2/parsed_page.h
@@ -38,7 +38,8 @@ namespace segment_v2 {
struct ParsedPage {
static Status create(PageHandle handle, const Slice& body, const DataPageFooterPB& footer,
const EncodingInfo* encoding, const PagePointer& page_pointer,
- uint32_t page_index, ParsedPage* result) {
+ uint32_t page_index, ParsedPage* result,
+ PageDecoderOptions opts = PageDecoderOptions()) {
result->~ParsedPage();
ParsedPage* page = new (result)(ParsedPage);
page->page_handle = std::move(handle);
@@ -53,7 +54,6 @@ struct ParsedPage {
}
Slice data_slice(body.data, body.size - null_size);
- PageDecoderOptions opts;
RETURN_IF_ERROR(encoding->create_page_decoder(data_slice, opts, &page->data_decoder));
RETURN_IF_ERROR(page->data_decoder->init());
diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp
index 60b56b35cb..f4e5e6087c 100644
--- a/be/src/tools/meta_tool.cpp
+++ b/be/src/tools/meta_tool.cpp
@@ -56,7 +56,6 @@ using doris::RandomAccessFile;
using strings::Substitute;
using doris::segment_v2::SegmentFooterPB;
using doris::segment_v2::ColumnReader;
-using doris::segment_v2::BinaryPlainPageDecoder;
using doris::segment_v2::PageHandle;
using doris::segment_v2::PagePointer;
using doris::segment_v2::ColumnReaderOptions;
diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h
index 4b1445ba59..7eaeb11e79 100644
--- a/be/src/util/bitmap_value.h
+++ b/be/src/util/bitmap_value.h
@@ -71,6 +71,16 @@ struct BitmapTypeCode {
// added in 0.12
BITMAP64 = 4
};
+ Status static inline validate(int bitmap_type) {
+ if (UNLIKELY(bitmap_type < type::EMPTY || bitmap_type > type::BITMAP64)) {
+ std::string err_msg =
+ fmt::format("BitmapTypeCode invalid, should between: {} and {} actrual is {}",
+ BitmapTypeCode::EMPTY, BitmapTypeCode::BITMAP64, bitmap_type);
+ LOG(ERROR) << err_msg;
+ return Status::IOError(err_msg);
+ }
+ return Status::OK();
+ }
};
namespace detail {
@@ -1536,7 +1546,6 @@ public:
// Deserialize a bitmap value from `src`.
// Return false if `src` begins with unknown type code, true otherwise.
bool deserialize(const char* src) {
- DCHECK(*src >= BitmapTypeCode::EMPTY && *src <= BitmapTypeCode::BITMAP64);
switch (*src) {
case BitmapTypeCode::EMPTY:
_type = EMPTY;
@@ -1555,6 +1564,9 @@ public:
_bitmap = detail::Roaring64Map::read(src);
break;
default:
+ LOG(ERROR) << "BitmapTypeCode invalid, should between: " << BitmapTypeCode::EMPTY
+ << " and " << BitmapTypeCode::BITMAP64 << " actrual is "
+ << static_cast<int>(*src);
return false;
}
return true;
diff --git a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
index 8c59da6491..bb3493efd6 100644
--- a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
@@ -68,8 +68,9 @@ public:
Status status = page_builder.get_dictionary_page(&dict_slice);
EXPECT_TRUE(status.ok());
PageDecoderOptions dict_decoder_options;
- std::unique_ptr<BinaryPlainPageDecoder> dict_page_decoder(
- new BinaryPlainPageDecoder(dict_slice.slice(), dict_decoder_options));
+ std::unique_ptr<BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>> dict_page_decoder(
+ new BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>(dict_slice.slice(),
+ dict_decoder_options));
status = dict_page_decoder->init();
EXPECT_TRUE(status.ok());
// because every slice is unique
@@ -175,8 +176,9 @@ public:
int slice_index = random() % results.size();
//int slice_index = 1;
PageDecoderOptions dict_decoder_options;
- std::unique_ptr<BinaryPlainPageDecoder> dict_page_decoder(
- new BinaryPlainPageDecoder(dict_slice.slice(), dict_decoder_options));
+ std::unique_ptr<BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>> dict_page_decoder(
+ new BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>(dict_slice.slice(),
+ dict_decoder_options));
status = dict_page_decoder->init();
EXPECT_TRUE(status.ok());
diff --git a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
index 0e0d34606d..1da1db55e1 100644
--- a/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_plain_page_test.cpp
@@ -108,7 +108,8 @@ public:
};
TEST_F(BinaryPlainPageTest, TestBinaryPlainPageBuilderSeekByValueSmallPage) {
- TestBinarySeekByValueSmallPage<BinaryPlainPageBuilder, BinaryPlainPageDecoder>();
+ TestBinarySeekByValueSmallPage<BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>,
+ BinaryPlainPageDecoder<OLAP_FIELD_TYPE_VARCHAR>>();
}
} // namespace segment_v2
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org