You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2019/09/27 17:10:36 UTC
[incubator-doris] branch master updated: Add frame_of_reference
page (#1818)
This is an automated email from the ASF dual-hosted git repository.
zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 0c22d8f Add frame_of_reference page (#1818)
0c22d8f is described below
commit 0c22d8fa08e97066816739b1327b2d8dd8fe0760
Author: kangkaisen <ka...@apache.org>
AuthorDate: Sat Sep 28 01:10:29 2019 +0800
Add frame_of_reference page (#1818)
---
.../rowset/segment_v2/frame_of_reference_page.h | 162 +++++++++
be/src/util/CMakeLists.txt | 1 +
be/src/util/bit_stream_utils.h | 3 +-
be/src/util/frame_of_reference_coding.cpp | 366 +++++++++++++++++++++
be/src/util/frame_of_reference_coding.h | 149 +++++++++
be/test/olap/CMakeLists.txt | 1 +
.../segment_v2/frame_of_reference_page_test.cpp | 180 ++++++++++
be/test/util/CMakeLists.txt | 2 +
be/test/util/bit_stream_utils_test.cpp | 231 +++++++++++++
be/test/util/frame_of_reference_coding_test.cpp | 166 ++++++++++
be/test/util/rle_encoding_test.cpp | 138 +-------
run-ut.sh | 3 +
12 files changed, 1263 insertions(+), 139 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/frame_of_reference_page.h b/be/src/olap/rowset/segment_v2/frame_of_reference_page.h
new file mode 100644
index 0000000..6b33ccf
--- /dev/null
+++ b/be/src/olap/rowset/segment_v2/frame_of_reference_page.h
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "olap/rowset/segment_v2/page_builder.h" // for PageBuilder
+#include "olap/rowset/segment_v2/page_decoder.h" // for PageDecoder
+#include "olap/rowset/segment_v2/options.h" // for PageBuilderOptions/PageDecoderOptions
+#include "olap/rowset/segment_v2/common.h" // for rowid_t
+#include "util/frame_of_reference_coding.h"
+
+namespace doris {
+namespace segment_v2 {
+
+// Encode page use frame-of-reference coding
+template<FieldType Type>
+class FrameOfReferencePageBuilder : public PageBuilder {
+public:
+ explicit FrameOfReferencePageBuilder(const PageBuilderOptions& options) :
+ _options(options),
+ _count(0),
+ _finished(false) {
+ _encoder.reset(new ForEncoder<CppType>(&_buf));
+ }
+
+ bool is_page_full() override {
+ return _encoder->len() >= _options.data_page_size;
+ }
+
+ Status add(const uint8_t* vals, size_t* count) override {
+ DCHECK(!_finished);
+ auto new_vals = reinterpret_cast<const CppType*>(vals);
+ _encoder->put_batch(new_vals, *count);
+ _count += *count;
+ return Status::OK();
+ }
+
+ Slice finish() override {
+ _finished = true;
+ _encoder->flush();
+ return Slice(_buf.data(), _buf.size());
+ }
+
+ void reset() override {
+ _count = 0;
+ _finished = false;
+ _encoder->clear();
+ }
+
+ size_t count() const override {
+ return _count;
+ }
+
+ uint64_t size() const override {
+ return _buf.size();
+ }
+
+ // this api will release the memory ownership of encoded data
+ // Note:
+ // release() should be called after finish
+ // reset() should be called after this function before reuse the builder
+ void release() override {
+ uint8_t* ret = _buf.release();
+ (void)ret;
+ }
+
+private:
+ typedef typename TypeTraits<Type>::CppType CppType;
+ PageBuilderOptions _options;
+ size_t _count;
+ bool _finished;
+ std::unique_ptr<ForEncoder<CppType>> _encoder;
+ faststring _buf;
+};
+
+template<FieldType Type>
+class FrameOfReferencePageDecoder : public PageDecoder {
+public:
+ FrameOfReferencePageDecoder(Slice slice, const PageDecoderOptions& options) :
+ _parsed(false),
+ _data(slice),
+ _num_elements(0),
+ _cur_index(0){
+ _decoder.reset(new ForDecoder<CppType>((uint8_t*)_data.data, _data.size));
+ }
+
+ Status init() override {
+ CHECK(!_parsed);
+ bool result = _decoder->init();
+ if (result) {
+ _num_elements = _decoder->count();
+ _parsed = true;
+ return Status::OK();
+ } else {
+ return Status::Corruption("The frame of reference page metadata maybe broken");
+ }
+ }
+
+ Status seek_to_position_in_page(size_t pos) override {
+ DCHECK(_parsed) << "Must call init() firstly";
+ DCHECK_LE(pos, _num_elements) << "Tried to seek to " << pos << " which is > number of elements ("
+ << _num_elements << ") in the block!";
+ // If the block is empty (e.g. the column is filled with nulls), there is no data to seek.
+ if (PREDICT_FALSE(_num_elements == 0)) {
+ return Status::OK();
+ }
+
+ int32_t skip_num = pos - _cur_index;
+ _decoder->skip(skip_num);
+ _cur_index = pos;
+ return Status::OK();
+ }
+
+ Status next_batch(size_t* n, ColumnBlockView* dst) override {
+ DCHECK(_parsed) << "Must call init() firstly";
+ if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) {
+ *n = 0;
+ return Status::OK();
+ }
+
+ size_t to_fetch = std::min(*n, static_cast<size_t>(_num_elements - _cur_index));
+ uint8_t* data_ptr = dst->data();
+ _decoder->get_batch(reinterpret_cast<CppType*>(data_ptr), to_fetch);
+ _cur_index += to_fetch;
+ *n = to_fetch;
+ return Status::OK();
+ }
+
+ size_t count() const override {
+ return _num_elements;
+ }
+
+ size_t current_index() const override {
+ return _cur_index;
+ }
+
+private:
+ typedef typename TypeTraits<Type>::CppType CppType;
+
+ bool _parsed;
+ Slice _data;
+ uint32_t _num_elements;
+ size_t _cur_index;
+ std::unique_ptr<ForDecoder<CppType>> _decoder;
+};
+
+} // namespace segment_v2
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt
index c282b6e..66e8a92 100644
--- a/be/src/util/CMakeLists.txt
+++ b/be/src/util/CMakeLists.txt
@@ -81,6 +81,7 @@ set(UTIL_FILES
thrift_rpc_helper.cpp
faststring.cc
slice.cpp
+ frame_of_reference_coding.cpp
)
if (WITH_MYSQL)
diff --git a/be/src/util/bit_stream_utils.h b/be/src/util/bit_stream_utils.h
index 2b68c84..220c8cb 100644
--- a/be/src/util/bit_stream_utils.h
+++ b/be/src/util/bit_stream_utils.h
@@ -49,8 +49,7 @@ class BitWriter {
int bytes_written() const { return byte_offset_ + BitUtil::Ceil(bit_offset_, 8); }
// Writes a value to buffered_values_, flushing to buffer_ if necessary. This is bit
- // packed. num_bits must be <= 32. If 'v' is larger than 'num_bits' bits, the higher
- // bits are ignored.
+ // packed.
void PutValue(uint64_t v, int num_bits);
// Writes v to the next aligned byte using num_bits. If T is larger than num_bits, the
diff --git a/be/src/util/frame_of_reference_coding.cpp b/be/src/util/frame_of_reference_coding.cpp
new file mode 100644
index 0000000..a5a8436
--- /dev/null
+++ b/be/src/util/frame_of_reference_coding.cpp
@@ -0,0 +1,366 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/frame_of_reference_coding.h"
+
+#include <algorithm>
+#include <cstring>
+
+#include "util/bit_util.h"
+#include "util/coding.h"
+
+namespace doris {
+
+static inline uint8_t bits(const uint64_t v) {
+ return v == 0 ? 0 : 64 - __builtin_clzll(v);
+}
+
+template<typename T>
+const T* ForEncoder<T>::copy_value(const T *p_data, size_t count) {
+ memcpy(&_buffered_values[_buffered_values_num], p_data, count * sizeof(T));
+ _buffered_values_num += count;
+ p_data += count;
+ return p_data;
+}
+
+template<typename T>
+void ForEncoder<T>::put_batch(const T *in_data, size_t count) {
+ if (_buffered_values_num + count < FRAME_VALUE_NUM) {
+ copy_value(in_data, count);
+ _values_num += count;
+ return;
+ }
+
+ // 1. padding one frame
+ size_t padding_num = FRAME_VALUE_NUM - _buffered_values_num;
+ in_data = copy_value(in_data, padding_num);
+ bit_packing_one_frame_value(_buffered_values);
+
+ // 2. process frame by frame
+ size_t frame_size = (count - padding_num) / FRAME_VALUE_NUM;
+ for (size_t i = 0; i < frame_size; i ++) {
+ // directly encode value to the bit_writer, don't buffer the value
+ _buffered_values_num = FRAME_VALUE_NUM;
+ bit_packing_one_frame_value(in_data);
+ in_data += FRAME_VALUE_NUM;
+ }
+
+ // 3. process remaining value
+ size_t remaining_num = (count - padding_num) % FRAME_VALUE_NUM;
+ if (remaining_num > 0) {
+ copy_value(in_data, remaining_num);
+ }
+
+ _values_num += count;
+}
+
+// todo(kks): improve this method by SIMD instructions
+
+// Use as few bit as possible to store a piece of integer data.
+// param[in] input: the integer list need to pack
+// param[in] in_num: the number integer need to pack
+// param[in] bit_width: how many bit we use to store each integer data
+// param[out] out: the packed result
+
+// For example:
+// The input is int32 list: 1, 2, 4, 8 and bit_width is 4
+// The output will be: 0001 0010 0100 1000
+template<typename T>
+void ForEncoder<T>::bit_pack(T *input, uint8_t in_num, int bit_width, uint8_t *output) {
+ if (in_num == 0 || bit_width == 0) {
+ return;
+ }
+
+ T in_mask = 0;
+ int bit_index = 0;
+ *output = 0;
+ for (int i = 0; i < in_num; i++) {
+ in_mask = 1 << (bit_width - 1);
+ for (int k = 0; k < bit_width; k++) {
+ if (bit_index > 7) {
+ bit_index = 0;
+ output++;
+ *output = 0;
+ }
+ *output |= (((input[i] & in_mask) >> (bit_width - k - 1)) << (7 - bit_index));
+ in_mask >>= 1;
+ bit_index++;
+ }
+ }
+}
+
+template<typename T>
+void ForEncoder<T>::bit_packing_one_frame_value(const T* input) {
+ T min = input[0];
+ T max = input[0];
+ bool is_ascending = true;
+ uint8_t bit_width = 0;
+
+ for (uint8_t i = 1; i < _buffered_values_num; ++i) {
+ if (is_ascending) {
+ if (input[i] < input[i - 1]) {
+ is_ascending = false;
+ } else {
+ bit_width = std::max(bit_width, bits(input[i]- input[i - 1])) ;
+ }
+ }
+
+ if (input[i] < min) {
+ min = input[i];
+ continue;
+ }
+
+ if (input[i] > max) {
+ max = input[i];
+ }
+ }
+
+ if (sizeof(T) == 8) {
+ put_fixed64_le(_buffer, min);
+ } else {
+ put_fixed32_le(_buffer, min);
+ }
+
+ // improve for ascending order input, we could use fewer bit
+ T delta_values[FRAME_VALUE_NUM];
+ u_int8_t order_flag = 0;
+ if (is_ascending) {
+ delta_values[0] = 0;
+ for (uint8_t i = 1; i < _buffered_values_num; ++i) {
+ delta_values[i] = input[i] - input[i - 1];
+ }
+ order_flag = 1;
+ } else {
+ bit_width = bits(static_cast<T>(max - min));
+ for (uint8_t i = 0; i < _buffered_values_num; ++i) {
+ delta_values[i] = input[i] - min;
+ }
+ }
+ // 2 bit order_flag + 6 bit bit_width
+ uint8_t order_flag_and_bit_width = order_flag << 6 | bit_width;
+ _order_flag_and_bit_widths.push_back(order_flag_and_bit_width);
+
+ uint32_t packing_len = BitUtil::Ceil(_buffered_values_num * bit_width, 8);
+
+ _buffer->reserve(_buffer->size() + packing_len);
+ size_t origin_size = _buffer->size();
+ _buffer->resize(_buffer->size() + packing_len);
+ bit_pack(delta_values, _buffered_values_num, bit_width, _buffer->data() + origin_size);
+
+ _buffered_values_num = 0;
+}
+
+template<typename T>
+uint32_t ForEncoder<T>::flush() {
+ if (_buffered_values_num != 0) {
+ bit_packing_one_frame_value(_buffered_values);
+ }
+
+ // write the footer:
+ // 1 order_flags and bit_widths
+ for (auto value: _order_flag_and_bit_widths) {
+ _buffer->append(&value, 1);
+ }
+ // 2 frame_value_num and values_num
+ uint8_t frame_value_num = FRAME_VALUE_NUM;
+ _buffer->append(&frame_value_num, 1);
+ put_fixed32_le(_buffer, _values_num);
+
+ return _buffer->size();
+}
+
+
+template<typename T>
+bool ForDecoder<T>::init() {
+ // When row count is zero, the minimum footer size is 5:
+ // only has ValuesNum(4) + FrameValueNum(1)
+ if (_buffer_len < 5) {
+ return false;
+ }
+
+ _frame_value_num = decode_fixed32_le(_buffer + _buffer_len - 5);
+ _values_num = decode_fixed32_le(_buffer + _buffer_len - 4);
+ _frame_count = _values_num / _frame_value_num + (_values_num % _frame_value_num != 0);
+
+ size_t bit_width_offset = _buffer_len - 5 - _frame_count;
+ if (bit_width_offset < 0) {
+ return false;
+ }
+
+ // read order_flags, bit_widths and compute frame_offsets
+ uint8_t mask = (1 << 6) - 1;
+ u_int32_t frame_start_offset = 0;
+ for (uint32_t i = 0; i < _frame_count; i++ ) {
+ uint32_t order_flag_and_bit_width = decode_fixed8(_buffer + bit_width_offset);
+
+ uint8_t bit_width = order_flag_and_bit_width & mask;
+ uint8_t order_flag = order_flag_and_bit_width >> 6;
+ _bit_widths.push_back(bit_width);
+ _order_flags.push_back(order_flag);
+
+ bit_width_offset += 1;
+
+ _frame_offsets.push_back(frame_start_offset);
+ if (sizeof(T) == 8) {
+ frame_start_offset += bit_width * _frame_value_num / 8 + 8;
+ } else {
+ frame_start_offset += bit_width * _frame_value_num / 8 + 4;
+ }
+ }
+
+ _out_buffer.reserve(_frame_value_num);
+
+ return true;
+}
+
+// todo(kks): improve this method by SIMD instructions
+// The reverse of bit_pack method, get original integer data list from packed bits
+// param[in] input: the packed bits need to unpack
+// param[in] in_num: the integer number in packed bits
+// param[in] bit_width: how many bit we used to store each integer data
+// param[out] output: the original integer data list
+template<typename T>
+void ForDecoder<T>::bit_unpack(const uint8_t *input, uint8_t in_num, int bit_width, T *output) {
+ unsigned char in_mask = 0x80;
+ int bit_index = 0;
+ while (in_num > 0) {
+ *output = 0;
+ for (int i = 0; i < bit_width; i++) {
+ if (bit_index > 7) {
+ input++;
+ bit_index = 0;
+ }
+ *output |= ((T)((*input & (in_mask >> bit_index)) >> (7 - bit_index))) << (bit_width - i - 1);
+ bit_index++;
+ }
+ output++;
+ in_num--;
+ }
+}
+
+template<typename T>
+void ForDecoder<T>::decode_current_frame(T* output) {
+ _current_decoded_frame = _current_index / _frame_value_num;
+ uint8_t frame_value_num = _frame_value_num;
+ // compute last frame value num
+ if (_current_decoded_frame == _frame_count - 1 && _values_num % _frame_value_num != 0) {
+ frame_value_num = _values_num % _frame_value_num;;
+ }
+
+ uint32_t base_offset = _frame_offsets[_current_decoded_frame];
+ T min = 0;
+ uint32_t delta_offset = 0;
+ if (sizeof(T) == 8) {
+ min = decode_fixed64_le(_buffer + base_offset);
+ delta_offset = base_offset + 8;
+ } else {
+ min = decode_fixed32_le(_buffer + base_offset);
+ delta_offset = base_offset + 4;
+ }
+
+ uint8_t bit_width = _bit_widths[_current_decoded_frame];
+ bool is_ascending = _order_flags[_current_decoded_frame];
+
+ std::vector<T> delta_values(_frame_value_num);
+ bit_unpack(_buffer + delta_offset, frame_value_num, bit_width, delta_values.data());
+ if (is_ascending) {
+ T pre_value = min;
+ for (uint8_t i = 0; i < frame_value_num; i ++) {
+ T value = delta_values[i] + pre_value;
+ output[i] = value;
+ pre_value = value;
+ }
+ } else {
+ for (uint8_t i = 0; i < frame_value_num; i ++) {
+ output[i] = delta_values[i] + min;
+ }
+ }
+}
+
+template<typename T>
+T* ForDecoder<T>::copy_value(T* val, size_t count) {
+ memcpy(val, &_out_buffer[_current_index % _frame_value_num], sizeof(T) * count);
+ _current_index += count;
+ val += count;
+ return val;
+}
+
+template<typename T>
+bool ForDecoder<T>::get_batch(T* val, size_t count) {
+ if (_current_index + count > _values_num) {
+ return false;
+ }
+
+ if (need_decode_frame()) {
+ decode_current_frame(_out_buffer.data());
+ }
+
+ if (_current_index + count < _frame_value_num * (_current_decoded_frame + 1)) {
+ copy_value(val, count);
+ return true;
+ }
+
+ // 1. padding one frame
+ size_t padding_num = _frame_value_num * (_current_decoded_frame + 1) - _current_index;
+ val = copy_value(val, padding_num);
+
+ // 2. process frame by frame
+ size_t frame_size = (count - padding_num) / _frame_value_num;
+ for (size_t i = 0; i < frame_size; i++) {
+ // directly decode value to the output, don't buffer the value
+ decode_current_frame(val);
+ _current_index += _frame_value_num;
+ val += _frame_value_num;
+ }
+
+ // 3. process remaining value
+ size_t remaining_num = (count - padding_num) % _frame_value_num;
+ if (remaining_num > 0) {
+ decode_current_frame(_out_buffer.data());
+ val = copy_value(val, remaining_num);
+ }
+
+ return true;
+}
+
+template<typename T>
+bool ForDecoder<T>::skip(int32_t skip_num) {
+ if (_current_index + skip_num >= _values_num || _current_index + skip_num < 0) {
+ return false;
+ }
+ _current_index = _current_index + skip_num;
+ return true;
+}
+
+template class ForEncoder<int8_t>;
+template class ForEncoder<int16_t>;
+template class ForEncoder<int32_t>;
+template class ForEncoder<int64_t>;
+template class ForEncoder<uint8_t>;
+template class ForEncoder<uint16_t>;
+template class ForEncoder<uint32_t>;
+template class ForEncoder<uint64_t>;
+
+template class ForDecoder<int8_t>;
+template class ForDecoder<int16_t>;
+template class ForDecoder<int32_t>;
+template class ForDecoder<int64_t>;
+template class ForDecoder<uint8_t>;
+template class ForDecoder<uint16_t>;
+template class ForDecoder<uint32_t>;
+template class ForDecoder<uint64_t>;
+}
diff --git a/be/src/util/frame_of_reference_coding.h b/be/src/util/frame_of_reference_coding.h
new file mode 100644
index 0000000..c48c50f
--- /dev/null
+++ b/be/src/util/frame_of_reference_coding.h
@@ -0,0 +1,149 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef DORIS_FRAME_OF_REFERENCE_CODING_H
+#define DORIS_FRAME_OF_REFERENCE_CODING_H
+
+
+#include <cstdlib>
+#include <iostream>
+
+#include "util/bit_stream_utils.h"
+#include "util/bit_stream_utils.inline.h"
+#include "util/faststring.h"
+
+namespace doris {
+// The implementation for frame-of-reference coding
+// The detail of frame-of-reference coding, please refer to
+// https://lemire.me/blog/2012/02/08/effective-compression-using-frame-of-reference-and-delta-coding/
+// and https://www.elastic.co/cn/blog/frame-of-reference-and-roaring-bitmaps
+//
+// The encoded data format is as follows:
+//
+// 1. Body:
+// BitPackingFrame * FrameCount
+// 2. Footer:
+// (2 bit OrderFlag + 6 bit BitWidth) * FrameCount
+// 8 bit FrameValueNum
+// 32 bit ValuesNum
+//
+// The not ascending order BitPackingFrame format:
+// MinValue, (Value - MinVale) * FrameValueNum
+//
+// The ascending order BitPackingFrame format:
+// MinValue, (Value[i] - Value[i - 1]) * FrameValueNum
+//
+// The OrderFlag is 1 represents ascending order, 0 represents not ascending order
+// The last frame value num maybe less than 128
+template<typename T>
+class ForEncoder {
+public:
+ explicit ForEncoder(faststring* buffer): _buffer(buffer) {}
+
+ void put(const T value) {
+ return put_batch(&value, 1);
+ }
+
+ void put_batch(const T* value, size_t count);
+
+ // Flushes any pending values to the underlying buffer.
+ // Returns the total number of bytes written
+ uint32_t flush();
+
+ // underlying buffer size + footer meta size.
+ // Note: should call this method before flush.
+ uint32_t len() {
+ return _buffer->size() + _order_flag_and_bit_widths.size() + 5;
+ }
+
+ // Resets all the state in the encoder.
+ void clear() {
+ _values_num = 0;
+ _buffered_values_num = 0;
+ _buffer->clear();
+ }
+
+private:
+ void bit_pack(T *input, uint8_t in_num, int bit_width, uint8_t *output);
+
+ void bit_packing_one_frame_value(const T* input);
+
+ const T* copy_value(const T* val, size_t count);
+
+ uint32_t _values_num = 0;
+ uint8_t _buffered_values_num = 0;
+ static const uint8_t FRAME_VALUE_NUM = 128;
+ T _buffered_values[FRAME_VALUE_NUM];
+
+ faststring* _buffer;
+ std::vector<uint8_t> _order_flag_and_bit_widths;
+};
+
+template<typename T>
+class ForDecoder {
+public:
+ explicit ForDecoder(const uint8_t* in_buffer, size_t buffer_len)
+ :_buffer (in_buffer),
+ _buffer_len (buffer_len){}
+
+ // read footer metadata
+ bool init();
+
+ bool get(T* val) {
+ return get_batch(val, 1);
+ }
+
+ // Gets the next batch value. Returns false if there are no more.
+ bool get_batch(T* val, size_t count);
+
+ // The skip_num is positive means move forwards
+ // The skip_num is negative means move backwards
+ bool skip(int32_t skip_num);
+
+ uint32_t count() const {
+ return _values_num;
+ }
+
+private:
+
+ void bit_unpack(const uint8_t *input, uint8_t in_num, int bit_width, T *output);
+
+ void decode_current_frame(T* output);
+
+ T* copy_value(T* val, size_t count);
+
+ bool need_decode_frame() {
+ return !(_frame_value_num * _current_decoded_frame < _current_index
+ && _current_index < _frame_value_num * (_current_decoded_frame + 1));
+ }
+
+ uint8_t _frame_value_num = 0;
+ uint32_t _values_num = 0;
+ uint32_t _frame_count = 0;
+ uint32_t _current_index = 0;
+ uint32_t _current_decoded_frame = -1;
+ std::vector<T> _out_buffer;
+ std::vector<uint32_t> _frame_offsets;
+ std::vector<uint8_t> _bit_widths;
+ std::vector<uint8_t> _order_flags;
+ const uint8_t* _buffer;
+ size_t _buffer_len = 0;
+};
+}
+
+
+#endif //DORIS_FRAME_OF_REFERENCE_CODING_H
diff --git a/be/test/olap/CMakeLists.txt b/be/test/olap/CMakeLists.txt
index 3f248e6..03deaca 100644
--- a/be/test/olap/CMakeLists.txt
+++ b/be/test/olap/CMakeLists.txt
@@ -57,6 +57,7 @@ ADD_BE_TEST(rowset/segment_v2/binary_dict_page_test)
ADD_BE_TEST(rowset/segment_v2/segment_test)
ADD_BE_TEST(rowset/segment_v2/column_zone_map_test)
ADD_BE_TEST(rowset/segment_v2/row_ranges_test)
+ADD_BE_TEST(rowset/segment_v2/frame_of_reference_page_test)
ADD_BE_TEST(tablet_meta_manager_test)
ADD_BE_TEST(tablet_mgr_test)
ADD_BE_TEST(rowset/rowset_meta_manager_test)
diff --git a/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp b/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp
new file mode 100644
index 0000000..756b096
--- /dev/null
+++ b/be/test/olap/rowset/segment_v2/frame_of_reference_page_test.cpp
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <memory>
+
+#include "olap/rowset/segment_v2/options.h"
+#include "olap/rowset/segment_v2/page_builder.h"
+#include "olap/rowset/segment_v2/page_decoder.h"
+#include "olap/rowset/segment_v2/frame_of_reference_page.h"
+#include "util/arena.h"
+#include "util/logging.h"
+
+using doris::segment_v2::PageBuilderOptions;
+using doris::segment_v2::PageDecoderOptions;
+
+namespace doris {
+class FrameOfReferencePageTest : public testing::Test {
+public:
+ template<FieldType type, class PageDecoderType>
+ void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
+ Arena arena;
+ uint8_t null_bitmap = 0;
+ ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
+ ColumnBlockView column_block_view(&block);
+
+ size_t n = 1;
+ decoder->next_batch(&n, &column_block_view);
+ ASSERT_EQ(1, n);
+ }
+
+ template <FieldType Type, class PageBuilderType, class PageDecoderType>
+ void test_encode_decode_page_template(typename TypeTraits<Type>::CppType* src,
+ size_t size) {
+ typedef typename TypeTraits<Type>::CppType CppType;
+ PageBuilderOptions builder_options;
+ builder_options.data_page_size = 256 * 1024;
+ PageBuilderType for_page_builder(builder_options);
+ for_page_builder.add(reinterpret_cast<const uint8_t *>(src), &size);
+ Slice s = for_page_builder.finish();
+ ASSERT_EQ(size, for_page_builder.count());
+ LOG(INFO) << "FrameOfReference Encoded size for 10k values: " << s.size
+ << ", original size:" << size * sizeof(CppType);
+
+ PageDecoderOptions decoder_options;
+ PageDecoderType for_page_decoder(s, decoder_options);
+ Status status = for_page_decoder.init();
+ ASSERT_TRUE(status.ok());
+ ASSERT_EQ(0, for_page_decoder.current_index());
+ ASSERT_EQ(size, for_page_decoder.count());
+
+ Arena arena;
+ CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
+ uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
+ ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
+ ColumnBlockView column_block_view(&block);
+ size_t size_to_fetch = size;
+ status = for_page_decoder.next_batch(&size_to_fetch, &column_block_view);
+ ASSERT_TRUE(status.ok());
+ ASSERT_EQ(size, size_to_fetch);
+
+ for (uint i = 0; i < size; i++) {
+ if (src[i] != values[i]) {
+ FAIL() << "Fail at index " << i <<
+ " inserted=" << src[i] << " got=" << values[i];
+ }
+ }
+
+ // Test Seek within block by ordinal
+ for (int i = 0; i < 100; i++) {
+ int seek_off = random() % size;
+ for_page_decoder.seek_to_position_in_page(seek_off);
+ EXPECT_EQ((int32_t )(seek_off), for_page_decoder.current_index());
+ CppType ret;
+ copy_one<Type, PageDecoderType>(&for_page_decoder, &ret);
+ EXPECT_EQ(values[seek_off], ret);
+ }
+ }
+};
+
+TEST_F(FrameOfReferencePageTest, TestInt32BlockEncoderRandom) {
+ const uint32_t size = 10000;
+
+ std::unique_ptr<int32_t[]> ints(new int32_t[size]);
+ for (int i = 0; i < size; i++) {
+ ints.get()[i] = random();
+ }
+
+ test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::FrameOfReferencePageBuilder<OLAP_FIELD_TYPE_INT>,
+ segment_v2::FrameOfReferencePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
+}
+
+TEST_F(FrameOfReferencePageTest, TestInt32BlockEncoderEqual) {
+ const uint32_t size = 10000;
+
+ std::unique_ptr<int32_t[]> ints(new int32_t[size]);
+ for (int i = 0; i < size; i++) {
+ ints.get()[i] = 12345;
+ }
+
+ test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::FrameOfReferencePageBuilder<OLAP_FIELD_TYPE_INT>,
+ segment_v2::FrameOfReferencePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
+}
+
+TEST_F(FrameOfReferencePageTest, TestInt32BlockEncoderSequence) {
+ const uint32_t size = 10000;
+
+ std::unique_ptr<int32_t[]> ints(new int32_t[size]);
+ for (int i = 0; i < size; i++) {
+ ints.get()[i] = 12345 + i;
+ }
+
+ test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::FrameOfReferencePageBuilder<OLAP_FIELD_TYPE_INT>,
+ segment_v2::FrameOfReferencePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
+}
+
+TEST_F(FrameOfReferencePageTest, TestInt64BlockEncoderSequence) {
+ const uint32_t size = 10000;
+
+ std::unique_ptr<int64_t[]> ints(new int64_t[size]);
+ for (int i = 0; i < size; i++) {
+ ints.get()[i] = 21474836478 + i;
+ }
+
+ test_encode_decode_page_template<OLAP_FIELD_TYPE_BIGINT, segment_v2::FrameOfReferencePageBuilder<OLAP_FIELD_TYPE_BIGINT>,
+ segment_v2::FrameOfReferencePageDecoder<OLAP_FIELD_TYPE_BIGINT> >(ints.get(), size);
+}
+
+TEST_F(FrameOfReferencePageTest, TestInt32SequenceBlockEncoderSize) {
+ size_t size = 128;
+ std::unique_ptr<int32_t[]> ints(new int32_t[size]);
+ for (int i = 0; i < size; i++) {
+ ints.get()[i] = i;
+ }
+ PageBuilderOptions builder_options;
+ builder_options.data_page_size = 256 * 1024;
+ segment_v2::FrameOfReferencePageBuilder<OLAP_FIELD_TYPE_INT> page_builder(builder_options);
+ page_builder.add(reinterpret_cast<const uint8_t *>(ints.get()), &size);
+ Slice s = page_builder.finish();
+ // body: 4 bytes min value + 128 * 1 /8 packing value = 20
+ // header: 1 + 1 + 4 = 6
+ ASSERT_EQ(26, s.size);
+}
+
+TEST_F(FrameOfReferencePageTest, TestInt32NormalBlockEncoderSize) {
+ size_t size = 128;
+ std::unique_ptr<int32_t[]> ints(new int32_t[size]);
+ for (int i = 0; i < size; i++) {
+ ints.get()[i] = 128 - i;
+ }
+ PageBuilderOptions builder_options;
+ builder_options.data_page_size = 256 * 1024;
+ segment_v2::FrameOfReferencePageBuilder<OLAP_FIELD_TYPE_INT> page_builder(builder_options);
+ page_builder.add(reinterpret_cast<const uint8_t *>(ints.get()), &size);
+ Slice s = page_builder.finish();
+ // body: 4 bytes min value + 128 * 7 /8 packing value = 116
+ // header: 1 + 1 + 4 = 6
+ ASSERT_EQ(122, s.size);
+}
+
+}
+
+int main(int argc, char** argv) {
+ testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
diff --git a/be/test/util/CMakeLists.txt b/be/test/util/CMakeLists.txt
index ff910dd..c9cfb5e 100644
--- a/be/test/util/CMakeLists.txt
+++ b/be/test/util/CMakeLists.txt
@@ -48,3 +48,5 @@ ADD_BE_TEST(block_compression_test)
ADD_BE_TEST(arrow/arrow_row_block_test)
ADD_BE_TEST(arrow/arrow_row_batch_test)
ADD_BE_TEST(counter_cond_variable_test)
+ADD_BE_TEST(frame_of_reference_coding_test)
+ADD_BE_TEST(bit_stream_utils_test)
diff --git a/be/test/util/bit_stream_utils_test.cpp b/be/test/util/bit_stream_utils_test.cpp
new file mode 100644
index 0000000..9d979b5
--- /dev/null
+++ b/be/test/util/bit_stream_utils_test.cpp
@@ -0,0 +1,231 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <ostream>
+#include <string>
+#include <vector>
+#include <limits>
+
+// Must come before gtest.h.
+#include <boost/utility/binary.hpp>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "util/bit_stream_utils.h"
+#include "util/bit_stream_utils.inline.h"
+#include "util/bit_util.h"
+#include "util/faststring.h"
+#include "util/debug_util.h"
+
+using std::string;
+using std::vector;
+
+namespace doris {
+
+const int kMaxWidth = 64;
+class TestBitStreamUtil : public testing::Test {};
+
+TEST(TestBitStreamUtil, TestBool) {
+ const int len_bytes = 2;
+ faststring buffer(len_bytes);
+
+ BitWriter writer(&buffer);
+
+ // Write alternating 0's and 1's
+ for (int i = 0; i < 8; ++i) {
+ writer.PutValue(i % 2, 1);
+ }
+ writer.Flush();
+ EXPECT_EQ(buffer[0], BOOST_BINARY(1 0 1 0 1 0 1 0));
+
+ // Write 00110011
+ for (int i = 0; i < 8; ++i) {
+ switch (i) {
+ case 0:
+ case 1:
+ case 4:
+ case 5:
+ writer.PutValue(0, 1);
+ break;
+ default:
+ writer.PutValue(1, 1);
+ break;
+ }
+ }
+ writer.Flush();
+
+ // Validate the exact bit value
+ EXPECT_EQ(buffer[0], BOOST_BINARY(1 0 1 0 1 0 1 0));
+ EXPECT_EQ(buffer[1], BOOST_BINARY(1 1 0 0 1 1 0 0));
+
+ // Use the reader and validate
+ BitReader reader(buffer.data(), buffer.size());
+ for (int i = 0; i < 8; ++i) {
+ bool val = false;
+ bool result = reader.GetValue(1, &val);
+ EXPECT_TRUE(result);
+ EXPECT_EQ(val, i % 2);
+ }
+
+ for (int i = 0; i < 8; ++i) {
+ bool val = false;
+ bool result = reader.GetValue(1, &val);
+ EXPECT_TRUE(result);
+ switch (i) {
+ case 0:
+ case 1:
+ case 4:
+ case 5:
+ EXPECT_EQ(val, false);
+ break;
+ default:
+ EXPECT_EQ(val, true);
+ break;
+ }
+ }
+}
+
+// Writes 'num_vals' values with width 'bit_width' and reads them back.
+void TestBitArrayValues(int bit_width, int num_vals) {
+ const int kTestLen = BitUtil::Ceil(bit_width * num_vals, 8);
+ const uint64_t mod = bit_width == 64? 1 : 1LL << bit_width;
+
+ faststring buffer(kTestLen);
+ BitWriter writer(&buffer);
+ for (int i = 0; i < num_vals; ++i) {
+ writer.PutValue(i % mod, bit_width);
+ }
+ writer.Flush();
+ EXPECT_EQ(writer.bytes_written(), kTestLen);
+
+ BitReader reader(buffer.data(), kTestLen);
+ for (int i = 0; i < num_vals; ++i) {
+ int64_t val = 0;
+ bool result = reader.GetValue(bit_width, &val);
+ EXPECT_TRUE(result);
+ EXPECT_EQ(val, i % mod);
+ }
+ EXPECT_EQ(reader.bytes_left(), 0);
+}
+
+TEST(TestBitStreamUtil, TestValues) {
+ for (int width = 1; width <= kMaxWidth; ++width) {
+ TestBitArrayValues(width, 1);
+ TestBitArrayValues(width, 2);
+ // Don't write too many values
+ TestBitArrayValues(width, (width < 12) ? (1 << width) : 4096);
+ TestBitArrayValues(width, 1024);
+ }
+}
+
+// Test some mixed values
+TEST(TestBitStreamUtil, TestMixed) {
+ const int kTestLenBits = 1024;
+ faststring buffer(kTestLenBits / 8);
+ bool parity = true;
+
+ BitWriter writer(&buffer);
+ for (int i = 0; i < kTestLenBits; ++i) {
+ if (i % 2 == 0) {
+ writer.PutValue(parity, 1);
+ parity = !parity;
+ } else {
+ writer.PutValue(i, 10);
+ }
+ }
+ writer.Flush();
+
+ parity = true;
+ BitReader reader(buffer.data(), buffer.size());
+ for (int i = 0; i < kTestLenBits; ++i) {
+ bool result;
+ if (i % 2 == 0) {
+ bool val = false;
+ result = reader.GetValue(1, &val);
+ EXPECT_EQ(val, parity);
+ parity = !parity;
+ } else {
+ int val;
+ result = reader.GetValue(10, &val);
+ EXPECT_EQ(val, i);
+ }
+ EXPECT_TRUE(result);
+ }
+}
+
+TEST(TestBitStreamUtil, TestSeekToBit) {
+ faststring buffer(1);
+
+ BitWriter writer(&buffer);
+ writer.PutValue(2019,32);
+ writer.PutValue(2020,32);
+ writer.PutValue(2021,32);
+ writer.Flush();
+
+ BitReader reader(buffer.data(), buffer.size());
+ reader.SeekToBit(buffer.size() * 8 - 8 * 8);
+ uint32_t second_value;
+ reader.GetValue(32, &second_value);
+ ASSERT_EQ(second_value,2020);
+
+ uint32_t third_value;
+ reader.GetValue(32, &third_value);
+ ASSERT_EQ(third_value, 2021);
+
+ reader.SeekToBit(0);
+ uint32_t first_value;
+ reader.GetValue(32, &first_value);
+ ASSERT_EQ(first_value, 2019);
+}
+
+TEST(TestBitStreamUtil, TestUint64) {
+ faststring buffer(1);
+ BitWriter writer(&buffer);
+ writer.PutValue(18446744073709551614U, 64);
+ writer.PutValue(18446744073709551613U, 64);
+ writer.PutValue(128, 32);
+ writer.PutValue(126, 16);
+ writer.Flush();
+
+ BitReader reader(buffer.data(), buffer.size());
+
+ uint64_t v1;
+ reader.GetValue(64, &v1);
+ ASSERT_EQ(v1, 18446744073709551614U);
+
+ uint64_t v2;
+ reader.GetValue(64, &v2);
+ ASSERT_EQ(v2, 18446744073709551613U);
+
+ uint64_t v3;
+ reader.GetValue(32, &v3);
+ ASSERT_EQ(v3, 128);
+
+ uint64_t v4;
+ reader.GetValue(16, &v4);
+ ASSERT_EQ(v4, 126);
+}
+} // namespace doris
+
+int main(int argc, char** argv) {
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
diff --git a/be/test/util/frame_of_reference_coding_test.cpp b/be/test/util/frame_of_reference_coding_test.cpp
new file mode 100644
index 0000000..2a7bca8
--- /dev/null
+++ b/be/test/util/frame_of_reference_coding_test.cpp
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include "util/frame_of_reference_coding.h"
+
+namespace doris {
+class TestForCoding : public testing::Test {
+public:
+ static void test_frame_of_reference_encode_decode(int32_t element_size) {
+ faststring buffer(1);
+ ForEncoder<int32_t> encoder(&buffer);
+
+ std::vector<int32_t> data;
+ for (int32_t i = 0; i < element_size; ++i) {
+ data.push_back(i);
+ }
+ encoder.put_batch(data.data(), element_size);
+ encoder.flush();
+
+ ForDecoder<int32_t> decoder(buffer.data(), buffer.length());
+ decoder.init();
+ std::vector<int32_t> actual_result(element_size);
+ decoder.get_batch(actual_result.data(), element_size);
+
+ ASSERT_EQ(data, actual_result);
+ }
+
+ static void test_skip(int32_t skip_num) {
+ faststring buffer(1);
+ ForEncoder<uint32_t> encoder(&buffer);
+
+ std::vector<uint32_t> input_data;
+ std::vector<uint32_t> expect_result;
+ for (uint32_t i = 0; i < 256; ++i) {
+ input_data.push_back(i);
+ if (i >= skip_num) {
+ expect_result.push_back(i);
+ }
+ }
+ encoder.put_batch(input_data.data(), 256);
+ encoder.flush();
+
+ ForDecoder<uint32_t> decoder(buffer.data(), buffer.length());
+ decoder.init();
+ decoder.skip(skip_num);
+
+ std::vector<uint32_t> actual_result(256 - skip_num);
+ decoder.get_batch(actual_result.data(), 256 - skip_num);
+
+ ASSERT_EQ(expect_result, actual_result);
+ }
+};
+
+
+TEST_F(TestForCoding, TestHalfFrame) {
+ test_frame_of_reference_encode_decode(64);
+}
+
+TEST_F(TestForCoding, TestOneFrame) {
+ test_frame_of_reference_encode_decode(128);
+}
+
+TEST_F(TestForCoding, TestTwoFrame) {
+ test_frame_of_reference_encode_decode(256);
+}
+
+TEST_F(TestForCoding, TestTwoHlafFrame) {
+ test_frame_of_reference_encode_decode(320);
+}
+
+TEST_F(TestForCoding, TestSkipZero) {
+ test_skip(0);
+}
+
+TEST_F(TestForCoding, TestSkipHalfFrame) {
+ test_skip(64);
+}
+
+TEST_F(TestForCoding, TestSkipOneFrame) {
+ test_skip(128);
+}
+
+TEST_F(TestForCoding, TestInt64) {
+ faststring buffer(1);
+ ForEncoder<int64_t> encoder(&buffer);
+
+ std::vector<int64_t> data;
+ for (int64_t i = 0; i < 320; ++i) {
+ data.push_back(i);
+ }
+ encoder.put_batch(data.data(), 320);
+ encoder.flush();
+
+ ForDecoder<int64_t> decoder(buffer.data(), buffer.length());
+ decoder.init();
+ std::vector<int64_t> actual_result(320);
+ decoder.get_batch(actual_result.data(), 320);
+
+ ASSERT_EQ(data, actual_result);
+}
+
+TEST_F(TestForCoding, TestOneMinValue) {
+ faststring buffer(1);
+ ForEncoder<int32_t> encoder(&buffer);
+ encoder.put(2019);
+ encoder.flush();
+
+ ForDecoder<int32_t> decoder(buffer.data(), buffer.length());
+ decoder.init();
+ int32_t actual_value;
+ decoder.get(&actual_value);
+ ASSERT_EQ(2019, actual_value);
+}
+
+TEST_F(TestForCoding, TestZeroValue) {
+ faststring buffer(1);
+ ForEncoder<int32_t> encoder(&buffer);
+ encoder.flush();
+
+ ASSERT_EQ(buffer.length(), 4 + 1);
+
+ ForDecoder<int32_t> decoder(buffer.data(), buffer.length());
+ decoder.init();
+ int32_t actual_value;
+ bool result = decoder.get(&actual_value);
+ ASSERT_EQ(result, false);
+}
+
+TEST_F(TestForCoding, TestBytesAlign) {
+ faststring buffer(1);
+ ForEncoder<int32_t> encoder(&buffer);
+ encoder.put(2019);
+ encoder.put(2020);
+ encoder.flush();
+
+ ForDecoder<int32_t> decoder(buffer.data(), buffer.length());
+ decoder.init();
+
+ int32_t actual_value;
+ decoder.get(&actual_value);
+ ASSERT_EQ(2019, actual_value);
+ decoder.get(&actual_value);
+ ASSERT_EQ(2020, actual_value);
+}
+
+}
+
+int main(int argc, char** argv) {
+ testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
\ No newline at end of file
diff --git a/be/test/util/rle_encoding_test.cpp b/be/test/util/rle_encoding_test.cpp
index b40ef07..50c92c7 100644
--- a/be/test/util/rle_encoding_test.cpp
+++ b/be/test/util/rle_encoding_test.cpp
@@ -43,143 +43,7 @@ namespace doris {
const int kMaxWidth = 64;
-class TestRle : public testing::Test {
-public:
- TestRle() {
- }
-
- virtual ~TestRle() {
- }
-};
-
-TEST(BitArray, TestBool) {
- const int len_bytes = 2;
- faststring buffer(len_bytes);
-
- BitWriter writer(&buffer);
-
- // Write alternating 0's and 1's
- for (int i = 0; i < 8; ++i) {
- writer.PutValue(i % 2, 1);
- }
- writer.Flush();
- EXPECT_EQ(buffer[0], BOOST_BINARY(1 0 1 0 1 0 1 0));
-
- // Write 00110011
- for (int i = 0; i < 8; ++i) {
- switch (i) {
- case 0:
- case 1:
- case 4:
- case 5:
- writer.PutValue(0, 1);
- break;
- default:
- writer.PutValue(1, 1);
- break;
- }
- }
- writer.Flush();
-
- // Validate the exact bit value
- EXPECT_EQ(buffer[0], BOOST_BINARY(1 0 1 0 1 0 1 0));
- EXPECT_EQ(buffer[1], BOOST_BINARY(1 1 0 0 1 1 0 0));
-
- // Use the reader and validate
- BitReader reader(buffer.data(), buffer.size());
- for (int i = 0; i < 8; ++i) {
- bool val = false;
- bool result = reader.GetValue(1, &val);
- EXPECT_TRUE(result);
- EXPECT_EQ(val, i % 2);
- }
-
- for (int i = 0; i < 8; ++i) {
- bool val = false;
- bool result = reader.GetValue(1, &val);
- EXPECT_TRUE(result);
- switch (i) {
- case 0:
- case 1:
- case 4:
- case 5:
- EXPECT_EQ(val, false);
- break;
- default:
- EXPECT_EQ(val, true);
- break;
- }
- }
-}
-
-// Writes 'num_vals' values with width 'bit_width' and reads them back.
-void TestBitArrayValues(int bit_width, int num_vals) {
- const int kTestLen = BitUtil::Ceil(bit_width * num_vals, 8);
- const uint64_t mod = bit_width == 64? 1 : 1LL << bit_width;
-
- faststring buffer(kTestLen);
- BitWriter writer(&buffer);
- for (int i = 0; i < num_vals; ++i) {
- writer.PutValue(i % mod, bit_width);
- }
- writer.Flush();
- EXPECT_EQ(writer.bytes_written(), kTestLen);
-
- BitReader reader(buffer.data(), kTestLen);
- for (int i = 0; i < num_vals; ++i) {
- int64_t val = 0;
- bool result = reader.GetValue(bit_width, &val);
- EXPECT_TRUE(result);
- EXPECT_EQ(val, i % mod);
- }
- EXPECT_EQ(reader.bytes_left(), 0);
-}
-
-TEST(BitArray, TestValues) {
- for (int width = 1; width <= kMaxWidth; ++width) {
- TestBitArrayValues(width, 1);
- TestBitArrayValues(width, 2);
- // Don't write too many values
- TestBitArrayValues(width, (width < 12) ? (1 << width) : 4096);
- TestBitArrayValues(width, 1024);
- }
-}
-
-// Test some mixed values
-TEST(BitArray, TestMixed) {
- const int kTestLenBits = 1024;
- faststring buffer(kTestLenBits / 8);
- bool parity = true;
-
- BitWriter writer(&buffer);
- for (int i = 0; i < kTestLenBits; ++i) {
- if (i % 2 == 0) {
- writer.PutValue(parity, 1);
- parity = !parity;
- } else {
- writer.PutValue(i, 10);
- }
- }
- writer.Flush();
-
- parity = true;
- BitReader reader(buffer.data(), buffer.size());
- for (int i = 0; i < kTestLenBits; ++i) {
- bool result;
- if (i % 2 == 0) {
- bool val = false;
- result = reader.GetValue(1, &val);
- EXPECT_EQ(val, parity);
- parity = !parity;
- } else {
- int val;
- result = reader.GetValue(10, &val);
- EXPECT_EQ(val, i);
- }
- EXPECT_TRUE(result);
- }
-}
-
+class TestRle : public testing::Test {};
// Validates encoding of values by encoding and decoding them. If
// expected_encoding != NULL, also validates that the encoded buffer is
// exactly 'expected_encoding'.
diff --git a/run-ut.sh b/run-ut.sh
index 95a9456..349fe8f 100755
--- a/run-ut.sh
+++ b/run-ut.sh
@@ -159,6 +159,8 @@ ${DORIS_TEST_BINARY_DIR}/util/block_compression_test
${DORIS_TEST_BINARY_DIR}/util/arrow/arrow_row_block_test
${DORIS_TEST_BINARY_DIR}/util/arrow/arrow_row_batch_test
${DORIS_TEST_BINARY_DIR}/util/counter_cond_variable_test
+${DORIS_TEST_BINARY_DIR}/util/bit_stream_utils_test
+${DORIS_TEST_BINARY_DIR}/util/frame_of_reference_coding_test
# Running common Unittest
${DORIS_TEST_BINARY_DIR}/common/resource_tls_test
@@ -267,6 +269,7 @@ ${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/segment_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/page_compression_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/column_zone_map_test
${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/row_ranges_test
+${DORIS_TEST_BINARY_DIR}/olap/rowset/segment_v2/frame_of_reference_page_test
${DORIS_TEST_BINARY_DIR}/olap/txn_manager_test
${DORIS_TEST_BINARY_DIR}/olap/storage_types_test
${DORIS_TEST_BINARY_DIR}/olap/generic_iterators_test
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org