You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2023/01/17 13:50:55 UTC

[GitHub] [doris] xy720 commented on a diff in pull request #15966: [Feature](map)support complex struct for doris

xy720 commented on code in PR #15966:
URL: https://github.com/apache/doris/pull/15966#discussion_r1072095959


##########
be/src/olap/rowset/segment_v2/column_reader.h:
##########
@@ -393,6 +393,44 @@ class EmptyFileColumnIterator final : public ColumnIterator {
     ordinal_t get_current_ordinal() const override { return 0; }
 };
 
+// This iterator is used to read map value column
+class MapFileColumnIterator final : public ColumnIterator {
+public:
+    explicit MapFileColumnIterator(ColumnReader* reader, ColumnIterator* null_iterator,
+                                   ColumnIterator* key_iterator, ColumnIterator* val_iterator);
+
+    ~MapFileColumnIterator() override = default;
+
+    Status init(const ColumnIteratorOptions& opts) override;
+
+    Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) override;
+
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override;
+
+    Status read_by_rowids(const rowid_t* rowids, const size_t count,
+                          vectorized::MutableColumnPtr& dst) override;
+
+    Status seek_to_first() override {
+        RETURN_IF_ERROR(_key_iterator->seek_to_first());
+        RETURN_IF_ERROR(_val_iterator->seek_to_first());
+        return Status::OK();
+    }
+
+    Status seek_to_ordinal(ordinal_t ord) override;
+
+    ordinal_t get_current_ordinal() const override {
+        return _key_iterator->get_current_ordinal();
+    }
+
+private:
+    ColumnReader* _map_reader; // need ?
+    std::unique_ptr<ColumnIterator> _null_iterator;
+    std::unique_ptr<ColumnIterator> _key_iterator; // ArrayFileColumnIterator
+    std::unique_ptr<ColumnIterator> _val_iterator; // ArrayFileColumnIterator
+
+    Status _peek_one_offset(ordinal_t* offset);

Review Comment:
   Do we need this method?



##########
be/src/olap/rowset/segment_v2/column_reader.cpp:
##########
@@ -449,13 +478,97 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) {
                     null_iterator);
             return Status::OK();
         }
+	case FieldType::OLAP_FIELD_TYPE_MAP: {
+            ColumnIterator* key_iterator = nullptr;
+            RETURN_IF_ERROR(_sub_readers[0]->new_iterator(&key_iterator));
+            ColumnIterator* val_iterator = nullptr;
+            RETURN_IF_ERROR(_sub_readers[1]->new_iterator(&val_iterator));
+            ColumnIterator* null_iterator = nullptr;
+            if (is_nullable()) {
+                RETURN_IF_ERROR(_sub_readers[2]->new_iterator(&null_iterator));
+            }
+            *iterator = new MapFileColumnIterator(this,null_iterator,
+                                                  key_iterator, val_iterator);
+            return Status::OK();
+        }
         default:
             return Status::NotSupported("unsupported type to create iterator: {}",
                                         std::to_string(type));
         }
     }
 }
 
+///====================== MapFileColumnIterator ============================////
+MapFileColumnIterator::MapFileColumnIterator(ColumnReader* reader, ColumnIterator* null_iterator,
+                                                 ColumnIterator* key_iterator,
+                                                 ColumnIterator* val_iterator)
+        : _map_reader(reader) {
+    _key_iterator.reset(key_iterator);
+    _val_iterator.reset(val_iterator);
+    if (_map_reader->is_nullable()) {
+        _null_iterator.reset(null_iterator);
+    }
+}
+
+Status MapFileColumnIterator::init(const ColumnIteratorOptions& opts) {
+    RETURN_IF_ERROR(_key_iterator->init(opts));
+    RETURN_IF_ERROR(_val_iterator->init(opts));
+    if (_map_reader->is_nullable()) {
+        RETURN_IF_ERROR(_null_iterator->init(opts));
+    }
+    return Status::OK();
+}
+
+Status MapFileColumnIterator::_peek_one_offset(ordinal_t* offset) {
+    return Status::OK();
+}
+
+Status MapFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) {
+    return Status::OK();
+}
+
+
+Status MapFileColumnIterator::seek_to_ordinal(ordinal_t ord) {
+    RETURN_IF_ERROR(_key_iterator->seek_to_ordinal(ord));
+    RETURN_IF_ERROR(_val_iterator->seek_to_ordinal(ord));
+    if (_map_reader->is_nullable()) {
+        RETURN_IF_ERROR(_null_iterator->seek_to_ordinal(ord));
+    }
+    return Status::OK();
+}
+
+Status MapFileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst,
+                                           bool* has_null) {
+    const auto* column_map = vectorized::check_and_get_column<vectorized::ColumnMap>(
+            dst->is_nullable() ? static_cast<vectorized::ColumnNullable&>(*dst).get_nested_column()
+                               : *dst);
+    auto column_key_ptr = column_map->get_keys().assume_mutable();
+    auto column_val_ptr = column_map->get_values().assume_mutable();
+    RETURN_IF_ERROR(_key_iterator->next_batch(n, column_key_ptr, has_null));

Review Comment:
   Use a local variable to pass into the method,
   such as:
   
   ```
   size_t num_read = *n;
   RETURN_IF_ERROR(_key_iterator->next_batch(&num_read, column_key_ptr, has_null));
   ```



##########
be/src/olap/rowset/segment_v2/column_reader.h:
##########
@@ -393,6 +393,44 @@ class EmptyFileColumnIterator final : public ColumnIterator {
     ordinal_t get_current_ordinal() const override { return 0; }
 };
 
+// This iterator is used to read map value column
+class MapFileColumnIterator final : public ColumnIterator {
+public:
+    explicit MapFileColumnIterator(ColumnReader* reader, ColumnIterator* null_iterator,
+                                   ColumnIterator* key_iterator, ColumnIterator* val_iterator);
+
+    ~MapFileColumnIterator() override = default;
+
+    Status init(const ColumnIteratorOptions& opts) override;
+
+    Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) override;
+
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override;
+
+    Status read_by_rowids(const rowid_t* rowids, const size_t count,
+                          vectorized::MutableColumnPtr& dst) override;
+
+    Status seek_to_first() override {
+        RETURN_IF_ERROR(_key_iterator->seek_to_first());
+        RETURN_IF_ERROR(_val_iterator->seek_to_first());

Review Comment:
   Did you forget _null_iterator here?



##########
be/src/olap/rowset/segment_v2/column_writer.cpp:
##########
@@ -173,6 +173,119 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn*
             *writer = std::move(writer_local);
             return Status::OK();
         }
+	case FieldType::OLAP_FIELD_TYPE_MAP: {
+            DCHECK(column->get_subtype_count() == 2);
+            // todo . here key and value is array only?
+            const TabletColumn& key_column = column->get_sub_column(0); // field_type is true key and value
+            const TabletColumn& value_column = column->get_sub_column(1);
+
+            // create null writer
+            ScalarColumnWriter* null_writer = nullptr;
+            if (opts.meta->is_nullable()) {
+                FieldType null_type = FieldType::OLAP_FIELD_TYPE_TINYINT;
+                ColumnWriterOptions null_options;
+                null_options.meta = opts.meta->add_children_columns();
+                null_options.meta->set_column_id(3);
+                null_options.meta->set_unique_id(3);
+                null_options.meta->set_type(null_type);
+                null_options.meta->set_is_nullable(false);
+                null_options.meta->set_length(
+                        get_scalar_type_info<OLAP_FIELD_TYPE_TINYINT>()->size());
+                null_options.meta->set_encoding(DEFAULT_ENCODING);
+                null_options.meta->set_compression(opts.meta->compression());
+
+                null_options.need_zone_map = false;
+                null_options.need_bloom_filter = false;
+                null_options.need_bitmap_index = false;
+
+                TabletColumn null_column = TabletColumn(
+                        OLAP_FIELD_AGGREGATION_NONE, null_type, false,
+                        null_options.meta->unique_id(), null_options.meta->length());
+                null_column.set_name("nullable");
+                null_column.set_index_length(-1); // no short key index
+                std::unique_ptr<Field> null_field(FieldFactory::create(null_column));
+                null_writer =
+                        new ScalarColumnWriter(null_options, std::move(null_field), file_writer);
+            }
+
+            // create key writer
+            std::unique_ptr<ColumnWriter> key_writer;
+	    ColumnWriterOptions key_opts;
+            TabletColumn key_list_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY);
+            {
+		key_list_column.add_sub_column(const_cast<TabletColumn&>(key_column));
+//                key_list_column.add_sub_column(key_column);
+                key_list_column.set_name("map.key");
+                key_list_column.set_index_length(-1);
+
+
+                key_opts.meta = opts.meta->mutable_children_columns(0);

Review Comment:
   opts.meta and its children have already been initialized outside this function;
   see the init_column_meta() function in segment_writer.cpp.
   So we should not change the column_id/unique_id/encoding/compression here.
   We only need to reset the type, the nullable flag, and the length.



##########
be/src/olap/rowset/segment_v2/column_writer.cpp:
##########
@@ -173,6 +173,119 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn*
             *writer = std::move(writer_local);
             return Status::OK();
         }
+	case FieldType::OLAP_FIELD_TYPE_MAP: {
+            DCHECK(column->get_subtype_count() == 2);
+            // todo . here key and value is array only?
+            const TabletColumn& key_column = column->get_sub_column(0); // field_type is true key and value
+            const TabletColumn& value_column = column->get_sub_column(1);
+
+            // create null writer
+            ScalarColumnWriter* null_writer = nullptr;
+            if (opts.meta->is_nullable()) {
+                FieldType null_type = FieldType::OLAP_FIELD_TYPE_TINYINT;
+                ColumnWriterOptions null_options;
+                null_options.meta = opts.meta->add_children_columns();
+                null_options.meta->set_column_id(3);
+                null_options.meta->set_unique_id(3);
+                null_options.meta->set_type(null_type);
+                null_options.meta->set_is_nullable(false);
+                null_options.meta->set_length(
+                        get_scalar_type_info<OLAP_FIELD_TYPE_TINYINT>()->size());
+                null_options.meta->set_encoding(DEFAULT_ENCODING);
+                null_options.meta->set_compression(opts.meta->compression());
+
+                null_options.need_zone_map = false;
+                null_options.need_bloom_filter = false;
+                null_options.need_bitmap_index = false;
+
+                TabletColumn null_column = TabletColumn(
+                        OLAP_FIELD_AGGREGATION_NONE, null_type, false,
+                        null_options.meta->unique_id(), null_options.meta->length());
+                null_column.set_name("nullable");
+                null_column.set_index_length(-1); // no short key index
+                std::unique_ptr<Field> null_field(FieldFactory::create(null_column));
+                null_writer =
+                        new ScalarColumnWriter(null_options, std::move(null_field), file_writer);
+            }
+
+            // create key writer
+            std::unique_ptr<ColumnWriter> key_writer;
+	    ColumnWriterOptions key_opts;
+            TabletColumn key_list_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY);
+            {
+		key_list_column.add_sub_column(const_cast<TabletColumn&>(key_column));
+//                key_list_column.add_sub_column(key_column);
+                key_list_column.set_name("map.key");
+                key_list_column.set_index_length(-1);
+
+
+                key_opts.meta = opts.meta->mutable_children_columns(0);
+                key_opts.meta->set_column_id(4);
+                key_opts.meta->set_unique_id(4);
+                key_opts.meta->set_type(OLAP_FIELD_TYPE_ARRAY);
+                key_opts.meta->set_length(0);
+		key_opts.meta->set_encoding(BIT_SHUFFLE);
+                key_opts.meta->set_compression(segment_v2::CompressionTypePB::LZ4F);
+                key_opts.need_zone_map = false;
+		// no need key array's null map
+                key_opts.meta->set_is_nullable(false);
+
+                ColumnMetaPB* child_meta = key_opts.meta->add_children_columns();
+                child_meta->set_column_id(5);
+                child_meta->set_unique_id(5);
+                child_meta->set_type(key_column.type());
+                child_meta->set_length(key_column.length());
+		child_meta->set_compression(segment_v2::CompressionTypePB::LZ4F);
+                child_meta->set_encoding(DICT_ENCODING);

Review Comment:
   Why is the encoding DICT_ENCODING?
   Maybe it should be DEFAULT_ENCODING?



##########
be/src/olap/types.h:
##########
@@ -430,6 +431,103 @@ class ArrayTypeInfo : public TypeInfo {
     TypeInfoPtr _item_type_info;
     const size_t _item_size;
 };
+///====================== MapType Info ==========================///
+class MapTypeInfo : public TypeInfo {
+public:
+    explicit MapTypeInfo(TypeInfoPtr key_type_info, TypeInfoPtr value_type_info)
+            : _key_type_info(std::move(key_type_info)), _value_type_info(std::move(value_type_info)) {}
+    ~MapTypeInfo() override = default;
+
+    inline bool equal(const void* left, const void* right) const override {
+        auto l_value = reinterpret_cast<const MapValue*>(left);
+        auto r_value = reinterpret_cast<const MapValue*>(right);
+	return l_value->size() == r_value->size();
+    }
+
+    int cmp(const void* left, const void* right) const override {
+        auto l_value = reinterpret_cast<const MapValue*>(left);
+        auto r_value = reinterpret_cast<const MapValue*>(right);
+        uint32_t l_size = l_value->size();
+        uint32_t r_size = r_value->size();
+	if (l_size < r_size) {
+            return -1;
+        } else if (l_size > r_size) {
+            return 1;
+        } else {
+            return 0;
+        }
+    }
+
+    void shallow_copy(void* dest, const void* src) const override {
+        auto dest_value = reinterpret_cast<MapValue*>(dest);
+        auto src_value = reinterpret_cast<const MapValue*>(src);
+        dest_value->shallow_copy(src_value);
+    }
+
+    void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override {
+        DCHECK(false);
+
+    }
+
+    void copy_object(void* dest, const void* src, MemPool* mem_pool) const override {
+        deep_copy(dest, src, mem_pool);
+    }
+
+    void direct_copy(void* dest, const void* src) const override {
+        CHECK(false);
+    }
+
+    void direct_copy(uint8_t** base, void* dest, const void* src) const {
+        CHECK(false);
+    }
+
+    void direct_copy_may_cut(void* dest, const void* src) const override { direct_copy(dest, src); }
+
+    Status convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool,
+                        size_t variable_len = 0) const override {
+        return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
+    }
+
+    Status from_string(void* buf, const std::string& scan_key, const int precision = 0,
+                       const int scale = 0) const override {
+        return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
+    }
+
+    std::string to_string(const void* src) const override {
+        return "{}";
+    }
+
+    void set_to_max(void* buf) const override {
+        DCHECK(false) << "set_to_max of list is not implemented.";
+    }
+
+    void set_to_min(void* buf) const override {
+        DCHECK(false) << "set_to_min of list is not implemented.";
+    }
+
+    uint32_t hash_code(const void* data, uint32_t seed) const override {
+        auto map_value = reinterpret_cast<const MapValue*>(data);
+        auto size = map_value->size();
+        uint32_t result = HashUtil::hash(&size, sizeof(size), seed);
+        result = seed * result + _key_type_info->hash_code(
+                                         map_value->key_data(), seed)
+                 + _value_type_info->hash_code(
+                           map_value->value_data(), seed);
+        return result;
+    }
+
+     // todo . is here only to need return 16 for two ptr?
+    const size_t size() const override { return 16; }

Review Comment:
   Use sizeof(MapValue) here instead of the hard-coded 16.



##########
be/src/olap/rowset/segment_v2/column_writer.cpp:
##########
@@ -173,6 +173,119 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn*
             *writer = std::move(writer_local);
             return Status::OK();
         }
+	case FieldType::OLAP_FIELD_TYPE_MAP: {
+            DCHECK(column->get_subtype_count() == 2);
+            // todo . here key and value is array only?
+            const TabletColumn& key_column = column->get_sub_column(0); // field_type is true key and value
+            const TabletColumn& value_column = column->get_sub_column(1);
+
+            // create null writer
+            ScalarColumnWriter* null_writer = nullptr;
+            if (opts.meta->is_nullable()) {
+                FieldType null_type = FieldType::OLAP_FIELD_TYPE_TINYINT;
+                ColumnWriterOptions null_options;
+                null_options.meta = opts.meta->add_children_columns();
+                null_options.meta->set_column_id(3);
+                null_options.meta->set_unique_id(3);
+                null_options.meta->set_type(null_type);
+                null_options.meta->set_is_nullable(false);
+                null_options.meta->set_length(
+                        get_scalar_type_info<OLAP_FIELD_TYPE_TINYINT>()->size());
+                null_options.meta->set_encoding(DEFAULT_ENCODING);
+                null_options.meta->set_compression(opts.meta->compression());
+
+                null_options.need_zone_map = false;
+                null_options.need_bloom_filter = false;
+                null_options.need_bitmap_index = false;
+
+                TabletColumn null_column = TabletColumn(
+                        OLAP_FIELD_AGGREGATION_NONE, null_type, false,
+                        null_options.meta->unique_id(), null_options.meta->length());
+                null_column.set_name("nullable");
+                null_column.set_index_length(-1); // no short key index
+                std::unique_ptr<Field> null_field(FieldFactory::create(null_column));
+                null_writer =
+                        new ScalarColumnWriter(null_options, std::move(null_field), file_writer);
+            }
+
+            // create key writer
+            std::unique_ptr<ColumnWriter> key_writer;
+	    ColumnWriterOptions key_opts;
+            TabletColumn key_list_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY);
+            {
+		key_list_column.add_sub_column(const_cast<TabletColumn&>(key_column));
+//                key_list_column.add_sub_column(key_column);
+                key_list_column.set_name("map.key");
+                key_list_column.set_index_length(-1);
+
+
+                key_opts.meta = opts.meta->mutable_children_columns(0);
+                key_opts.meta->set_column_id(4);
+                key_opts.meta->set_unique_id(4);
+                key_opts.meta->set_type(OLAP_FIELD_TYPE_ARRAY);
+                key_opts.meta->set_length(0);
+		key_opts.meta->set_encoding(BIT_SHUFFLE);
+                key_opts.meta->set_compression(segment_v2::CompressionTypePB::LZ4F);
+                key_opts.need_zone_map = false;
+		// no need key array's null map
+                key_opts.meta->set_is_nullable(false);
+
+                ColumnMetaPB* child_meta = key_opts.meta->add_children_columns();

Review Comment:
   It would be better to set child_meta's values before setting key_opts.meta's values,
   because we can then use the values in key_opts.meta to set child_meta's values.



##########
be/src/olap/rowset/segment_v2/column_writer.cpp:
##########
@@ -173,6 +173,119 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn*
             *writer = std::move(writer_local);
             return Status::OK();
         }
+	case FieldType::OLAP_FIELD_TYPE_MAP: {
+            DCHECK(column->get_subtype_count() == 2);
+            // todo . here key and value is array only?
+            const TabletColumn& key_column = column->get_sub_column(0); // field_type is true key and value
+            const TabletColumn& value_column = column->get_sub_column(1);
+
+            // create null writer
+            ScalarColumnWriter* null_writer = nullptr;
+            if (opts.meta->is_nullable()) {
+                FieldType null_type = FieldType::OLAP_FIELD_TYPE_TINYINT;
+                ColumnWriterOptions null_options;
+                null_options.meta = opts.meta->add_children_columns();
+                null_options.meta->set_column_id(3);
+                null_options.meta->set_unique_id(3);
+                null_options.meta->set_type(null_type);
+                null_options.meta->set_is_nullable(false);
+                null_options.meta->set_length(
+                        get_scalar_type_info<OLAP_FIELD_TYPE_TINYINT>()->size());
+                null_options.meta->set_encoding(DEFAULT_ENCODING);
+                null_options.meta->set_compression(opts.meta->compression());
+
+                null_options.need_zone_map = false;
+                null_options.need_bloom_filter = false;
+                null_options.need_bitmap_index = false;
+
+                TabletColumn null_column = TabletColumn(
+                        OLAP_FIELD_AGGREGATION_NONE, null_type, false,
+                        null_options.meta->unique_id(), null_options.meta->length());
+                null_column.set_name("nullable");
+                null_column.set_index_length(-1); // no short key index
+                std::unique_ptr<Field> null_field(FieldFactory::create(null_column));
+                null_writer =
+                        new ScalarColumnWriter(null_options, std::move(null_field), file_writer);
+            }
+
+            // create key writer
+            std::unique_ptr<ColumnWriter> key_writer;
+	    ColumnWriterOptions key_opts;
+            TabletColumn key_list_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY);
+            {
+		key_list_column.add_sub_column(const_cast<TabletColumn&>(key_column));
+//                key_list_column.add_sub_column(key_column);
+                key_list_column.set_name("map.key");
+                key_list_column.set_index_length(-1);
+
+
+                key_opts.meta = opts.meta->mutable_children_columns(0);
+                key_opts.meta->set_column_id(4);
+                key_opts.meta->set_unique_id(4);
+                key_opts.meta->set_type(OLAP_FIELD_TYPE_ARRAY);
+                key_opts.meta->set_length(0);
+		key_opts.meta->set_encoding(BIT_SHUFFLE);
+                key_opts.meta->set_compression(segment_v2::CompressionTypePB::LZ4F);
+                key_opts.need_zone_map = false;
+		// no need key array's null map
+                key_opts.meta->set_is_nullable(false);
+
+                ColumnMetaPB* child_meta = key_opts.meta->add_children_columns();
+                child_meta->set_column_id(5);
+                child_meta->set_unique_id(5);
+                child_meta->set_type(key_column.type());
+                child_meta->set_length(key_column.length());
+		child_meta->set_compression(segment_v2::CompressionTypePB::LZ4F);

Review Comment:
   The compression type should be the same within one tablet_schema.
   Use key_opts.meta->compression() instead.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org