You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by da...@apache.org on 2022/10/24 01:19:53 UTC

[doris] branch branch-1.2-unstable updated: [Fix](array-type) bugfix for array column with delete condition (#13361)

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-1.2-unstable
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-unstable by this push:
     new 58c7edf8e7 [Fix](array-type) bugfix for array column with delete condition (#13361)
58c7edf8e7 is described below

commit 58c7edf8e7b4c94f2c29091621e24652ef2792cb
Author: camby <10...@qq.com>
AuthorDate: Fri Oct 21 09:29:02 2022 +0800

    [Fix](array-type) bugfix for array column with delete condition (#13361)
    
    Fix DELETE statements whose condition is on an array column, for example:
    delete from tbl where c_array is null;
    
    For more information, please refer to #13360.
    
    Co-authored-by: cambyzju <zh...@baidu.com>
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  4 +--
 be/src/olap/schema.cpp                             | 11 +++++---
 be/src/olap/schema.h                               |  3 +-
 be/src/vec/columns/column_array.cpp                | 32 ++++++++++++++++++++++
 be/src/vec/columns/column_array.h                  |  2 ++
 .../data/delete_p0/test_array_column_delete.out    |  5 ++++
 .../delete_p0/test_array_column_delete.groovy      | 29 ++++++++++++++++++++
 7 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e6435e8be1..0d6d3b10bd 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1081,8 +1081,8 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
             auto cid = _schema.column_id(i);
             auto column_desc = _schema.column(cid);
             if (_is_pred_column[cid]) {
-                _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr(
-                        column_desc->type(), column_desc->is_nullable());
+                _current_return_columns[cid] =
+                        Schema::get_predicate_column_nullable_ptr(*column_desc);
                 _current_return_columns[cid]->reserve(_opts.block_row_max);
             } else if (i >= block->columns()) {
                 // if i >= block->columns means the column and not the pred_column means `column i` is
diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp
index d6352b0cd1..01f0343317 100644
--- a/be/src/olap/schema.cpp
+++ b/be/src/olap/schema.cpp
@@ -114,10 +114,13 @@ vectorized::DataTypePtr Schema::get_data_type_ptr(const Field& field) {
     return vectorized::DataTypeFactory::instance().create_data_type(field);
 }
 
-vectorized::IColumn::MutablePtr Schema::get_predicate_column_nullable_ptr(FieldType type,
-                                                                          bool is_null) {
-    vectorized::IColumn::MutablePtr ptr = Schema::get_predicate_column_ptr(type);
-    if (is_null) {
+vectorized::IColumn::MutablePtr Schema::get_predicate_column_nullable_ptr(const Field& field) {
+    if (UNLIKELY(field.type() == OLAP_FIELD_TYPE_ARRAY)) {
+        return get_data_type_ptr(field)->create_column();
+    }
+
+    vectorized::IColumn::MutablePtr ptr = Schema::get_predicate_column_ptr(field.type());
+    if (field.is_nullable()) {
         return doris::vectorized::ColumnNullable::create(std::move(ptr),
                                                          doris::vectorized::ColumnUInt8::create());
     }
diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h
index f3f09ffe3c..7c578f4f27 100644
--- a/be/src/olap/schema.h
+++ b/be/src/olap/schema.h
@@ -112,8 +112,7 @@ public:
 
     static vectorized::IColumn::MutablePtr get_predicate_column_ptr(FieldType type);
 
-    static vectorized::IColumn::MutablePtr get_predicate_column_nullable_ptr(FieldType type,
-                                                                             bool is_null = false);
+    static vectorized::IColumn::MutablePtr get_predicate_column_nullable_ptr(const Field& field);
 
     const std::vector<Field*>& columns() const { return _cols; }
 
diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp
index 9d57a8f003..f87f58ed40 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -503,6 +503,38 @@ void ColumnArray::insert_indices_from(const IColumn& src, const int* indices_beg
     }
 }
 
+Status ColumnArray::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) {
+    // Appends the rows listed in sel[0..sel_size) to the ColumnArray that col_ptr points to.
+    auto to = reinterpret_cast<vectorized::ColumnArray*>(col_ptr);
+    auto& to_offsets = to->get_offsets();
+    size_t element_size = 0; // total nested elements across the selected rows
+    size_t max_end = 0;      // largest one-past-the-last nested index of any selected row
+    for (size_t i = 0; i < sel_size; ++i) {
+        element_size += size_at(sel[i]);
+        max_end = std::max(max_end, offset_at(sel[i]) + size_at(sel[i]));
+    }
+    // nested_sel holds uint16_t indices, so a row's last element (not only its first) must fit;
+    // otherwise row_off + j below would silently wrap and select the wrong elements.
+    if (max_end > static_cast<size_t>(std::numeric_limits<uint16_t>::max()) + 1) {
+        return Status::IOError("array elements too large than uint16_t::max");
+    }
+    to_offsets.reserve(to_offsets.size() + sel_size);
+    auto nested_sel = std::make_unique<uint16_t[]>(element_size);
+    size_t nested_sel_size = 0;
+    for (size_t i = 0; i < sel_size; ++i) {
+        const size_t row_off = offset_at(sel[i]);
+        const size_t row_size = size_at(sel[i]);
+        to_offsets.push_back(to_offsets.back() + row_size);
+        for (size_t j = 0; j < row_size; ++j) {
+            nested_sel[nested_sel_size++] = static_cast<uint16_t>(row_off + j);
+        }
+    }
+    if (nested_sel_size > 0) {
+        return data->filter_by_selector(nested_sel.get(), nested_sel_size, &to->get_data());
+    }
+    return Status::OK();
+}
+
 ColumnPtr ColumnArray::replicate(const IColumn::Offsets& replicate_offsets) const {
     if (replicate_offsets.empty()) return clone_empty();
 
diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h
index 2fc0194e69..9028e4e38c 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -175,6 +175,8 @@ public:
         offsets->clear();
     }
 
+    Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) override;
+
 private:
     WrappedPtr data;
     WrappedPtr offsets;
diff --git a/regression-test/data/delete_p0/test_array_column_delete.out b/regression-test/data/delete_p0/test_array_column_delete.out
new file mode 100644
index 0000000000..8324b608c9
--- /dev/null
+++ b/regression-test/data/delete_p0/test_array_column_delete.out
@@ -0,0 +1,5 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+2	[12, 3]
+3	[]
+
diff --git a/regression-test/suites/delete_p0/test_array_column_delete.groovy b/regression-test/suites/delete_p0/test_array_column_delete.groovy
new file mode 100644
index 0000000000..21455285a4
--- /dev/null
+++ b/regression-test/suites/delete_p0/test_array_column_delete.groovy
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_array_column_delete") {
+    def tableName = "test_array_column_delete"
+    // Regression test: DELETE with an IS NULL predicate on a nullable ARRAY<INT> column.
+    sql """ SET enable_vectorized_engine = TRUE; """
+    sql "ADMIN SET FRONTEND CONFIG ('enable_array_type' = 'true')"
+    // Start from a fresh table, then load five rows: ids 1, 4 and 5 have a NULL array.
+    sql """ DROP TABLE IF EXISTS ${tableName}; """
+    sql """ CREATE TABLE ${tableName} (id INT NULL, c_array ARRAY<INT> NULL) ENGINE=OLAP DUPLICATE KEY(id) DISTRIBUTED BY HASH(id) BUCKETS 4 PROPERTIES ( "replication_allocation" = "tag.location.default: 1","in_memory" = "false","storage_format" = "V2") """
+    sql """ insert into ${tableName} values(1, NULL),(2,[12,3]),(3,[]),(4,NULL),(5,NULL) """
+    sql """ DELETE FROM ${tableName} WHERE c_array is NULL """ // targets ids 1, 4 and 5
+    qt_sql """ SELECT * FROM ${tableName} order by id """ // ids 2 and 3 (the empty array is not NULL) should remain
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org