You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/28 01:41:23 UTC

[doris] branch branch-1.2-lts updated: [cherry-pick](bitmap)Support return bitmap data in select statement in vectorization (#15388)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 14553d0068  [cherry-pick](bitmap)Support return bitmap data in select statement in vectorization (#15388)
14553d0068 is described below

commit 14553d006881a99d910cfe772d2227bcba7bf3b8
Author: chenlinzhong <49...@qq.com>
AuthorDate: Wed Dec 28 09:41:17 2022 +0800

     [cherry-pick](bitmap)Support return bitmap data in select statement in vectorization (#15388)
    
    cherry-pick #15224
---
 be/src/runtime/result_writer.h                     |  4 ++
 be/src/vec/columns/column.h                        |  2 +
 be/src/vec/columns/column_complex.h                |  1 +
 be/src/vec/columns/column_nullable.h               |  1 +
 .../exec/format/parquet/vparquet_column_reader.h   |  3 +-
 be/src/vec/sink/vmysql_result_writer.cpp           | 26 ++++++++++-
 .../return_binaray/test_return_binaray_hll.groovy  | 50 ++++++++++++++++++++++
 .../test_return_binary_bitmap.groovy               | 50 ++++++++++++++++++++++
 8 files changed, 133 insertions(+), 4 deletions(-)

diff --git a/be/src/runtime/result_writer.h b/be/src/runtime/result_writer.h
index 7d669e1b4f..ced54960d0 100644
--- a/be/src/runtime/result_writer.h
+++ b/be/src/runtime/result_writer.h
@@ -52,6 +52,10 @@ public:
 
     virtual bool output_object_data() const { return _output_object_data; }
 
+    void set_output_object_data(bool output_object_data) {
+        _output_object_data = output_object_data;
+    }
+
     static const std::string NULL_IN_CSV;
     virtual void set_header_info(const std::string& header_type, const std::string& header) {
         _header_type = header_type;
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index c5eeb18d8f..6bd5ac7855 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -527,6 +527,8 @@ public:
 
     virtual bool is_bitmap() const { return false; }
 
+    virtual bool is_hll() const { return false; }
+
     // true if column has null element
     virtual bool has_null() const { return false; }
 
diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h
index ec15c65df7..4b6562d534 100644
--- a/be/src/vec/columns/column_complex.h
+++ b/be/src/vec/columns/column_complex.h
@@ -47,6 +47,7 @@ public:
     bool is_numeric() const override { return false; }
 
     bool is_bitmap() const override { return std::is_same_v<T, BitmapValue>; }
+    bool is_hll() const override { return std::is_same_v<T, HyperLogLog>; }
 
     size_t size() const override { return data.size(); }
 
diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h
index 9e3b480c15..91c0ac0cf4 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -214,6 +214,7 @@ public:
 
     bool is_nullable() const override { return true; }
     bool is_bitmap() const override { return get_nested_column().is_bitmap(); }
+    bool is_hll() const override { return get_nested_column().is_hll(); }
     bool is_column_decimal() const override { return get_nested_column().is_column_decimal(); }
     bool is_column_string() const override { return get_nested_column().is_column_string(); }
     bool is_column_array() const override { return get_nested_column().is_column_array(); }
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.h b/be/src/vec/exec/format/parquet/vparquet_column_reader.h
index f8d8085df8..949488a395 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.h
@@ -107,8 +107,7 @@ public:
     virtual Status read_column_data(ColumnPtr& doris_column, DataTypePtr& type,
                                     ColumnSelectVector& select_vector, size_t batch_size,
                                     size_t* read_rows, bool* eof) = 0;
-    static Status create(FileReader* file, FieldSchema* field,
-                         const tparquet::RowGroup& row_group,
+    static Status create(FileReader* file, FieldSchema* field, const tparquet::RowGroup& row_group,
                          const std::vector<RowRange>& row_ranges, cctz::time_zone* ctz,
                          std::unique_ptr<ParquetColumnReader>& reader, size_t max_buf_size);
     void init_column_metadata(const tparquet::ColumnChunk& chunk);
diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp
index 155e4c3f2e..2900d2d486 100644
--- a/be/src/vec/sink/vmysql_result_writer.cpp
+++ b/be/src/vec/sink/vmysql_result_writer.cpp
@@ -17,11 +17,13 @@
 
 #include "vec/sink/vmysql_result_writer.h"
 
+#include "olap/hll.h"
 #include "runtime/buffer_control_block.h"
 #include "runtime/jsonb_value.h"
 #include "runtime/large_int_value.h"
 #include "runtime/runtime_state.h"
 #include "vec/columns/column_array.h"
+#include "vec/columns/column_complex.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_vector.h"
 #include "vec/common/assert_cast.h"
@@ -46,7 +48,7 @@ Status VMysqlResultWriter::init(RuntimeState* state) {
     if (nullptr == _sinker) {
         return Status::InternalError("sinker is NULL pointer.");
     }
-
+    set_output_object_data(state->return_object_data_as_binary());
     return Status::OK();
 }
 
@@ -91,7 +93,27 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr,
             }
 
             if constexpr (type == TYPE_OBJECT) {
-                buf_ret = _buffer.push_null();
+                if (column->is_bitmap() && output_object_data()) {
+                    // Bitmap: serialize a copy of the element and return it as binary.
+                    const auto* pColumnComplexType = assert_cast<
+                            const vectorized::ColumnComplexType<BitmapValue>*>(column.get());
+                    BitmapValue bitmapValue = pColumnComplexType->get_element(i);
+                    size_t size = bitmapValue.getSizeInBytes();
+                    std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
+                    bitmapValue.write(buf.get());
+                    buf_ret = _buffer.push_string(buf.get(), size);
+                } else if (column->is_hll() && output_object_data()) {
+                    // HLL: max_serialized_size() is only the buffer capacity; send the
+                    // byte count serialize() returns so no trailing padding is emitted.
+                    const auto* pColumnComplexType = assert_cast<
+                            const vectorized::ColumnComplexType<HyperLogLog>*>(column.get());
+                    HyperLogLog hyperLogLog = pColumnComplexType->get_element(i);
+                    auto buf = std::make_unique<char[]>(hyperLogLog.max_serialized_size());
+                    size_t size = hyperLogLog.serialize((uint8*)buf.get());
+                    buf_ret = _buffer.push_string(buf.get(), size);
+                } else {
+                    buf_ret = _buffer.push_null();
+                }
             }
             if constexpr (type == TYPE_VARCHAR) {
                 const auto string_val = column->get_data_at(i);
diff --git a/regression-test/suites/query_p1/return_binaray/test_return_binaray_hll.groovy b/regression-test/suites/query_p1/return_binaray/test_return_binaray_hll.groovy
new file mode 100644
index 0000000000..d581b6a04e
--- /dev/null
+++ b/regression-test/suites/query_p1/return_binaray/test_return_binaray_hll.groovy
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_return_binary_hll") {
+    def tableName="test_return_binary_hll"
+    sql "drop table if exists ${tableName};"
+
+    sql """
+    CREATE TABLE `${tableName}` (
+        `dt` int(11) NULL,
+        `page` varchar(10) NULL,
+        `user_id` hll HLL_UNION NULL
+        ) ENGINE=OLAP
+        AGGREGATE KEY(`dt`, `page`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`dt`) BUCKETS 1
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "in_memory" = "false",
+        "storage_format" = "V2",
+        "disable_auto_compaction" = "false"
+        );
+    """
+    sql """
+        insert into ${tableName} values(1,1,hll_hash(1)),(1,1,hll_hash(2)),(1,1,hll_hash(3)),(1,1,hll_hash(23332));
+    """
+    sql "set enable_vectorized_engine=true;"
+    sql "set return_object_data_as_binary=false;"
+    def result1 = sql "select * from ${tableName}"
+    assertTrue(result1[0][2]==null);
+
+    sql "set enable_vectorized_engine=true;"
+    sql "set return_object_data_as_binary=true;"
+    def result2 = sql "select * from ${tableName}"
+    assertTrue(result2[0][2]!=null);
+}
\ No newline at end of file
diff --git a/regression-test/suites/query_p1/return_binaray/test_return_binary_bitmap.groovy b/regression-test/suites/query_p1/return_binaray/test_return_binary_bitmap.groovy
new file mode 100644
index 0000000000..c09d71666d
--- /dev/null
+++ b/regression-test/suites/query_p1/return_binaray/test_return_binary_bitmap.groovy
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_return_binary_bitmap") {
+    def tableName="test_return_binary_bitmap"
+    sql "drop table if exists ${tableName};"
+
+    sql """
+    CREATE TABLE `${tableName}` (
+        `dt` int(11) NULL,
+        `page` varchar(10) NULL,
+        `user_id` bitmap BITMAP_UNION NULL
+        ) ENGINE=OLAP
+        AGGREGATE KEY(`dt`, `page`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`dt`) BUCKETS 1
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "in_memory" = "false",
+        "storage_format" = "V2",
+        "disable_auto_compaction" = "false"
+        );
+    """
+    sql """
+        insert into ${tableName} values(1,1,to_bitmap(1)),(1,1,to_bitmap(2)),(1,1,to_bitmap(3)),(1,1,to_bitmap(23332));
+    """
+    sql "set enable_vectorized_engine=true;"
+    sql "set return_object_data_as_binary=false;"
+    def result1 = sql "select * from ${tableName}"
+    assertTrue(result1[0][2]==null);
+
+    sql "set enable_vectorized_engine=true;"
+    sql "set return_object_data_as_binary=true;"
+    def result2 = sql "select * from ${tableName}"
+    assertTrue(result2[0][2]!=null);
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org