You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2022/12/28 01:41:23 UTC
[doris] branch branch-1.2-lts updated: [cherry-pick](bitmap)Support return bitmap data in select statement in vectorization (#15388)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new 14553d0068 [cherry-pick](bitmap)Support return bitmap data in select statement in vectorization (#15388)
14553d0068 is described below
commit 14553d006881a99d910cfe772d2227bcba7bf3b8
Author: chenlinzhong <49...@qq.com>
AuthorDate: Wed Dec 28 09:41:17 2022 +0800
[cherry-pick](bitmap)Support return bitmap data in select statement in vectorization (#15388)
cherry-pick #15224
---
be/src/runtime/result_writer.h | 4 ++
be/src/vec/columns/column.h | 2 +
be/src/vec/columns/column_complex.h | 1 +
be/src/vec/columns/column_nullable.h | 1 +
.../exec/format/parquet/vparquet_column_reader.h | 3 +-
be/src/vec/sink/vmysql_result_writer.cpp | 26 ++++++++++-
.../return_binaray/test_return_binaray_hll.groovy | 50 ++++++++++++++++++++++
.../test_return_binary_bitmap.groovy | 50 ++++++++++++++++++++++
8 files changed, 133 insertions(+), 4 deletions(-)
diff --git a/be/src/runtime/result_writer.h b/be/src/runtime/result_writer.h
index 7d669e1b4f..ced54960d0 100644
--- a/be/src/runtime/result_writer.h
+++ b/be/src/runtime/result_writer.h
@@ -52,6 +52,10 @@ public:
virtual bool output_object_data() const { return _output_object_data; }
+ void set_output_object_data(bool output_object_data) {
+ _output_object_data = output_object_data;
+ }
+
static const std::string NULL_IN_CSV;
virtual void set_header_info(const std::string& header_type, const std::string& header) {
_header_type = header_type;
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index c5eeb18d8f..6bd5ac7855 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -527,6 +527,8 @@ public:
virtual bool is_bitmap() const { return false; }
+ virtual bool is_hll() const { return false; }
+
// true if column has null element
virtual bool has_null() const { return false; }
diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h
index ec15c65df7..4b6562d534 100644
--- a/be/src/vec/columns/column_complex.h
+++ b/be/src/vec/columns/column_complex.h
@@ -47,6 +47,7 @@ public:
bool is_numeric() const override { return false; }
bool is_bitmap() const override { return std::is_same_v<T, BitmapValue>; }
+ bool is_hll() const override { return std::is_same_v<T, HyperLogLog>; }
size_t size() const override { return data.size(); }
diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h
index 9e3b480c15..91c0ac0cf4 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -214,6 +214,7 @@ public:
bool is_nullable() const override { return true; }
bool is_bitmap() const override { return get_nested_column().is_bitmap(); }
+ bool is_hll() const override { return get_nested_column().is_hll(); }
bool is_column_decimal() const override { return get_nested_column().is_column_decimal(); }
bool is_column_string() const override { return get_nested_column().is_column_string(); }
bool is_column_array() const override { return get_nested_column().is_column_array(); }
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.h b/be/src/vec/exec/format/parquet/vparquet_column_reader.h
index f8d8085df8..949488a395 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.h
@@ -107,8 +107,7 @@ public:
virtual Status read_column_data(ColumnPtr& doris_column, DataTypePtr& type,
ColumnSelectVector& select_vector, size_t batch_size,
size_t* read_rows, bool* eof) = 0;
- static Status create(FileReader* file, FieldSchema* field,
- const tparquet::RowGroup& row_group,
+ static Status create(FileReader* file, FieldSchema* field, const tparquet::RowGroup& row_group,
const std::vector<RowRange>& row_ranges, cctz::time_zone* ctz,
std::unique_ptr<ParquetColumnReader>& reader, size_t max_buf_size);
void init_column_metadata(const tparquet::ColumnChunk& chunk);
diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp
index 155e4c3f2e..2900d2d486 100644
--- a/be/src/vec/sink/vmysql_result_writer.cpp
+++ b/be/src/vec/sink/vmysql_result_writer.cpp
@@ -17,11 +17,13 @@
#include "vec/sink/vmysql_result_writer.h"
+#include "olap/hll.h"
#include "runtime/buffer_control_block.h"
#include "runtime/jsonb_value.h"
#include "runtime/large_int_value.h"
#include "runtime/runtime_state.h"
#include "vec/columns/column_array.h"
+#include "vec/columns/column_complex.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_vector.h"
#include "vec/common/assert_cast.h"
@@ -46,7 +48,7 @@ Status VMysqlResultWriter::init(RuntimeState* state) {
if (nullptr == _sinker) {
return Status::InternalError("sinker is NULL pointer.");
}
-
+ set_output_object_data(state->return_object_data_as_binary());
return Status::OK();
}
@@ -91,7 +93,27 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr,
}
if constexpr (type == TYPE_OBJECT) {
- buf_ret = _buffer.push_null();
+ if (column->is_bitmap() && output_object_data()) {
+ const vectorized::ColumnComplexType<BitmapValue>* pColumnComplexType =
+ assert_cast<const vectorized::ColumnComplexType<BitmapValue>*>(
+ column.get());
+ BitmapValue bitmapValue = pColumnComplexType->get_element(i);
+ size_t size = bitmapValue.getSizeInBytes();
+ std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
+ bitmapValue.write(buf.get());
+ buf_ret = _buffer.push_string(buf.get(), size);
+ } else if (column->is_hll() && output_object_data()) {
+ const vectorized::ColumnComplexType<HyperLogLog>* pColumnComplexType =
+ assert_cast<const vectorized::ColumnComplexType<HyperLogLog>*>(
+ column.get());
+ HyperLogLog hyperLogLog = pColumnComplexType->get_element(i);
+ size_t size = hyperLogLog.max_serialized_size();
+ std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
+ hyperLogLog.serialize((uint8*)buf.get());
+ buf_ret = _buffer.push_string(buf.get(), size);
+ } else {
+ buf_ret = _buffer.push_null();
+ }
}
if constexpr (type == TYPE_VARCHAR) {
const auto string_val = column->get_data_at(i);
diff --git a/regression-test/suites/query_p1/return_binaray/test_return_binaray_hll.groovy b/regression-test/suites/query_p1/return_binaray/test_return_binaray_hll.groovy
new file mode 100644
index 0000000000..d581b6a04e
--- /dev/null
+++ b/regression-test/suites/query_p1/return_binaray/test_return_binaray_hll.groovy
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_return_binary_hll") {
+ def tableName="test_return_binary_hll"
+ sql "drop table if exists ${tableName};"
+
+ sql """
+ CREATE TABLE `${tableName}` (
+ `dt` int(11) NULL,
+ `page` varchar(10) NULL,
+ `user_id` hll HLL_UNION NULL
+ ) ENGINE=OLAP
+ AGGREGATE KEY(`dt`, `page`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`dt`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "in_memory" = "false",
+ "storage_format" = "V2",
+ "disable_auto_compaction" = "false"
+ );
+ """
+ sql """
+ insert into ${tableName} values(1,1,hll_hash(1)),(1,1,hll_hash(2)),(1,1,hll_hash(3)),(1,1,hll_hash(23332));
+ """
+ sql "set enable_vectorized_engine=true;"
+ sql "set return_object_data_as_binary=false;"
+ def result1 = sql "select * from ${tableName}"
+ assertTrue(result1[0][2]==null);
+
+ sql "set enable_vectorized_engine=true;"
+ sql "set return_object_data_as_binary=true;"
+ def result2 = sql "select * from ${tableName}"
+ assertTrue(result2[0][2]!=null);
+}
\ No newline at end of file
diff --git a/regression-test/suites/query_p1/return_binaray/test_return_binary_bitmap.groovy b/regression-test/suites/query_p1/return_binaray/test_return_binary_bitmap.groovy
new file mode 100644
index 0000000000..c09d71666d
--- /dev/null
+++ b/regression-test/suites/query_p1/return_binaray/test_return_binary_bitmap.groovy
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_return_binary_bitmap") {
+ def tableName="test_return_binary_bitmap"
+ sql "drop table if exists ${tableName};"
+
+ sql """
+ CREATE TABLE `${tableName}` (
+ `dt` int(11) NULL,
+ `page` varchar(10) NULL,
+ `user_id` bitmap BITMAP_UNION NULL
+ ) ENGINE=OLAP
+ AGGREGATE KEY(`dt`, `page`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`dt`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "in_memory" = "false",
+ "storage_format" = "V2",
+ "disable_auto_compaction" = "false"
+ );
+ """
+ sql """
+ insert into ${tableName} values(1,1,to_bitmap(1)),(1,1,to_bitmap(2)),(1,1,to_bitmap(3)),(1,1,to_bitmap(23332));
+ """
+ sql "set enable_vectorized_engine=true;"
+ sql "set return_object_data_as_binary=false;"
+ def result1 = sql "select * from ${tableName}"
+ assertTrue(result1[0][2]==null);
+
+ sql "set enable_vectorized_engine=true;"
+ sql "set return_object_data_as_binary=true;"
+ def result2 = sql "select * from ${tableName}"
+ assertTrue(result2[0][2]!=null);
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org