You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/04/06 07:32:28 UTC
[doris] 03/09: [Fix](orc-reader) Fix the scale of decimal column is incorrect when query orc tables. (#18324)
This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 3fb44bee921234d11b1562284a3c23347fc27988
Author: Qi Chen <ka...@gmail.com>
AuthorDate: Tue Apr 4 08:50:47 2023 +0800
[Fix](orc-reader) Fix the scale of decimal column is incorrect when query orc tables. (#18324)
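The scale of decimal columns was incorrect when querying ORC tables: the reader kept a single DecimalScaleParams that was shared by every decimal column. It now caches one DecimalScaleParams per decimal column, in decode order, and resets the lookup index before each batch is read.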
---
be/src/vec/exec/format/orc/vorc_reader.cpp | 14 ++--
be/src/vec/exec/format/orc/vorc_reader.h | 21 ++++--
.../external_table_emr_p2/hive/test_wide_table.out | 23 +++++++
.../hive/test_wide_table.groovy | 74 ++++++++++++++++++++++
4 files changed, 117 insertions(+), 15 deletions(-)
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 0840155203..a6b2ef9f7b 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -709,17 +709,13 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name,
FOR_FLAT_ORC_COLUMNS(DISPATCH)
#undef DISPATCH
case TypeIndex::Decimal32:
- return _decode_decimal_column<Int32>(col_name, data_column, data_type,
- _decimal_scale_params, cvb, num_values);
+ return _decode_decimal_column<Int32>(col_name, data_column, data_type, cvb, num_values);
case TypeIndex::Decimal64:
- return _decode_decimal_column<Int64>(col_name, data_column, data_type,
- _decimal_scale_params, cvb, num_values);
+ return _decode_decimal_column<Int64>(col_name, data_column, data_type, cvb, num_values);
case TypeIndex::Decimal128:
- return _decode_decimal_column<Int128>(col_name, data_column, data_type,
- _decimal_scale_params, cvb, num_values);
+ return _decode_decimal_column<Int128>(col_name, data_column, data_type, cvb, num_values);
case TypeIndex::Decimal128I:
- return _decode_decimal_column<Int128>(col_name, data_column, data_type,
- _decimal_scale_params, cvb, num_values);
+ return _decode_decimal_column<Int128>(col_name, data_column, data_type, cvb, num_values);
case TypeIndex::Date:
return _decode_time_column<VecDateTimeValue, Int64, orc::LongVectorBatch>(
col_name, data_column, cvb, num_values);
@@ -769,6 +765,8 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
SCOPED_RAW_TIMER(&_statistics.column_read_time);
{
SCOPED_RAW_TIMER(&_statistics.get_batch_time);
+ // reset decimal_scale_params_index
+ _decimal_scale_params_index = 0;
if (!_row_reader->next(*_batch)) {
*eof = true;
*read_rows = 0;
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h
index 53f4a91880..d4a65925b9 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -170,13 +170,19 @@ private:
Status _decode_explicit_decimal_column(const std::string& col_name,
const MutableColumnPtr& data_column,
const DataTypePtr& data_type,
- DecimalScaleParams& scale_params,
orc::ColumnVectorBatch* cvb, size_t num_values) {
OrcColumnType* data = dynamic_cast<OrcColumnType*>(cvb);
if (data == nullptr) {
return Status::InternalError("Wrong data type for colum '{}'", col_name);
}
- _init_decimal_converter<DecimalPrimitiveType>(data_type, scale_params, data->scale);
+ if (_decimal_scale_params_index >= _decimal_scale_params.size()) {
+ DecimalScaleParams temp_scale_params;
+ _init_decimal_converter<DecimalPrimitiveType>(data_type, temp_scale_params,
+ data->scale);
+ _decimal_scale_params.emplace_back(std::move(temp_scale_params));
+ }
+ DecimalScaleParams& scale_params = _decimal_scale_params[_decimal_scale_params_index];
+ ++_decimal_scale_params_index;
auto* cvb_data = data->values.data();
auto& column_data =
@@ -206,16 +212,16 @@ private:
template <typename DecimalPrimitiveType>
Status _decode_decimal_column(const std::string& col_name, const MutableColumnPtr& data_column,
- const DataTypePtr& data_type, DecimalScaleParams& scale_params,
- orc::ColumnVectorBatch* cvb, size_t num_values) {
+ const DataTypePtr& data_type, orc::ColumnVectorBatch* cvb,
+ size_t num_values) {
SCOPED_RAW_TIMER(&_statistics.decode_value_time);
if (dynamic_cast<orc::Decimal64VectorBatch*>(cvb) != nullptr) {
return _decode_explicit_decimal_column<DecimalPrimitiveType, orc::Decimal64VectorBatch>(
- col_name, data_column, data_type, scale_params, cvb, num_values);
+ col_name, data_column, data_type, cvb, num_values);
} else {
return _decode_explicit_decimal_column<DecimalPrimitiveType,
orc::Decimal128VectorBatch>(
- col_name, data_column, data_type, scale_params, cvb, num_values);
+ col_name, data_column, data_type, cvb, num_values);
}
}
@@ -293,7 +299,8 @@ private:
orc::RowReaderOptions _row_reader_options;
// only for decimal
- DecimalScaleParams _decimal_scale_params;
+ std::vector<DecimalScaleParams> _decimal_scale_params;
+ size_t _decimal_scale_params_index;
};
} // namespace doris::vectorized
diff --git a/regression-test/data/external_table_emr_p2/hive/test_wide_table.out b/regression-test/data/external_table_emr_p2/hive/test_wide_table.out
new file mode 100644
index 0000000000..143aeb9bf5
--- /dev/null
+++ b/regression-test/data/external_table_emr_p2/hive/test_wide_table.out
@@ -0,0 +1,23 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !01 --
+6117920261 28156890937818.64 11058113 84788841307158.93 9988065.83660 8116313253956313.527443
+
+-- !02 --
+6117920261 28156890937818.64 11058113 84788841307158.93 9988065.83660 8116313253956313.527443
+
+-- !03 --
+
+-- !04 --
+
+-- !05 --
+6117920261 28156890937818.64 11058113 84788841307158.93 9988065.83660 8116313253956313.527443
+
+-- !06 --
+6117920261 28156890937818.64 11058113 84788841307158.93 9988065.83660 8116313253956313.527443
+
+-- !07 --
+6117920261 28156890937818.64 11058113 84788841307158.93 9988065.83660 8116313253956313.527443
+
+-- !08 --
+9999999541515682.000000000 99999218685068.860000000 99999869.000000000 221095586.110000000 27.542540000 61077635638.763621000
+
diff --git a/regression-test/suites/external_table_emr_p2/hive/test_wide_table.groovy b/regression-test/suites/external_table_emr_p2/hive/test_wide_table.groovy
new file mode 100644
index 0000000000..f7ba390c1f
--- /dev/null
+++ b/regression-test/suites/external_table_emr_p2/hive/test_wide_table.groovy
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_wide_table", "p2") {
+
+ def formats = ["_orc"]
+ def decimal_test1 = """select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1;"""
+ def decimal_test2 = """select * from
+ (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 = 9988065.8366;
+ """
+ def decimal_test3 = """select * from
+ (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 = 9988065.8367;
+ """
+ def decimal_test4 = """select * from
+ (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 = 9988065.836;
+ """
+ def decimal_test5 = """select * from
+ (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 = 9988065.836600;
+ """
+ def decimal_test6 = """select * from
+ (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 > 9988065.83653;
+ """
+ def decimal_test7 = """select * from
+ (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 < 9988065.83673;
+ """
+ def decimal_test8 = """select max(col1), max(col70), max(col71), min(col81), min(col100), min(col534) from wide_table1SUFFIX;"""
+
+ String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
+ String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
+ String catalog_name = "external_wide_table"
+
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='hms',
+ 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+ );
+ """
+ logger.info("catalog " + catalog_name + " created")
+ sql """switch ${catalog_name};"""
+ logger.info("switched to catalog " + catalog_name)
+ sql """use wide_tables;"""
+ logger.info("use wide_tables")
+
+ for (String format in formats) {
+ logger.info("Process format " + format)
+ qt_01 decimal_test1.replace("SUFFIX", format)
+ qt_02 decimal_test2.replace("SUFFIX", format)
+ qt_03 decimal_test3.replace("SUFFIX", format)
+ qt_04 decimal_test4.replace("SUFFIX", format)
+ qt_05 decimal_test5.replace("SUFFIX", format)
+ qt_06 decimal_test6.replace("SUFFIX", format)
+ qt_07 decimal_test7.replace("SUFFIX", format)
+ qt_08 decimal_test8.replace("SUFFIX", format)
+ }
+ }
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org