You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by mo...@apache.org on 2023/04/12 17:02:54 UTC

[doris] 10/33: [Fix](orc-reader) Fix the scale of decimal column is incorrect when query orc tables. (#18324)

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch doris-for-zhongjin
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 15679d8c375b941d47fe13803ac0e64ba4d76a92
Author: Qi Chen <ka...@gmail.com>
AuthorDate: Tue Apr 4 08:50:47 2023 +0800

    [Fix](orc-reader) Fix the scale of decimal column is incorrect when query orc tables. (#18324)
    
    The scale of decimal columns was incorrect when querying ORC tables.
---
 be/src/vec/exec/format/orc/vorc_reader.cpp         | 14 ++--
 be/src/vec/exec/format/orc/vorc_reader.h           | 22 ++++---
 .../external_table_emr_p2/hive/test_wide_table.out | 23 +++++++
 .../hive/test_wide_table.groovy                    | 74 ++++++++++++++++++++++
 4 files changed, 117 insertions(+), 16 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp
index bde965f3b1..293e1233d1 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -742,17 +742,13 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name,
         FOR_FLAT_ORC_COLUMNS(DISPATCH)
 #undef DISPATCH
     case TypeIndex::Decimal32:
-        return _decode_decimal_column<Int32>(col_name, data_column, data_type,
-                                             _decimal_scale_params, cvb, num_values);
+        return _decode_decimal_column<Int32>(col_name, data_column, data_type, cvb, num_values);
     case TypeIndex::Decimal64:
-        return _decode_decimal_column<Int64>(col_name, data_column, data_type,
-                                             _decimal_scale_params, cvb, num_values);
+        return _decode_decimal_column<Int64>(col_name, data_column, data_type, cvb, num_values);
     case TypeIndex::Decimal128:
-        return _decode_decimal_column<Int128>(col_name, data_column, data_type,
-                                              _decimal_scale_params, cvb, num_values);
+        return _decode_decimal_column<Int128>(col_name, data_column, data_type, cvb, num_values);
     case TypeIndex::Decimal128I:
-        return _decode_decimal_column<Int128>(col_name, data_column, data_type,
-                                              _decimal_scale_params, cvb, num_values);
+        return _decode_decimal_column<Int128>(col_name, data_column, data_type, cvb, num_values);
     case TypeIndex::Date:
         return _decode_time_column<VecDateTimeValue, Int64, orc::LongVectorBatch>(
                 col_name, data_column, cvb, num_values);
@@ -850,6 +846,8 @@ Status OrcReader::get_next_block(Block* block, size_t* read_rows, bool* eof) {
     SCOPED_RAW_TIMER(&_statistics.column_read_time);
     {
         SCOPED_RAW_TIMER(&_statistics.get_batch_time);
+        // reset decimal_scale_params_index
+        _decimal_scale_params_index = 0;
         if (!_row_reader->next(*_batch)) {
             *eof = true;
             *read_rows = 0;
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h
index 4383129bd8..4fc2fd5ec1 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -147,13 +147,19 @@ private:
     Status _decode_explicit_decimal_column(const std::string& col_name,
                                            const MutableColumnPtr& data_column,
                                            const DataTypePtr& data_type,
-                                           DecimalScaleParams& scale_params,
                                            orc::ColumnVectorBatch* cvb, size_t num_values) {
         OrcColumnType* data = dynamic_cast<OrcColumnType*>(cvb);
         if (data == nullptr) {
             return Status::InternalError("Wrong data type for colum '{}'", col_name);
         }
-        _init_decimal_converter<DecimalPrimitiveType>(data_type, scale_params, data->scale);
+        if (_decimal_scale_params_index >= _decimal_scale_params.size()) {
+            DecimalScaleParams temp_scale_params;
+            _init_decimal_converter<DecimalPrimitiveType>(data_type, temp_scale_params,
+                                                          data->scale);
+            _decimal_scale_params.emplace_back(std::move(temp_scale_params));
+        }
+        DecimalScaleParams& scale_params = _decimal_scale_params[_decimal_scale_params_index];
+        ++_decimal_scale_params_index;
 
         auto* cvb_data = data->values.data();
         auto& column_data =
@@ -183,16 +189,16 @@ private:
 
     template <typename DecimalPrimitiveType>
     Status _decode_decimal_column(const std::string& col_name, const MutableColumnPtr& data_column,
-                                  const DataTypePtr& data_type, DecimalScaleParams& scale_params,
-                                  orc::ColumnVectorBatch* cvb, size_t num_values) {
+                                  const DataTypePtr& data_type, orc::ColumnVectorBatch* cvb,
+                                  size_t num_values) {
         SCOPED_RAW_TIMER(&_statistics.decode_value_time);
         if (dynamic_cast<orc::Decimal64VectorBatch*>(cvb) != nullptr) {
             return _decode_explicit_decimal_column<DecimalPrimitiveType, orc::Decimal64VectorBatch>(
-                    col_name, data_column, data_type, scale_params, cvb, num_values);
+                    col_name, data_column, data_type, cvb, num_values);
         } else {
             return _decode_explicit_decimal_column<DecimalPrimitiveType,
                                                    orc::Decimal128VectorBatch>(
-                    col_name, data_column, data_type, scale_params, cvb, num_values);
+                    col_name, data_column, data_type, cvb, num_values);
         }
     }
 
@@ -279,8 +285,8 @@ private:
 
     io::IOContext* _io_ctx;
 
-    // only for decimal
-    DecimalScaleParams _decimal_scale_params;
+    std::vector<DecimalScaleParams> _decimal_scale_params;
+    size_t _decimal_scale_params_index;
 };
 
 class ORCFileInputStream : public orc::InputStream {
diff --git a/regression-test/data/external_table_emr_p2/hive/test_wide_table.out b/regression-test/data/external_table_emr_p2/hive/test_wide_table.out
new file mode 100644
index 0000000000..143aeb9bf5
--- /dev/null
+++ b/regression-test/data/external_table_emr_p2/hive/test_wide_table.out
@@ -0,0 +1,23 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !01 --
+6117920261	28156890937818.64	11058113	84788841307158.93	9988065.83660	8116313253956313.527443
+
+-- !02 --
+6117920261	28156890937818.64	11058113	84788841307158.93	9988065.83660	8116313253956313.527443
+
+-- !03 --
+
+-- !04 --
+
+-- !05 --
+6117920261	28156890937818.64	11058113	84788841307158.93	9988065.83660	8116313253956313.527443
+
+-- !06 --
+6117920261	28156890937818.64	11058113	84788841307158.93	9988065.83660	8116313253956313.527443
+
+-- !07 --
+6117920261	28156890937818.64	11058113	84788841307158.93	9988065.83660	8116313253956313.527443
+
+-- !08 --
+9999999541515682.000000000	99999218685068.860000000	99999869.000000000	221095586.110000000	27.542540000	61077635638.763621000
+
diff --git a/regression-test/suites/external_table_emr_p2/hive/test_wide_table.groovy b/regression-test/suites/external_table_emr_p2/hive/test_wide_table.groovy
new file mode 100644
index 0000000000..f7ba390c1f
--- /dev/null
+++ b/regression-test/suites/external_table_emr_p2/hive/test_wide_table.groovy
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_wide_table", "p2") {
+
+    def formats = ["_orc"]
+    def decimal_test1 = """select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1;"""
+    def decimal_test2 = """select * from
+     (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T where col100 = 9988065.8366;
+         """
+    def decimal_test3 = """select * from
+     (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T  where col100 = 9988065.8367;
+         """
+    def decimal_test4 = """select * from
+     (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T  where col100 = 9988065.836;
+         """
+    def decimal_test5 = """select * from
+     (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T  where col100 = 9988065.836600;
+     """
+    def decimal_test6 = """select * from
+     (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T  where col100 > 9988065.83653;
+     """
+    def decimal_test7 = """select * from
+     (select col1, col70, col71, col81, col100, col534 from wide_table1SUFFIX where col1 is not null order by col1 limit 1) as T  where col100 < 9988065.83673;
+     """
+    def decimal_test8 = """select max(col1), max(col70), max(col71), min(col81), min(col100), min(col534) from wide_table1SUFFIX;"""
+
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "external_wide_table"
+
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+        sql """use wide_tables;"""
+        logger.info("use wide_tables")
+
+        for (String format in formats) {
+            logger.info("Process format " + format)
+            qt_01 decimal_test1.replace("SUFFIX", format)
+            qt_02 decimal_test2.replace("SUFFIX", format)
+            qt_03 decimal_test3.replace("SUFFIX", format)
+            qt_04 decimal_test4.replace("SUFFIX", format)
+            qt_05 decimal_test5.replace("SUFFIX", format)
+            qt_06 decimal_test6.replace("SUFFIX", format)
+            qt_07 decimal_test7.replace("SUFFIX", format)
+            qt_08 decimal_test8.replace("SUFFIX", format)
+        }
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org