You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by zh...@apache.org on 2020/02/16 11:14:53 UTC

[incubator-doris] branch master updated: Fix orc load bug (#2912)

This is an automated email from the ASF dual-hosted git repository.

zhaoc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 43583e7  Fix orc load bug (#2912)
43583e7 is described below

commit 43583e7bd20a5a089ff889add56e1b5cf502d808
Author: HangyuanLiu <46...@qq.com>
AuthorDate: Sun Feb 16 19:14:42 2020 +0800

    Fix orc load bug (#2912)
---
 be/src/exec/orc_scanner.cpp                        |   7 ++-
 be/test/exec/orc_scanner_test.cpp                  |  62 ++++++++++++++++++---
 .../orc_scanner/decimal_and_timestamp.orc          | Bin 693 -> 831 bytes
 3 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/be/src/exec/orc_scanner.cpp b/be/src/exec/orc_scanner.cpp
index ae02151..7b35dad 100644
--- a/be/src/exec/orc_scanner.cpp
+++ b/be/src/exec/orc_scanner.cpp
@@ -202,6 +202,7 @@ Status ORCScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) {
                             }
                             break;
                         }
+                        case orc::BYTE:
                         case orc::INT:
                         case orc::SHORT:
                         case orc::LONG:
@@ -245,9 +246,9 @@ Status ORCScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof) {
                             } else {
                                 decimal_str = ((orc::Decimal128VectorBatch*) cvb)->values[_current_line_of_group].toString();
                             }
-                            //Orc api will fill in 0 at the end, so size must greater than scale
-                            std::string v = decimal_str.substr(0, decimal_str.size() - scale) + "." 
-                                + decimal_str.substr(decimal_str.size() - scale);
+                            // The ORC API pads the value with trailing zeros, so its size is greater than scale. A zero value, however, is not padded.
+                            std::string v = decimal_str == "0" ? 
+                                "0" : (decimal_str.substr(0, decimal_str.size() - scale) + "." + decimal_str.substr(decimal_str.size() - scale));
                             str_slot->ptr = reinterpret_cast<char*>(tuple_pool->allocate(v.size()));
                             memcpy(str_slot->ptr, v.c_str(), v.size());
                             str_slot->len = v.size();
diff --git a/be/test/exec/orc_scanner_test.cpp b/be/test/exec/orc_scanner_test.cpp
index 8d2cd5a..1fdee6c 100644
--- a/be/test/exec/orc_scanner_test.cpp
+++ b/be/test/exec/orc_scanner_test.cpp
@@ -547,6 +547,16 @@ TEST_F(OrcScannerTest, normal3) {
         node.__set_scalar_type(scalar_type);
         decimal_type.types.push_back(node);
     }
+    
+    TTypeDesc tinyint_type;
+    {
+        TTypeNode node;
+        node.__set_type(TTypeNodeType::SCALAR);
+        TScalarType scalar_type;
+        scalar_type.__set_type(TPrimitiveType::TINYINT);
+        node.__set_scalar_type(scalar_type);
+        tinyint_type.types.push_back(node);
+    }
 
     TTypeDesc datetime_type;
     {
@@ -588,10 +598,43 @@ TEST_F(OrcScannerTest, normal3) {
             expr.nodes.push_back(cast_expr);
             expr.nodes.push_back(slot_ref);
 
-            params.expr_of_dest_slot.emplace(6 + i, expr);
+            params.expr_of_dest_slot.emplace(7 + i, expr);
             params.src_slot_ids.push_back(i);
         }
-        
+
+        {
+            TExprNode cast_expr;
+            cast_expr.node_type = TExprNodeType::CAST_EXPR;
+            cast_expr.type = tinyint_type;
+            cast_expr.__set_opcode(TExprOpcode::CAST);
+            cast_expr.__set_num_children(1);
+            cast_expr.__set_output_scale(-1);
+            cast_expr.__isset.fn = true;
+            cast_expr.fn.name.function_name = "casttotinyint";
+            cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN;
+            cast_expr.fn.arg_types.push_back(varchar_type);
+            cast_expr.fn.ret_type = tinyint_type;
+            cast_expr.fn.has_var_args = false;
+            cast_expr.fn.__set_signature("cast_to_tiny_int_val(VARCHAR(*))");
+            cast_expr.fn.__isset.scalar_fn = true;
+            cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_tiny_int_val";
+
+            TExprNode slot_ref;
+            slot_ref.node_type = TExprNodeType::SLOT_REF;
+            slot_ref.type = varchar_type;
+            slot_ref.num_children = 0;
+            slot_ref.__isset.slot_ref = true;
+            slot_ref.slot_ref.slot_id = 5;
+            slot_ref.slot_ref.tuple_id = 0;
+
+            TExpr expr;
+            expr.nodes.push_back(cast_expr);
+            expr.nodes.push_back(slot_ref);
+
+            params.expr_of_dest_slot.emplace(12, expr);
+            params.src_slot_ids.push_back(5);
+        }
+
         {
             TExprNode cast_expr;
             cast_expr.node_type = TExprNodeType::CAST_EXPR;
@@ -614,17 +657,16 @@ TEST_F(OrcScannerTest, normal3) {
             slot_ref.type = varchar_type;
             slot_ref.num_children = 0;
             slot_ref.__isset.slot_ref = true;
-            slot_ref.slot_ref.slot_id = 5;
+            slot_ref.slot_ref.slot_id = 6;
             slot_ref.slot_ref.tuple_id = 0;
 
             TExpr expr;
             expr.nodes.push_back(cast_expr);
             expr.nodes.push_back(slot_ref);
 
-            params.expr_of_dest_slot.emplace(11, expr);
-            params.src_slot_ids.push_back(5);
+            params.expr_of_dest_slot.emplace(13, expr);
+            params.src_slot_ids.push_back(6);
         }
-
     }
     params.__set_src_tuple_id(0);
     params.__set_dest_tuple_id(1);
@@ -656,6 +698,8 @@ TEST_F(OrcScannerTest, normal3) {
                 TSlotDescriptorBuilder().string_type(65535).nullable(true).column_name("col5").column_pos(5).build());
     src_tuple_builder.add_slot(
                 TSlotDescriptorBuilder().string_type(65535).nullable(true).column_name("col6").column_pos(6).build());
+    src_tuple_builder.add_slot(
+                TSlotDescriptorBuilder().string_type(65535).nullable(true).column_name("col7").column_pos(7).build());
     src_tuple_builder.build(&dtb);
 
     TTupleDescriptorBuilder dest_tuple_builder;
@@ -670,7 +714,9 @@ TEST_F(OrcScannerTest, normal3) {
     dest_tuple_builder.add_slot(
                 TSlotDescriptorBuilder().decimal_type(10,5).column_name("col5").column_pos(5).build());
     dest_tuple_builder.add_slot(
-                TSlotDescriptorBuilder().type(TYPE_DATETIME).column_name("col5").column_pos(6).build());
+                TSlotDescriptorBuilder().type(TYPE_TINYINT).column_name("col6").column_pos(6).build());
+    dest_tuple_builder.add_slot(
+                TSlotDescriptorBuilder().type(TYPE_DATETIME).column_name("col7").column_pos(7).build());
 
     dest_tuple_builder.build(&dtb);
     t_desc_table = dtb.desc_tbl();
@@ -698,7 +744,7 @@ TEST_F(OrcScannerTest, normal3) {
     bool eof = false;
     ASSERT_TRUE(scanner.get_next(tuple, &tuple_pool, &eof).ok());
     ASSERT_EQ(Tuple::to_string(tuple, *_desc_tbl->get_tuple_descriptor(1)),
-                "(1.123456789 1.12 1.1234500000 1.12345 1.12345 2020-01-14 22:12:19)");
+                "(0.123456789 1.12 -1.1234500000 0.12345 0 1 2020-01-14 22:12:19)");
     scanner.close();
 }
 
diff --git a/be/test/exec/test_data/orc_scanner/decimal_and_timestamp.orc b/be/test/exec/test_data/orc_scanner/decimal_and_timestamp.orc
index cccedf3..0f0d931 100644
Binary files a/be/test/exec/test_data/orc_scanner/decimal_and_timestamp.orc and b/be/test/exec/test_data/orc_scanner/decimal_and_timestamp.orc differ


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org