You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by dk...@apache.org on 2022/07/22 07:33:04 UTC

[hive] branch master updated: HIVE-26396: Trunc function does not honour the scale for constant decimal data type (Simhadri Govindappa, reviewed by Denys Kuzmenko)

This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 3ae189e6a47 HIVE-26396: Trunc function does not honour the scale for constant decimal data type (Simhadri Govindappa, reviewed by Denys Kuzmenko)
3ae189e6a47 is described below

commit 3ae189e6a47559ab98f19ecf845138be0e6b8f5d
Author: SimhadriGovindappa <si...@gmail.com>
AuthorDate: Fri Jul 22 13:02:54 2022 +0530

    HIVE-26396: Trunc function does not honour the scale for constant decimal data type (Simhadri Govindappa, reviewed by Denys Kuzmenko)
    
    Closes #3463
---
 .../hive/ql/udf/generic/GenericUDFTrunc.java       |  6 +-
 .../test/queries/clientpositive/udf_trunc_number.q |  1 +
 .../clientpositive/llap/udf_trunc_number.q.out     | 13 ++++-
 .../results/clientpositive/vector_udf_trunc.q.out  | 68 +++++++++++-----------
 4 files changed, 51 insertions(+), 37 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTrunc.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTrunc.java
index 0dab187fbb7..3cebbe7af85 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTrunc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTrunc.java
@@ -54,6 +54,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
@@ -198,7 +199,10 @@ public class GenericUDFTrunc extends GenericUDF {
     ObjectInspector outputOI = null;
     switch (inputType1) {
     case DECIMAL:
-      outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(inputType1);
+      int outputScale = scale > 0 ? scale : 0;
+      int outputPrecision = ((PrimitiveObjectInspector) arguments[0]).precision() - ((PrimitiveObjectInspector) arguments[0]).scale() + outputScale;
+      DecimalTypeInfo t = new DecimalTypeInfo(outputPrecision, outputScale);
+      outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(t);
       break;
     case VOID:
     case BYTE:
diff --git a/ql/src/test/queries/clientpositive/udf_trunc_number.q b/ql/src/test/queries/clientpositive/udf_trunc_number.q
index 9717968ba3c..29989483586 100644
--- a/ql/src/test/queries/clientpositive/udf_trunc_number.q
+++ b/ql/src/test/queries/clientpositive/udf_trunc_number.q
@@ -5,6 +5,7 @@ EXPLAIN SELECT trunc(1234567891.1234567891,4), trunc(1234567891.1234567891,-4),
 
 SELECT trunc(1234567891.1234567891,4), trunc(1234567891.1234567891,-4), trunc(1234567891.1234567891,0), trunc(1234567891.1234567891) FROM src tablesample (1 rows);
 
+SELECT trunc(12.123891,4), trunc(12,-4) FROM src tablesample (1 rows);
 DROP TABLE sampletable;
 
 CREATE TABLE sampletable(c DOUBLE, d INT)
diff --git a/ql/src/test/results/clientpositive/llap/udf_trunc_number.q.out b/ql/src/test/results/clientpositive/llap/udf_trunc_number.q.out
index 087b19180cb..1f628af05cc 100644
--- a/ql/src/test/results/clientpositive/llap/udf_trunc_number.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_trunc_number.q.out
@@ -18,7 +18,7 @@ STAGE PLANS:
           alias: src
           Row Limit Per Split: 1
           Select Operator
-            expressions: 1234567891.1234 (type: decimal(38,18)), 1234560000 (type: decimal(38,18)), 1234567891 (type: decimal(38,18)), 1234567891 (type: decimal(38,18))
+            expressions: 1234567891.1234 (type: decimal(14,4)), 1234560000 (type: decimal(10,0)), 1234567891 (type: decimal(10,0)), 1234567891 (type: decimal(10,0))
             outputColumnNames: _col0, _col1, _col2, _col3
             ListSink
 
@@ -30,7 +30,16 @@ POSTHOOK: query: SELECT trunc(1234567891.1234567891,4), trunc(1234567891.1234567
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
-1234567891.123400000000000000	1234560000.000000000000000000	1234567891.000000000000000000	1234567891.000000000000000000
+1234567891.1234	1234560000	1234567891	1234567891
+PREHOOK: query: SELECT trunc(12.123891,4), trunc(12,-4) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT trunc(12.123891,4), trunc(12,-4) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+12.1238	0
 PREHOOK: query: DROP TABLE sampletable
 PREHOOK: type: DROPTABLE
 POSTHOOK: query: DROP TABLE sampletable
diff --git a/ql/src/test/results/clientpositive/vector_udf_trunc.q.out b/ql/src/test/results/clientpositive/vector_udf_trunc.q.out
index 8d20b63f412..18d16a92d52 100644
--- a/ql/src/test/results/clientpositive/vector_udf_trunc.q.out
+++ b/ql/src/test/results/clientpositive/vector_udf_trunc.q.out
@@ -734,13 +734,13 @@ STAGE PLANS:
                 native: true
                 vectorizationSchemaColumns: [0:c:double, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 2:ROW__IS__DELETED:boolean]
             Select Operator
-              expressions: c (type: double), trunc(CAST( c AS decimal(10,5)), 0) (type: decimal(38,18))
+              expressions: c (type: double), trunc(CAST( c AS decimal(10,5)), 0) (type: decimal(5,0))
               outputColumnNames: _col0, _col1
               Select Vectorization:
                   className: VectorSelectOperator
                   native: true
                   projectedOutputColumnNums: [0, 4]
-                  selectExpressions: TruncDecimal(col 3, scale 0)(children: CastDoubleToDecimal(col 0:double) -> 3:decimal(10,5)) -> 4:decimal(38,18)
+                  selectExpressions: TruncDecimal(col 3, scale 0)(children: CastDoubleToDecimal(col 0:double) -> 3:decimal(10,5)) -> 4:decimal(5,0)
               Statistics: Num rows: 4 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 key expressions: _col0 (type: double)
@@ -752,7 +752,7 @@ STAGE PLANS:
                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     nativeConditionsNotMet: hive.execution.engine mr IN [tez] IS false
                 Statistics: Num rows: 4 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
-                value expressions: _col1 (type: decimal(38,18))
+                value expressions: _col1 (type: decimal(5,0))
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -768,14 +768,14 @@ STAGE PLANS:
               includeColumns: [0]
               dataColumns: c:double
               partitionColumnCount: 0
-              scratchColumnTypeNames: [decimal(10,5), decimal(38,18)]
+              scratchColumnTypeNames: [decimal(10,5), decimal(5,0)]
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
           enableConditionsNotMet: hive.execution.engine mr IN [tez] IS false
       Reduce Operator Tree:
         Select Operator
-          expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: decimal(38,18))
+          expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: decimal(5,0))
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 4 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
@@ -800,9 +800,9 @@ POSTHOOK: query: select c, 0, trunc(CAST (c AS DECIMAL(10,5)), 0) from trunc_num
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@trunc_number
 #### A masked pattern was here ####
-0.54321	0	0.000000000000000000
-12345.0	0	12345.000000000000000000
-12345.54321	0	12345.000000000000000000
+0.54321	0	0
+12345.0	0	12345
+12345.54321	0	12345
 NULL	0	NULL
 PREHOOK: query: select c, -1, trunc(CAST (c AS DECIMAL(10,5)), -1) from trunc_number order by c
 PREHOOK: type: QUERY
@@ -812,9 +812,9 @@ POSTHOOK: query: select c, -1, trunc(CAST (c AS DECIMAL(10,5)), -1) from trunc_n
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@trunc_number
 #### A masked pattern was here ####
-0.54321	-1	0.000000000000000000
-12345.0	-1	12340.000000000000000000
-12345.54321	-1	12340.000000000000000000
+0.54321	-1	0
+12345.0	-1	12340
+12345.54321	-1	12340
 NULL	-1	NULL
 PREHOOK: query: select c, 1, trunc(CAST (c AS DECIMAL(10,5)), 1) from trunc_number order by c
 PREHOOK: type: QUERY
@@ -824,9 +824,9 @@ POSTHOOK: query: select c, 1, trunc(CAST (c AS DECIMAL(10,5)), 1) from trunc_num
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@trunc_number
 #### A masked pattern was here ####
-0.54321	1	0.500000000000000000
-12345.0	1	12345.000000000000000000
-12345.54321	1	12345.500000000000000000
+0.54321	1	0.5
+12345.0	1	12345.0
+12345.54321	1	12345.5
 NULL	1	NULL
 PREHOOK: query: explain vectorization detail
 select c, trunc(c) from trunc_number order by c
@@ -956,13 +956,13 @@ STAGE PLANS:
                 native: true
                 vectorizationSchemaColumns: [0:c:double, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 2:ROW__IS__DELETED:boolean]
             Select Operator
-              expressions: c (type: double), trunc(CAST( c AS decimal(10,5))) (type: decimal(38,18))
+              expressions: c (type: double), trunc(CAST( c AS decimal(10,5))) (type: decimal(5,0))
               outputColumnNames: _col0, _col1
               Select Vectorization:
                   className: VectorSelectOperator
                   native: true
                   projectedOutputColumnNums: [0, 4]
-                  selectExpressions: TruncDecimalNoScale(col 3, scale 0)(children: CastDoubleToDecimal(col 0:double) -> 3:decimal(10,5)) -> 4:decimal(38,18)
+                  selectExpressions: TruncDecimalNoScale(col 3, scale 0)(children: CastDoubleToDecimal(col 0:double) -> 3:decimal(10,5)) -> 4:decimal(5,0)
               Statistics: Num rows: 4 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 key expressions: _col0 (type: double)
@@ -974,7 +974,7 @@ STAGE PLANS:
                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     nativeConditionsNotMet: hive.execution.engine mr IN [tez] IS false
                 Statistics: Num rows: 4 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
-                value expressions: _col1 (type: decimal(38,18))
+                value expressions: _col1 (type: decimal(5,0))
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -990,14 +990,14 @@ STAGE PLANS:
               includeColumns: [0]
               dataColumns: c:double
               partitionColumnCount: 0
-              scratchColumnTypeNames: [decimal(10,5), decimal(38,18)]
+              scratchColumnTypeNames: [decimal(10,5), decimal(5,0)]
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
           enableConditionsNotMet: hive.execution.engine mr IN [tez] IS false
       Reduce Operator Tree:
         Select Operator
-          expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: decimal(38,18))
+          expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: decimal(5,0))
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 4 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
           File Output Operator
@@ -1022,9 +1022,9 @@ POSTHOOK: query: select c, trunc(CAST (c AS DECIMAL(10,5))) from trunc_number or
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@trunc_number
 #### A masked pattern was here ####
-0.54321	0.000000000000000000
-12345.0	12345.000000000000000000
-12345.54321	12345.000000000000000000
+0.54321	0
+12345.0	12345
+12345.54321	12345
 NULL	NULL
 PREHOOK: query: select trunc(ctimestamp1, 'MM'), ctimestamp1 from alltypesorc order by ctimestamp1 LIMIT 10
 PREHOOK: type: QUERY
@@ -1286,9 +1286,9 @@ POSTHOOK: query: select c, 0, trunc(CAST (c AS DECIMAL(10,5)), 0) from trunc_num
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@trunc_number
 #### A masked pattern was here ####
-0.54321	0	0.000000000000000000
-12345.0	0	12345.000000000000000000
-12345.54321	0	12345.000000000000000000
+0.54321	0	0
+12345.0	0	12345
+12345.54321	0	12345
 NULL	0	NULL
 PREHOOK: query: select c, -1, trunc(CAST (c AS DECIMAL(10,5)), -1) from trunc_number order by c
 PREHOOK: type: QUERY
@@ -1298,9 +1298,9 @@ POSTHOOK: query: select c, -1, trunc(CAST (c AS DECIMAL(10,5)), -1) from trunc_n
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@trunc_number
 #### A masked pattern was here ####
-0.54321	-1	0.000000000000000000
-12345.0	-1	12340.000000000000000000
-12345.54321	-1	12340.000000000000000000
+0.54321	-1	0
+12345.0	-1	12340
+12345.54321	-1	12340
 NULL	-1	NULL
 PREHOOK: query: select c, 1, trunc(CAST (c AS DECIMAL(10,5)), 1) from trunc_number order by c
 PREHOOK: type: QUERY
@@ -1310,9 +1310,9 @@ POSTHOOK: query: select c, 1, trunc(CAST (c AS DECIMAL(10,5)), 1) from trunc_num
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@trunc_number
 #### A masked pattern was here ####
-0.54321	1	0.500000000000000000
-12345.0	1	12345.000000000000000000
-12345.54321	1	12345.500000000000000000
+0.54321	1	0.5
+12345.0	1	12345.0
+12345.54321	1	12345.5
 NULL	1	NULL
 PREHOOK: query: select c, trunc(c) from trunc_number order by c
 PREHOOK: type: QUERY
@@ -1334,9 +1334,9 @@ POSTHOOK: query: select c, trunc(CAST (c AS DECIMAL(10,5))) from trunc_number or
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@trunc_number
 #### A masked pattern was here ####
-0.54321	0.000000000000000000
-12345.0	12345.000000000000000000
-12345.54321	12345.000000000000000000
+0.54321	0
+12345.0	12345
+12345.54321	12345
 NULL	NULL
 PREHOOK: query: drop table trunc_number
 PREHOOK: type: DROPTABLE