You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@hive.apache.org by dk...@apache.org on 2022/07/22 07:33:04 UTC
[hive] branch master updated: HIVE-26396: Trunc function does not honour the scale for constant decimal data type (Simhadri Govindappa, reviewed by Denys Kuzmenko)
This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 3ae189e6a47 HIVE-26396: Trunc function does not honour the scale for constant decimal data type (Simhadri Govindappa, reviewed by Denys Kuzmenko)
3ae189e6a47 is described below
commit 3ae189e6a47559ab98f19ecf845138be0e6b8f5d
Author: SimhadriGovindappa <si...@gmail.com>
AuthorDate: Fri Jul 22 13:02:54 2022 +0530
HIVE-26396: Trunc function does not honour the scale for constant decimal data type (Simhadri Govindappa, reviewed by Denys Kuzmenko)
Closes #3463
---
.../hive/ql/udf/generic/GenericUDFTrunc.java | 6 +-
.../test/queries/clientpositive/udf_trunc_number.q | 1 +
.../clientpositive/llap/udf_trunc_number.q.out | 13 ++++-
.../results/clientpositive/vector_udf_trunc.q.out | 68 +++++++++++-----------
4 files changed, 51 insertions(+), 37 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTrunc.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTrunc.java
index 0dab187fbb7..3cebbe7af85 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTrunc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFTrunc.java
@@ -54,6 +54,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
@@ -198,7 +199,10 @@ public class GenericUDFTrunc extends GenericUDF {
ObjectInspector outputOI = null;
switch (inputType1) {
case DECIMAL:
- outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(inputType1);
+ int outputScale = scale > 0 ? scale : 0;
+ int outputPrecision = ((PrimitiveObjectInspector) arguments[0]).precision() - ((PrimitiveObjectInspector) arguments[0]).scale() + outputScale;
+ DecimalTypeInfo t = new DecimalTypeInfo(outputPrecision, outputScale);
+ outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(t);
break;
case VOID:
case BYTE:
diff --git a/ql/src/test/queries/clientpositive/udf_trunc_number.q b/ql/src/test/queries/clientpositive/udf_trunc_number.q
index 9717968ba3c..29989483586 100644
--- a/ql/src/test/queries/clientpositive/udf_trunc_number.q
+++ b/ql/src/test/queries/clientpositive/udf_trunc_number.q
@@ -5,6 +5,7 @@ EXPLAIN SELECT trunc(1234567891.1234567891,4), trunc(1234567891.1234567891,-4),
SELECT trunc(1234567891.1234567891,4), trunc(1234567891.1234567891,-4), trunc(1234567891.1234567891,0), trunc(1234567891.1234567891) FROM src tablesample (1 rows);
+SELECT trunc(12.123891,4), trunc(12,-4) FROM src tablesample (1 rows);
DROP TABLE sampletable;
CREATE TABLE sampletable(c DOUBLE, d INT)
diff --git a/ql/src/test/results/clientpositive/llap/udf_trunc_number.q.out b/ql/src/test/results/clientpositive/llap/udf_trunc_number.q.out
index 087b19180cb..1f628af05cc 100644
--- a/ql/src/test/results/clientpositive/llap/udf_trunc_number.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_trunc_number.q.out
@@ -18,7 +18,7 @@ STAGE PLANS:
alias: src
Row Limit Per Split: 1
Select Operator
- expressions: 1234567891.1234 (type: decimal(38,18)), 1234560000 (type: decimal(38,18)), 1234567891 (type: decimal(38,18)), 1234567891 (type: decimal(38,18))
+ expressions: 1234567891.1234 (type: decimal(14,4)), 1234560000 (type: decimal(10,0)), 1234567891 (type: decimal(10,0)), 1234567891 (type: decimal(10,0))
outputColumnNames: _col0, _col1, _col2, _col3
ListSink
@@ -30,7 +30,16 @@ POSTHOOK: query: SELECT trunc(1234567891.1234567891,4), trunc(1234567891.1234567
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-1234567891.123400000000000000 1234560000.000000000000000000 1234567891.000000000000000000 1234567891.000000000000000000
+1234567891.1234 1234560000 1234567891 1234567891
+PREHOOK: query: SELECT trunc(12.123891,4), trunc(12,-4) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT trunc(12.123891,4), trunc(12,-4) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+12.1238 0
PREHOOK: query: DROP TABLE sampletable
PREHOOK: type: DROPTABLE
POSTHOOK: query: DROP TABLE sampletable
diff --git a/ql/src/test/results/clientpositive/vector_udf_trunc.q.out b/ql/src/test/results/clientpositive/vector_udf_trunc.q.out
index 8d20b63f412..18d16a92d52 100644
--- a/ql/src/test/results/clientpositive/vector_udf_trunc.q.out
+++ b/ql/src/test/results/clientpositive/vector_udf_trunc.q.out
@@ -734,13 +734,13 @@ STAGE PLANS:
native: true
vectorizationSchemaColumns: [0:c:double, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 2:ROW__IS__DELETED:boolean]
Select Operator
- expressions: c (type: double), trunc(CAST( c AS decimal(10,5)), 0) (type: decimal(38,18))
+ expressions: c (type: double), trunc(CAST( c AS decimal(10,5)), 0) (type: decimal(5,0))
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 4]
- selectExpressions: TruncDecimal(col 3, scale 0)(children: CastDoubleToDecimal(col 0:double) -> 3:decimal(10,5)) -> 4:decimal(38,18)
+ selectExpressions: TruncDecimal(col 3, scale 0)(children: CastDoubleToDecimal(col 0:double) -> 3:decimal(10,5)) -> 4:decimal(5,0)
Statistics: Num rows: 4 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: double)
@@ -752,7 +752,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez] IS false
Statistics: Num rows: 4 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: decimal(38,18))
+ value expressions: _col1 (type: decimal(5,0))
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -768,14 +768,14 @@ STAGE PLANS:
includeColumns: [0]
dataColumns: c:double
partitionColumnCount: 0
- scratchColumnTypeNames: [decimal(10,5), decimal(38,18)]
+ scratchColumnTypeNames: [decimal(10,5), decimal(5,0)]
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
enableConditionsNotMet: hive.execution.engine mr IN [tez] IS false
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: decimal(38,18))
+ expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: decimal(5,0))
outputColumnNames: _col0, _col1
Statistics: Num rows: 4 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
@@ -800,9 +800,9 @@ POSTHOOK: query: select c, 0, trunc(CAST (c AS DECIMAL(10,5)), 0) from trunc_num
POSTHOOK: type: QUERY
POSTHOOK: Input: default@trunc_number
#### A masked pattern was here ####
-0.54321 0 0.000000000000000000
-12345.0 0 12345.000000000000000000
-12345.54321 0 12345.000000000000000000
+0.54321 0 0
+12345.0 0 12345
+12345.54321 0 12345
NULL 0 NULL
PREHOOK: query: select c, -1, trunc(CAST (c AS DECIMAL(10,5)), -1) from trunc_number order by c
PREHOOK: type: QUERY
@@ -812,9 +812,9 @@ POSTHOOK: query: select c, -1, trunc(CAST (c AS DECIMAL(10,5)), -1) from trunc_n
POSTHOOK: type: QUERY
POSTHOOK: Input: default@trunc_number
#### A masked pattern was here ####
-0.54321 -1 0.000000000000000000
-12345.0 -1 12340.000000000000000000
-12345.54321 -1 12340.000000000000000000
+0.54321 -1 0
+12345.0 -1 12340
+12345.54321 -1 12340
NULL -1 NULL
PREHOOK: query: select c, 1, trunc(CAST (c AS DECIMAL(10,5)), 1) from trunc_number order by c
PREHOOK: type: QUERY
@@ -824,9 +824,9 @@ POSTHOOK: query: select c, 1, trunc(CAST (c AS DECIMAL(10,5)), 1) from trunc_num
POSTHOOK: type: QUERY
POSTHOOK: Input: default@trunc_number
#### A masked pattern was here ####
-0.54321 1 0.500000000000000000
-12345.0 1 12345.000000000000000000
-12345.54321 1 12345.500000000000000000
+0.54321 1 0.5
+12345.0 1 12345.0
+12345.54321 1 12345.5
NULL 1 NULL
PREHOOK: query: explain vectorization detail
select c, trunc(c) from trunc_number order by c
@@ -956,13 +956,13 @@ STAGE PLANS:
native: true
vectorizationSchemaColumns: [0:c:double, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 2:ROW__IS__DELETED:boolean]
Select Operator
- expressions: c (type: double), trunc(CAST( c AS decimal(10,5))) (type: decimal(38,18))
+ expressions: c (type: double), trunc(CAST( c AS decimal(10,5))) (type: decimal(5,0))
outputColumnNames: _col0, _col1
Select Vectorization:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 4]
- selectExpressions: TruncDecimalNoScale(col 3, scale 0)(children: CastDoubleToDecimal(col 0:double) -> 3:decimal(10,5)) -> 4:decimal(38,18)
+ selectExpressions: TruncDecimalNoScale(col 3, scale 0)(children: CastDoubleToDecimal(col 0:double) -> 3:decimal(10,5)) -> 4:decimal(5,0)
Statistics: Num rows: 4 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: double)
@@ -974,7 +974,7 @@ STAGE PLANS:
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez] IS false
Statistics: Num rows: 4 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: decimal(38,18))
+ value expressions: _col1 (type: decimal(5,0))
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -990,14 +990,14 @@ STAGE PLANS:
includeColumns: [0]
dataColumns: c:double
partitionColumnCount: 0
- scratchColumnTypeNames: [decimal(10,5), decimal(38,18)]
+ scratchColumnTypeNames: [decimal(10,5), decimal(5,0)]
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
enableConditionsNotMet: hive.execution.engine mr IN [tez] IS false
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: decimal(38,18))
+ expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: decimal(5,0))
outputColumnNames: _col0, _col1
Statistics: Num rows: 4 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
@@ -1022,9 +1022,9 @@ POSTHOOK: query: select c, trunc(CAST (c AS DECIMAL(10,5))) from trunc_number or
POSTHOOK: type: QUERY
POSTHOOK: Input: default@trunc_number
#### A masked pattern was here ####
-0.54321 0.000000000000000000
-12345.0 12345.000000000000000000
-12345.54321 12345.000000000000000000
+0.54321 0
+12345.0 12345
+12345.54321 12345
NULL NULL
PREHOOK: query: select trunc(ctimestamp1, 'MM'), ctimestamp1 from alltypesorc order by ctimestamp1 LIMIT 10
PREHOOK: type: QUERY
@@ -1286,9 +1286,9 @@ POSTHOOK: query: select c, 0, trunc(CAST (c AS DECIMAL(10,5)), 0) from trunc_num
POSTHOOK: type: QUERY
POSTHOOK: Input: default@trunc_number
#### A masked pattern was here ####
-0.54321 0 0.000000000000000000
-12345.0 0 12345.000000000000000000
-12345.54321 0 12345.000000000000000000
+0.54321 0 0
+12345.0 0 12345
+12345.54321 0 12345
NULL 0 NULL
PREHOOK: query: select c, -1, trunc(CAST (c AS DECIMAL(10,5)), -1) from trunc_number order by c
PREHOOK: type: QUERY
@@ -1298,9 +1298,9 @@ POSTHOOK: query: select c, -1, trunc(CAST (c AS DECIMAL(10,5)), -1) from trunc_n
POSTHOOK: type: QUERY
POSTHOOK: Input: default@trunc_number
#### A masked pattern was here ####
-0.54321 -1 0.000000000000000000
-12345.0 -1 12340.000000000000000000
-12345.54321 -1 12340.000000000000000000
+0.54321 -1 0
+12345.0 -1 12340
+12345.54321 -1 12340
NULL -1 NULL
PREHOOK: query: select c, 1, trunc(CAST (c AS DECIMAL(10,5)), 1) from trunc_number order by c
PREHOOK: type: QUERY
@@ -1310,9 +1310,9 @@ POSTHOOK: query: select c, 1, trunc(CAST (c AS DECIMAL(10,5)), 1) from trunc_num
POSTHOOK: type: QUERY
POSTHOOK: Input: default@trunc_number
#### A masked pattern was here ####
-0.54321 1 0.500000000000000000
-12345.0 1 12345.000000000000000000
-12345.54321 1 12345.500000000000000000
+0.54321 1 0.5
+12345.0 1 12345.0
+12345.54321 1 12345.5
NULL 1 NULL
PREHOOK: query: select c, trunc(c) from trunc_number order by c
PREHOOK: type: QUERY
@@ -1334,9 +1334,9 @@ POSTHOOK: query: select c, trunc(CAST (c AS DECIMAL(10,5))) from trunc_number or
POSTHOOK: type: QUERY
POSTHOOK: Input: default@trunc_number
#### A masked pattern was here ####
-0.54321 0.000000000000000000
-12345.0 12345.000000000000000000
-12345.54321 12345.000000000000000000
+0.54321 0
+12345.0 12345
+12345.54321 12345
NULL NULL
PREHOOK: query: drop table trunc_number
PREHOOK: type: DROPTABLE