You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2018/02/16 15:52:16 UTC
[02/32] hive git commit: HIVE-18622: Vectorization: IF Statements,
Comparisons, and more do not handle NULLs correctly (Matt McCline,
reviewed by Sergey Shelukhin, Deepak Jaiswal, Vihang Karajgaonkar)
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/ql/src/test/results/clientpositive/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_case.q.out b/ql/src/test/results/clientpositive/vectorized_case.q.out
index 50e9b0e..b89fa51 100644
--- a/ql/src/test/results/clientpositive/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_case.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
select
csmallint,
case
@@ -16,7 +16,7 @@ where csmallint = 418
or csmallint = 12205
or csmallint = 10583
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
select
csmallint,
case
@@ -51,6 +51,7 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
@@ -87,6 +88,12 @@ STAGE PLANS:
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [1]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, string, string, string]
Stage: Stage-0
Fetch Operator
@@ -136,7 +143,7 @@ POSTHOOK: Input: default@alltypesorc
10583 c c
418 a a
12205 b b
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
select
csmallint,
case
@@ -154,7 +161,7 @@ where csmallint = 418
or csmallint = 12205
or csmallint = 10583
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
select
csmallint,
case
@@ -189,6 +196,7 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Filter Operator
Filter Vectorization:
className: VectorFilterOperator
@@ -225,6 +233,12 @@ STAGE PLANS:
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [1]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, string, string, string, bigint, string, string]
Stage: Stage-0
Fetch Operator
@@ -232,13 +246,13 @@ STAGE PLANS:
Processor Tree:
ListSink
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
select
sum(case when cint % 2 = 0 then 1 else 0 end) as ceven,
sum(case when cint % 2 = 1 then 1 else 0 end) as codd
from alltypesorc
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
select
sum(case when cint % 2 = 0 then 1 else 0 end) as ceven,
sum(case when cint % 2 = 1 then 1 else 0 end) as codd
@@ -261,6 +275,7 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: CASE WHEN (((cint % 2) = 0)) THEN (1) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (1) ELSE (0) END (type: int)
outputColumnNames: _col0, _col1
@@ -301,6 +316,12 @@ STAGE PLANS:
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [2]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -339,14 +360,14 @@ from alltypesorc
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-5110 4607
-PREHOOK: query: explain vectorization expression
+4086 3583
+PREHOOK: query: explain vectorization detail
select
sum(case when cint % 2 = 0 then cint else 0 end) as ceven,
sum(case when cint % 2 = 1 then cint else 0 end) as codd
from alltypesorc
PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
select
sum(case when cint % 2 = 0 then cint else 0 end) as ceven,
sum(case when cint % 2 = 1 then cint else 0 end) as codd
@@ -369,6 +390,7 @@ STAGE PLANS:
Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
+ vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: CASE WHEN (((cint % 2) = 0)) THEN (cint) ELSE (0) END (type: int), CASE WHEN (((cint % 2) = 1)) THEN (cint) ELSE (0) END (type: int)
outputColumnNames: _col0, _col1
@@ -409,6 +431,12 @@ STAGE PLANS:
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
+ rowBatchContext:
+ dataColumnCount: 12
+ includeColumns: [2]
+ dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint]
Reduce Vectorization:
enabled: false
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -466,10 +494,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@test_1
POSTHOOK: Lineage: test_1.attr SCRIPT []
POSTHOOK: Lineage: test_1.member SCRIPT []
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE attr+2.0 END FROM test_1
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
@@ -489,6 +517,7 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
+ vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: decimal(11,0))
outputColumnNames: _col0
@@ -518,6 +547,12 @@ STAGE PLANS:
allNative: false
usesVectorUDFAdaptor: true
vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: member:decimal(10,0), attr:decimal(10,0)
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0), decimal(11,0)]
Stage: Stage-0
Fetch Operator
@@ -536,10 +571,10 @@ POSTHOOK: Input: default@test_1
3
4
4
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT CASE WHEN member =1.0 THEN 1.0 ELSE attr+2.0 END FROM test_1
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
@@ -559,6 +594,7 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
+ vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: CASE WHEN ((member = 1)) THEN (1) ELSE ((attr + 2)) END (type: decimal(11,0))
outputColumnNames: _col0
@@ -588,6 +624,12 @@ STAGE PLANS:
allNative: false
usesVectorUDFAdaptor: true
vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: member:decimal(10,0), attr:decimal(10,0)
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)]
Stage: Stage-0
Fetch Operator
@@ -606,10 +648,10 @@ POSTHOOK: Input: default@test_1
3
4
1
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT CASE WHEN member =1.0 THEN attr+1.0 ELSE 2.0 END FROM test_1
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
@@ -629,6 +671,7 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 672 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
+ vectorizationSchemaColumns: [0:member:decimal(10,0), 1:attr:decimal(10,0), 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (2) END (type: decimal(11,0))
outputColumnNames: _col0
@@ -658,6 +701,12 @@ STAGE PLANS:
allNative: false
usesVectorUDFAdaptor: true
vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: member:decimal(10,0), attr:decimal(10,0)
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, decimal(11,0), decimal(11,0)]
Stage: Stage-0
Fetch Operator
@@ -694,10 +743,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@test_2
POSTHOOK: Lineage: test_2.attr SCRIPT []
POSTHOOK: Lineage: test_2.member SCRIPT []
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT CASE WHEN member=1 THEN attr+1 else attr+2 END FROM test_2
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
@@ -717,6 +766,7 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
+ vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE ((attr + 2)) END (type: bigint)
outputColumnNames: _col0
@@ -746,6 +796,12 @@ STAGE PLANS:
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: member:bigint, attr:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint, bigint]
Stage: Stage-0
Fetch Operator
@@ -764,10 +820,10 @@ POSTHOOK: Input: default@test_2
3
4
4
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT CASE WHEN member=1 THEN null else attr+2 END FROM test_2
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
@@ -787,6 +843,7 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
+ vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: CASE WHEN ((member = 1)) THEN (null) ELSE ((attr + 2)) END (type: bigint)
outputColumnNames: _col0
@@ -816,6 +873,12 @@ STAGE PLANS:
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: member:bigint, attr:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint]
Stage: Stage-0
Fetch Operator
@@ -834,10 +897,10 @@ POSTHOOK: Input: default@test_2
3
4
NULL
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2
PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT CASE WHEN member=1 THEN attr+1 else null END FROM test_2
POSTHOOK: type: QUERY
PLAN VECTORIZATION:
@@ -857,6 +920,7 @@ STAGE PLANS:
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
+ vectorizationSchemaColumns: [0:member:bigint, 1:attr:bigint, 2:ROW__ID:struct<transactionid:bigint,bucketid:int,rowid:bigint>]
Select Operator
expressions: CASE WHEN ((member = 1)) THEN ((attr + 1)) ELSE (null) END (type: bigint)
outputColumnNames: _col0
@@ -886,6 +950,12 @@ STAGE PLANS:
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: member:bigint, attr:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, bigint]
Stage: Stage-0
Fetch Operator
@@ -904,3 +974,227 @@ POSTHOOK: Input: default@test_2
NULL
NULL
4
+PREHOOK: query: select count(*), sum(a.ceven)
+from (
+select
+ case when cint % 2 = 0 then 1 else 0 end as ceven
+from alltypesorc) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*), sum(a.ceven)
+from (
+select
+ case when cint % 2 = 0 then 1 else 0 end as ceven
+from alltypesorc) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+12288 4086
+PREHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then 1 else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then 1 else 0 end) = 0) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then 1 else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then 1 else 0 end) = 0) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+8202
+PREHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then 1 else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then 1 else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then 1 else 0 end) = 0 AND cint is NOT NULL) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+5087
+PREHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then 1 else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then 1 else 0 end) = 1) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then 1 else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then 1 else 0 end) = 1) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+4086
+PREHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then 1 else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then 1 else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then 1 else 0 end) = 1 AND cint is NOT NULL) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+4086
+PREHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then 1 else 0 end) as ceven
+from alltypesorc
+where cint is null) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then 1 else 0 end) as ceven
+from alltypesorc
+where cint is null) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3115
+PREHOOK: query: select count(*), sum(a.ceven)
+from (
+select
+ case when cint % 2 = 0 then cint else 0 end as ceven
+from alltypesorc) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*), sum(a.ceven)
+from (
+select
+ case when cint % 2 = 0 then cint else 0 end as ceven
+from alltypesorc) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+12288 248718130534
+PREHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then cint else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then cint else 0 end) = 0) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then cint else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then cint else 0 end) = 0) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+8202
+PREHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then cint else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then cint else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then cint else 0 end) = 0 AND cint is NOT NULL) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+5087
+PREHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then cint else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then cint else 0 end) = cint) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then cint else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then cint else 0 end) = cint) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+4086
+PREHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then cint else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then cint else 0 end) as ceven
+from alltypesorc
+where (case when cint % 2 = 0 then cint else 0 end) = cint AND cint is NOT NULL) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+4086
+PREHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then cint else 0 end) as ceven
+from alltypesorc
+where cint is null) a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*)
+from (
+select
+ (case when cint % 2 = 0 then cint else 0 end) as ceven
+from alltypesorc
+where cint is null) a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+3115
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index b743e64..5e25c47 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector;
+import java.util.Arrays;
+
/**
* This class supports string and binary data by value reference -- i.e. each field is
@@ -93,7 +95,12 @@ public class BytesColumnVector extends ColumnVector {
initBuffer(0);
}
- /** Set a field by reference.
+ /**
+ * Set a field by reference.
+ *
+ * This is a FAST version that assumes the caller has checked to make sure the sourceBuf
+ * is not null, that elementNum is correctly adjusted for isRepeating, and that the isNull entry
+ * has been set. Only the output entry fields will be set by this method.
*
* @param elementNum index within column vector to set
* @param sourceBuf container of source data
@@ -161,6 +168,10 @@ public class BytesColumnVector extends ColumnVector {
* DO NOT USE this method unless it's not practical to set data by reference with setRef().
* Setting data by reference tends to run a lot faster than copying data in.
*
+ * This is a FAST version that assumes the caller has checked to make sure the sourceBuf
+ * is not null, that elementNum is correctly adjusted for isRepeating, and that the isNull entry
+ * has been set. Only the output entry fields will be set by this method.
+ *
* @param elementNum index within column vector to set
* @param sourceBuf container of source data
* @param start start byte position within source
@@ -183,6 +194,10 @@ public class BytesColumnVector extends ColumnVector {
* DO NOT USE this method unless it's not practical to set data by reference with setRef().
* Setting data by reference tends to run a lot faster than copying data in.
*
+ * This is a FAST version that assumes the caller has checked to make sure the sourceBuf
+ * is not null, that elementNum is correctly adjusted for isRepeating, and that the isNull entry
+ * has been set. Only the output entry fields will be set by this method.
+ *
* @param elementNum index within column vector to set
* @param sourceBuf container of source data
*/
@@ -309,46 +324,86 @@ public class BytesColumnVector extends ColumnVector {
/** Copy the current object contents into the output. Only copy selected entries,
* as indicated by selectedInUse and the sel array.
*/
+ @Override
public void copySelected(
- boolean selectedInUse, int[] sel, int size, BytesColumnVector output) {
+ boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) {
- // Output has nulls if and only if input has nulls.
- output.noNulls = noNulls;
+ BytesColumnVector output = (BytesColumnVector) outputColVector;
+ boolean[] outputIsNull = output.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
output.isRepeating = false;
// Handle repeating case
if (isRepeating) {
- output.setVal(0, vector[0], start[0], length[0]);
- output.isNull[0] = isNull[0];
+ if (noNulls || !isNull[0]) {
+ outputIsNull[0] = false;
+ output.setVal(0, vector[0], start[0], length[0]);
+ } else {
+ outputIsNull[0] = true;
+ output.noNulls = false;
+ }
output.isRepeating = true;
return;
}
// Handle normal case
- // Copy data values over
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.setVal(i, vector[i], start[i], length[i]);
- }
- }
- else {
- for (int i = 0; i < size; i++) {
- output.setVal(i, vector[i], start[i], length[i]);
+ if (noNulls) {
+ if (selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ output.setVal(i, vector[i], start[i], length[i]);
+ }
+ } else {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ output.setVal(i, vector[i], start[i], length[i]);
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != size; i++) {
+ output.setVal(i, vector[i], start[i], length[i]);
+ }
}
- }
+ } else /* there are nulls in our column */ {
+
+ // Carefully handle NULLs...
- // Copy nulls over if needed
- if (!noNulls) {
if (selectedInUse) {
for (int j = 0; j < size; j++) {
int i = sel[j];
- output.isNull[i] = isNull[i];
+ if (!isNull[i]) {
+ output.isNull[i] = false;
+ output.setVal(i, vector[i], start[i], length[i]);
+ } else {
+ output.isNull[i] = true;
+ output.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i < size; i++) {
+ if (!isNull[i]) {
+ output.isNull[i] = false;
+ output.setVal(i, vector[i], start[i], length[i]);
+ } else {
+ output.isNull[i] = true;
+ output.noNulls = false;
+ }
}
- }
- else {
- System.arraycopy(isNull, 0, output.isNull, 0, size);
}
}
}
@@ -390,9 +445,9 @@ public class BytesColumnVector extends ColumnVector {
// Fill the all the vector entries with provided value
public void fill(byte[] value) {
- noNulls = true;
isRepeating = true;
- setRef(0, value, 0, value.length);
+ isNull[0] = false;
+ setVal(0, value, 0, value.length);
}
// Fill the column vector with nulls
@@ -403,18 +458,55 @@ public class BytesColumnVector extends ColumnVector {
isNull[0] = true;
}
+ /**
+ * Set the element in this column vector from the given input vector.
+ *
+ * The inputElementNum will be adjusted to 0 if the input column has isRepeating set.
+ *
+ * On the other hand, the outputElementNum must have been adjusted to 0 in ADVANCE when the output
+ * has isRepeating set.
+ *
+ * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+ * lets the caller do output NULL processing in advance, which may cause the output result
+ * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+ *
+ * The inputColVector noNulls and isNull entry will be examined. The output will only
+ * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where
+ * inputElementNum may have been adjusted to 0 for isRepeating.
+ *
+ * If the input entry is NULL or out-of-range, the output will be marked as NULL.
+ * I.e. set output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+ * is the DecimalColumnVector which can find the input decimal does not fit in the output
+ * precision/scale.
+ *
+ * (Since we return immediately if the output entry is NULL, we have no need to, and do not,
+ * mark the output entry as NOT NULL.)
+ *
+ */
@Override
- public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
- if (inputVector.isRepeating) {
+ public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+ // Invariants.
+ if (isRepeating && outputElementNum != 0) {
+ throw new AssertionError("Output column number expected to be 0 when isRepeating");
+ }
+ if (inputColVector.isRepeating) {
inputElementNum = 0;
}
- if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
- isNull[outElementNum] = false;
- BytesColumnVector in = (BytesColumnVector) inputVector;
- setVal(outElementNum, in.vector[inputElementNum],
+
+ // Do NOTHING if output is NULL.
+ if (!noNulls && isNull[outputElementNum]) {
+ return;
+ }
+
+ if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+ BytesColumnVector in = (BytesColumnVector) inputColVector;
+ setVal(outputElementNum, in.vector[inputElementNum],
in.start[inputElementNum], in.length[inputElementNum]);
} else {
- isNull[outElementNum] = true;
+
+ // Only mark output NULL when input is NULL.
+ isNull[outputElementNum] = true;
noNulls = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
index bce0bd7..a498428 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java
@@ -114,120 +114,148 @@ public abstract class ColumnVector {
abstract public void flatten(boolean selectedInUse, int[] sel, int size);
- // Simplify vector by brute-force flattening noNulls if isRepeating
- // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
- // with many arguments.
- protected void flattenRepeatingNulls(boolean selectedInUse, int[] sel,
- int size) {
+ // Simplify vector by brute-force flattening noNulls if isRepeating
+ // This can be used to reduce combinatorial explosion of code paths in VectorExpressions
+ // with many arguments.
+ protected void flattenRepeatingNulls(boolean selectedInUse, int[] sel,
+ int size) {
- boolean nullFillValue;
+ boolean nullFillValue;
- if (noNulls) {
- nullFillValue = false;
- } else {
- nullFillValue = isNull[0];
+ if (noNulls) {
+ nullFillValue = false;
+ } else {
+ nullFillValue = isNull[0];
+ }
+
+ if (selectedInUse) {
+ for (int j = 0; j < size; j++) {
+ int i = sel[j];
+ isNull[i] = nullFillValue;
}
+ } else {
+ Arrays.fill(isNull, 0, size, nullFillValue);
+ }
+
+ // all nulls are now explicit
+ noNulls = false;
+ }
+ protected void flattenNoNulls(boolean selectedInUse, int[] sel,
+ int size) {
+ if (noNulls) {
+ noNulls = false;
if (selectedInUse) {
for (int j = 0; j < size; j++) {
- int i = sel[j];
- isNull[i] = nullFillValue;
+ isNull[sel[j]] = false;
}
} else {
- Arrays.fill(isNull, 0, size, nullFillValue);
+ Arrays.fill(isNull, 0, size, false);
}
-
- // all nulls are now explicit
- noNulls = false;
}
+ }
- protected void flattenNoNulls(boolean selectedInUse, int[] sel,
- int size) {
- if (noNulls) {
- noNulls = false;
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- isNull[sel[j]] = false;
- }
- } else {
- Arrays.fill(isNull, 0, size, false);
- }
- }
- }
+ /**
+ * Restore the state of isRepeating and noNulls to what it was
+ * before flattening. This must only be called just after flattening
+ * and then evaluating a VectorExpression on the column vector.
+ * It is an optimization that allows other operations on the same
+ * column to continue to benefit from the isRepeating and noNulls
+ * indicators.
+ */
+ public void unFlatten() {
+ isRepeating = preFlattenIsRepeating;
+ noNulls = preFlattenNoNulls;
+ }
- /**
- * Restore the state of isRepeating and noNulls to what it was
- * before flattening. This must only be called just after flattening
- * and then evaluating a VectorExpression on the column vector.
- * It is an optimization that allows other operations on the same
- * column to continue to benefit from the isRepeating and noNulls
- * indicators.
- */
- public void unFlatten() {
- isRepeating = preFlattenIsRepeating;
- noNulls = preFlattenNoNulls;
- }
+ // Record repeating and no nulls state to be restored later.
+ protected void flattenPush() {
+ preFlattenIsRepeating = isRepeating;
+ preFlattenNoNulls = noNulls;
+ }
- // Record repeating and no nulls state to be restored later.
- protected void flattenPush() {
- preFlattenIsRepeating = isRepeating;
- preFlattenNoNulls = noNulls;
- }
+ /**
+ * Set the element in this column vector from the given input vector.
+ *
+ * The inputElementNum will be adjusted to 0 if the input column has isRepeating set.
+ *
+ * On the other hand, the outputElementNum must have been adjusted to 0 in ADVANCE when the output
+ * has isRepeating set.
+ *
+ * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+ * supports the caller to do output NULL processing in advance that may cause the output results
+ * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+ *
+ * The inputColVector noNulls and isNull entry will be examined. The output will only
+ * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where
+ * inputElementNum may have been adjusted to 0 for isRepeating.
+ *
+ * If the input entry is NULL or out-of-range, the output will be marked as NULL.
+ * I.e. set output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+ * is the DecimalColumnVector which can find the input decimal does not fit in the output
+ * precision/scale.
+ *
+ * (Since we return immediately if the output entry is NULL, we have no need and do not mark
+ * the output entry to NOT NULL).
+ *
+ */
+ public abstract void setElement(int outputElementNum, int inputElementNum,
+ ColumnVector inputColVector);
- /**
- * Set the element in this column vector from the given input vector.
- * This method can assume that the output does not have isRepeating set.
- */
- public abstract void setElement(int outElementNum, int inputElementNum,
- ColumnVector inputVector);
-
- /**
- * Initialize the column vector. This method can be overridden by specific column vector types.
- * Use this method only if the individual type of the column vector is not known, otherwise its
- * preferable to call specific initialization methods.
- */
- public void init() {
- // Do nothing by default
- }
+ /**
+ * Copy the current object contents into the output. Only copy selected entries
+ * as indicated by selectedInUse and the sel array.
+ */
+ public abstract void copySelected(
+ boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector);
- /**
- * Ensure the ColumnVector can hold at least size values.
- * This method is deliberately *not* recursive because the complex types
- * can easily have more (or less) children than the upper levels.
- * @param size the new minimum size
- * @param preserveData should the old data be preserved?
- */
- public void ensureSize(int size, boolean preserveData) {
- if (isNull.length < size) {
- boolean[] oldArray = isNull;
- isNull = new boolean[size];
- if (preserveData && !noNulls) {
- if (isRepeating) {
- isNull[0] = oldArray[0];
- } else {
- System.arraycopy(oldArray, 0, isNull, 0, oldArray.length);
- }
+ /**
+ * Initialize the column vector. This method can be overridden by specific column vector types.
+ * Use this method only if the individual type of the column vector is not known, otherwise its
+ * preferable to call specific initialization methods.
+ */
+ public void init() {
+ // Do nothing by default
+ }
+
+ /**
+ * Ensure the ColumnVector can hold at least size values.
+ * This method is deliberately *not* recursive because the complex types
+ * can easily have more (or less) children than the upper levels.
+ * @param size the new minimum size
+ * @param preserveData should the old data be preserved?
+ */
+ public void ensureSize(int size, boolean preserveData) {
+ if (isNull.length < size) {
+ boolean[] oldArray = isNull;
+ isNull = new boolean[size];
+ if (preserveData && !noNulls) {
+ if (isRepeating) {
+ isNull[0] = oldArray[0];
+ } else {
+ System.arraycopy(oldArray, 0, isNull, 0, oldArray.length);
}
}
}
+ }
- /**
- * Print the value for this column into the given string builder.
- * @param buffer the buffer to print into
- * @param row the id of the row to print
- */
- public abstract void stringifyValue(StringBuilder buffer,
- int row);
-
- /**
- * Shallow copy of the contents of this vector to the other vector;
- * replaces other vector's values.
- */
- public void shallowCopyTo(ColumnVector otherCv) {
- otherCv.isNull = isNull;
- otherCv.noNulls = noNulls;
- otherCv.isRepeating = isRepeating;
- otherCv.preFlattenIsRepeating = preFlattenIsRepeating;
- otherCv.preFlattenNoNulls = preFlattenNoNulls;
- }
+ /**
+ * Print the value for this column into the given string builder.
+ * @param buffer the buffer to print into
+ * @param row the id of the row to print
+ */
+ public abstract void stringifyValue(StringBuilder buffer,
+ int row);
+
+ /**
+ * Shallow copy of the contents of this vector to the other vector;
+ * replaces other vector's values.
+ */
+ public void shallowCopyTo(ColumnVector otherCv) {
+ otherCv.isNull = isNull;
+ otherCv.noNulls = noNulls;
+ otherCv.isRepeating = isRepeating;
+ otherCv.preFlattenIsRepeating = preFlattenIsRepeating;
+ otherCv.preFlattenNoNulls = preFlattenNoNulls;
}
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java
index 37b0bf5..615eb6f 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/Decimal64ColumnVector.java
@@ -28,7 +28,7 @@ public class Decimal64ColumnVector extends LongColumnVector {
public short scale;
public short precision;
- private HiveDecimalWritable tempHiveDecWritable;
+ private HiveDecimalWritable scratchHiveDecWritable;
public Decimal64ColumnVector(int precision, int scale) {
this(VectorizedRowBatch.DEFAULT_SIZE, precision, scale);
@@ -38,30 +38,124 @@ public class Decimal64ColumnVector extends LongColumnVector {
super(size);
this.precision = (short) precision;
this.scale = (short) scale;
- tempHiveDecWritable = new HiveDecimalWritable();
+ scratchHiveDecWritable = new HiveDecimalWritable();
}
+ /**
+ * Set a Decimal64 field from a HiveDecimalWritable.
+ *
+ * This is a FAST version that assumes the caller has checked to make sure the writable
+ * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry
+ * has been set.
+ *
+ * We will check for precision/scale range, so the entry's NULL may get set.
+ * Otherwise, only the output entry fields will be set by this method.
+ *
+ * @param elementNum
+ * @param writable
+ */
public void set(int elementNum, HiveDecimalWritable writable) {
- tempHiveDecWritable.set(writable);
- tempHiveDecWritable.mutateEnforcePrecisionScale(precision, scale);
- if (!tempHiveDecWritable.isSet()) {
+ scratchHiveDecWritable.set(writable);
+ scratchHiveDecWritable.mutateEnforcePrecisionScale(precision, scale);
+ if (!scratchHiveDecWritable.isSet()) {
noNulls = false;
isNull[elementNum] = true;
} else {
- isNull[elementNum] = false;
- vector[elementNum] = tempHiveDecWritable.serialize64(scale);
+ vector[elementNum] = scratchHiveDecWritable.serialize64(scale);
}
}
+ /**
+ * Set a Decimal64 field from a HiveDecimal.
+ *
+ * This is a FAST version that assumes the caller has checked to make sure the hiveDec
+ * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry
+ * has been set.
+ *
+ * We will check for precision/scale range, so the entry's NULL may get set.
+ * Otherwise, only the output entry fields will be set by this method.
+ *
+ * @param elementNum
+ * @param hiveDec
+ */
public void set(int elementNum, HiveDecimal hiveDec) {
- tempHiveDecWritable.set(hiveDec);
- tempHiveDecWritable.mutateEnforcePrecisionScale(precision, scale);
- if (!tempHiveDecWritable.isSet()) {
+ scratchHiveDecWritable.set(hiveDec);
+ scratchHiveDecWritable.mutateEnforcePrecisionScale(precision, scale);
+ if (!scratchHiveDecWritable.isSet()) {
noNulls = false;
isNull[elementNum] = true;
} else {
- isNull[elementNum] = false;
- vector[elementNum] = tempHiveDecWritable.serialize64(scale);
+ vector[elementNum] = scratchHiveDecWritable.serialize64(scale);
}
}
+
+  /**
+   * Set the element in this column vector from the given input vector.
+   *
+   * The inputElementNum will be adjusted to 0 if the input column has isRepeating set.
+   *
+   * On the other hand, the outputElementNum must have been adjusted to 0 in ADVANCE when the
+   * output has isRepeating set.
+   *
+   * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+   * supports the caller to do output NULL processing in advance that may cause the output results
+   * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+   *
+   * The inputColVector noNulls and isNull entry will be examined. The output will only
+   * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where
+   * inputElementNum may have been adjusted to 0 for isRepeating.
+   *
+   * If the input entry is NULL or out-of-range, the output will be marked as NULL.
+   * I.e. set output noNulls = false and isNull[outputElementNum] = true. Out-of-range here means
+   * the input value, read using the input vector's scale, does not fit in this vector's
+   * precision/scale.
+   *
+   * (Since we return immediately if the output entry is NULL, we have no need and do not mark
+   * the output entry to NOT NULL).
+   *
+   * @param outputElementNum index of the entry to write in this vector; must be 0 when this
+   *                         vector has isRepeating set
+   * @param inputElementNum index of the entry to read from inputColVector
+   * @param inputColVector the source vector; cast to Decimal64ColumnVector when the input
+   *                       entry is not NULL
+   * @throws RuntimeException if this vector has isRepeating set and outputElementNum is not 0
+   */
+  @Override
+  public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+    // Invariants.
+    if (isRepeating && outputElementNum != 0) {
+      throw new RuntimeException("Output column number expected to be 0 when isRepeating");
+    }
+    if (inputColVector.isRepeating) {
+      inputElementNum = 0;
+    }
+
+    // Do NOTHING if output is NULL.
+    if (!noNulls && isNull[outputElementNum]) {
+      return;
+    }
+
+    if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+      Decimal64ColumnVector decimal64ColVector = (Decimal64ColumnVector) inputColVector;
+      scratchHiveDecWritable.deserialize64(
+          decimal64ColVector.vector[inputElementNum], decimal64ColVector.scale);
+      scratchHiveDecWritable.mutateEnforcePrecisionScale(precision, scale);
+      if (scratchHiveDecWritable.isSet()) {
+
+        // Store at the OUTPUT index; inputElementNum only addresses inputColVector.
+        vector[outputElementNum] = scratchHiveDecWritable.serialize64(scale);
+      } else {
+
+        // In effect, the input is NULL because of out-of-range precision/scale.
+        noNulls = false;
+        isNull[outputElementNum] = true;
+      }
+    } else {
+
+      // Only mark output NULL when input is NULL.
+      isNull[outputElementNum] = true;
+      noNulls = false;
+    }
+  }
+
+ /**
+ * Return a convenience writable object stored by this column vector.
+ *
+ * The same instance is used as working storage by the set(...) methods and by setElement of
+ * this vector, so its contents are overwritten by those calls.
+ *
+ * @return the scratch HiveDecimalWritable held by this vector (the instance itself, not a copy)
+ */
+ public HiveDecimalWritable getScratchWritable() {
+ return scratchHiveDecWritable;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
index e41e19f..c1d6a3a 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java
@@ -19,6 +19,8 @@
package org.apache.hadoop.hive.ql.exec.vector;
+import java.util.Arrays;
+
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -51,37 +53,74 @@ public class DecimalColumnVector extends ColumnVector {
// Fill the all the vector entries with provided value
public void fill(HiveDecimal value) {
- noNulls = true;
isRepeating = true;
+ isNull[0] = false;
if (vector[0] == null) {
vector[0] = new HiveDecimalWritable(value);
- } else {
- vector[0].set(value);
}
+ set(0, value);
}
@Override
public void flatten(boolean selectedInUse, int[] sel, int size) {
- // TODO Auto-generated method stub
+ throw new RuntimeException("Not implemented");
}
+ /**
+ * Set the element in this column vector from the given input vector.
+ *
+ * The inputElementNum will be adjusted to 0 if the input column has isRepeating set.
+ *
+ * On the other hand, the outputElementNum must have been adjusted to 0 in ADVANCE when the output
+ * has isRepeating set.
+ *
+ * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+ * supports the caller to do output NULL processing in advance that may cause the output results
+ * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+ *
+ * The inputColVector noNulls and isNull entry will be examined. The output will only
+ * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where
+ * inputElementNum may have been adjusted to 0 for isRepeating.
+ *
+ * If the input entry is NULL or out-of-range, the output will be marked as NULL.
+ * I.e. set output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+ * is the DecimalColumnVector which can find the input decimal does not fit in the output
+ * precision/scale.
+ *
+ * (Since we return immediately if the output entry is NULL, we have no need and do not mark
+ * the output entry to NOT NULL).
+ *
+ */
@Override
- public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
- if (inputVector.isRepeating) {
+ public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+ // Invariants.
+ if (isRepeating && outputElementNum != 0) {
+ throw new RuntimeException("Output column number expected to be 0 when isRepeating");
+ }
+ if (inputColVector.isRepeating) {
inputElementNum = 0;
}
- if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
- vector[outElementNum].set(
- ((DecimalColumnVector) inputVector).vector[inputElementNum],
+
+ // Do NOTHING if output is NULL.
+ if (!noNulls && isNull[outputElementNum]) {
+ return;
+ }
+
+ if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+ vector[outputElementNum].set(
+ ((DecimalColumnVector) inputColVector).vector[inputElementNum],
precision, scale);
- if (!vector[outElementNum].isSet()) {
- isNull[outElementNum] = true;
+ if (!vector[outputElementNum].isSet()) {
+
+ // In effect, the input is NULL because of out-of-range precision/scale.
+ isNull[outputElementNum] = true;
noNulls = false;
- } else {
- isNull[outElementNum] = false;
}
} else {
- isNull[outElementNum] = true;
+
+ // Only mark output NULL when input is NULL.
+ isNull[outputElementNum] = true;
noNulls = false;
}
}
@@ -98,23 +137,45 @@ public class DecimalColumnVector extends ColumnVector {
}
}
- public void set(int elementNum, HiveDecimalWritable writeable) {
- vector[elementNum].set(writeable, precision, scale);
+ /**
+ * Set a decimal field from a HiveDecimalWritable.
+ *
+ * This is a FAST version that assumes the caller has checked to make sure the writable
+ * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry
+ * has been set.
+ *
+ * We will check for precision/scale range, so the entry's NULL may get set.
+ * Otherwise, only the output entry fields will be set by this method.
+ *
+ * @param elementNum
+ * @param writable
+ */
+ public void set(int elementNum, HiveDecimalWritable writable) {
+ vector[elementNum].set(writable, precision, scale);
if (!vector[elementNum].isSet()) {
noNulls = false;
isNull[elementNum] = true;
- } else {
- isNull[elementNum] = false;
}
}
+ /**
+ * Set a decimal from a HiveDecimal.
+ *
+ * This is a FAST version that assumes the caller has checked to make sure the hiveDec
+ * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry
+ * has been set.
+ *
+ * We will check for precision/scale range, so the entry's NULL may get set.
+ * Otherwise, only the output entry fields will be set by this method.
+ *
+ * @param elementNum
+ * @param hiveDec
+ */
public void set(int elementNum, HiveDecimal hiveDec) {
vector[elementNum].set(hiveDec, precision, scale);
if (!vector[elementNum].isSet()) {
noNulls = false;
isNull[elementNum] = true;
- } else {
- isNull[elementNum] = false;
}
}
@@ -149,4 +210,91 @@ public class DecimalColumnVector extends ColumnVector {
other.precision = precision;
other.vector = vector;
}
+
+ /**
+ * Copy the current object contents into the output. Only copy selected entries,
+ * as indicated by selectedInUse and the sel array.
+ *
+ * Values are written with output.set(int, HiveDecimalWritable), which marks an entry NULL in
+ * the output when the value does not fit the output precision/scale.
+ *
+ * @param selectedInUse when true, only the entries listed in sel[0..size) are copied;
+ * not consulted when this vector has isRepeating set
+ * @param sel the selected entry indices; only read when selectedInUse is true
+ * @param size the number of entries (or of sel entries) to process
+ * @param outputColVector the destination vector; it is cast to DecimalColumnVector
+ */
+ @Override
+ public void copySelected(
+ boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) {
+
+ DecimalColumnVector output = (DecimalColumnVector) outputColVector;
+ boolean[] outputIsNull = output.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
+ output.isRepeating = false;
+
+ // Handle repeating case
+ if (isRepeating) {
+ if (noNulls || !isNull[0]) {
+ outputIsNull[0] = false;
+ output.set(0, vector[0]);
+ } else {
+ outputIsNull[0] = true;
+ output.noNulls = false;
+ // Entry 0 is NULL; its stored value is reset to 0.
+ output.vector[0].setFromLong(0);
+ }
+ output.isRepeating = true;
+ return;
+ }
+
+ // Handle normal case
+
+ if (noNulls) {
+ if (selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ output.set(i, vector[i]);
+ }
+ } else {
+ // Output noNulls is already true, so the output isNull entries are not written here.
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ output.set(i, vector[i]);
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != size; i++) {
+ output.set(i, vector[i]);
+ }
+ }
+ } else /* there are nulls in our column */ {
+
+ // Carefully handle NULLs...
+
+ if (selectedInUse) {
+ for (int j = 0; j < size; j++) {
+ int i = sel[j];
+ if (!isNull[i]) {
+ output.isNull[i] = false;
+ output.set(i, vector[i]);
+ } else {
+ output.isNull[i] = true;
+ output.noNulls = false;
+ }
+ }
+ } else {
+ for (int i = 0; i < size; i++) {
+ if (!isNull[i]) {
+ output.isNull[i] = false;
+ output.set(i, vector[i]);
+ } else {
+ output.isNull[i] = true;
+ output.noNulls = false;
+ }
+ }
+ }
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
index e04af01..f833bde 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java
@@ -54,52 +54,88 @@ public class DoubleColumnVector extends ColumnVector {
// Copy the current object contents into the output. Only copy selected entries,
// as indicated by selectedInUse and the sel array.
+ @Override
public void copySelected(
- boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
+ boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) {
+
+ DoubleColumnVector output = (DoubleColumnVector) outputColVector;
+ boolean[] outputIsNull = output.isNull;
- // Output has nulls if and only if input has nulls.
- output.noNulls = noNulls;
+ // We do not need to do a column reset since we are carefully changing the output.
output.isRepeating = false;
// Handle repeating case
if (isRepeating) {
- output.vector[0] = vector[0];
- output.isNull[0] = isNull[0];
+ if (noNulls || !isNull[0]) {
+ outputIsNull[0] = false;
+ output.vector[0] = vector[0];
+ } else {
+ outputIsNull[0] = true;
+ output.noNulls = false;
+ }
output.isRepeating = true;
return;
}
// Handle normal case
- // Copy data values over
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.vector[i] = vector[i];
+ if (noNulls) {
+ if (selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ output.vector[i] = vector[i];
+ }
+ } else {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ output.vector[i] = vector[i];
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ System.arraycopy(vector, 0, output.vector, 0, size);
}
- }
- else {
- System.arraycopy(vector, 0, output.vector, 0, size);
- }
+ } else /* there are nulls in our column */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ output.noNulls = false;
- // Copy nulls over if needed
- if (!noNulls) {
if (selectedInUse) {
for (int j = 0; j < size; j++) {
int i = sel[j];
output.isNull[i] = isNull[i];
+ output.vector[i] = vector[i];
}
- }
- else {
+ } else {
System.arraycopy(isNull, 0, output.isNull, 0, size);
+ for (int i = 0; i < size; i++) {
+ output.vector[i] = vector[i];
+ }
}
}
}
// Fill the column vector with the provided value
public void fill(double value) {
- noNulls = true;
isRepeating = true;
+ isNull[0] = false;
vector[0] = value;
}
@@ -132,17 +168,54 @@ public class DoubleColumnVector extends ColumnVector {
flattenNoNulls(selectedInUse, sel, size);
}
+ /**
+ * Set the element in this column vector from the given input vector.
+ *
+ * The inputElementNum will be adjusted to 0 if the input column has isRepeating set.
+ *
+ * On the other hand, the outputElementNum must have been adjusted to 0 in ADVANCE when the output
+ * has isRepeating set.
+ *
+ * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+ * supports the caller to do output NULL processing in advance that may cause the output results
+ * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+ *
+ * The inputColVector noNulls and isNull entry will be examined. The output will only
+ * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where
+ * inputElementNum may have been adjusted to 0 for isRepeating.
+ *
+ * If the input entry is NULL or out-of-range, the output will be marked as NULL.
+ * I.e. set output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+ * is the DecimalColumnVector which can find the input decimal does not fit in the output
+ * precision/scale.
+ *
+ * (Since we return immediately if the output entry is NULL, we have no need and do not mark
+ * the output entry to NOT NULL).
+ *
+ */
@Override
- public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
- if (inputVector.isRepeating) {
+ public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+ // Invariants.
+ if (isRepeating && outputElementNum != 0) {
+ throw new RuntimeException("Output column number expected to be 0 when isRepeating");
+ }
+ if (inputColVector.isRepeating) {
inputElementNum = 0;
}
- if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
- isNull[outElementNum] = false;
- vector[outElementNum] =
- ((DoubleColumnVector) inputVector).vector[inputElementNum];
+
+ // Do NOTHING if output is NULL.
+ if (!noNulls && isNull[outputElementNum]) {
+ return;
+ }
+
+ if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+ vector[outputElementNum] =
+ ((DoubleColumnVector) inputColVector).vector[inputElementNum];
} else {
- isNull[outElementNum] = true;
+
+ // Only mark output NULL when input is NULL.
+ isNull[outputElementNum] = true;
noNulls = false;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
index f813b1b..9324bc0 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java
@@ -195,13 +195,57 @@ public class IntervalDayTimeColumnVector extends ColumnVector {
asScratchIntervalDayTime(elementNum2));
}
+ /**
+ * Set the element in this column vector from the given input vector.
+ *
+ * The inputElementNum will be adjusted to 0 if the input column has isRepeating set.
+ *
+ * On the other hand, the outputElementNum must have been adjusted to 0 in ADVANCE when the output
+ * has isRepeating set.
+ *
+ * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+ * supports the caller to do output NULL processing in advance that may cause the output results
+ * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+ *
+ * The inputColVector noNulls and isNull entry will be examined. The output will only
+ * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where
+ * inputElementNum may have been adjusted to 0 for isRepeating.
+ *
+ * If the input entry is NULL or out-of-range, the output will be marked as NULL.
+ * I.e. set output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+ * is the DecimalColumnVector which can find the input decimal does not fit in the output
+ * precision/scale.
+ *
+ * (Since we return immediately if the output entry is NULL, we have no need and do not mark
+ * the output entry to NOT NULL).
+ *
+ */
@Override
- public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
+ public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+ // Invariants.
+ if (isRepeating && outputElementNum != 0) {
+ throw new RuntimeException("Output column number expected to be 0 when isRepeating");
+ }
+ if (inputColVector.isRepeating) {
+ inputElementNum = 0;
+ }
+
+ // Do NOTHING if output is NULL.
+ if (!noNulls && isNull[outputElementNum]) {
+ return;
+ }
- IntervalDayTimeColumnVector timestampColVector = (IntervalDayTimeColumnVector) inputVector;
+ if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+ IntervalDayTimeColumnVector timestampColVector = (IntervalDayTimeColumnVector) inputColVector;
+ totalSeconds[outputElementNum] = timestampColVector.totalSeconds[inputElementNum];
+ nanos[outputElementNum] = timestampColVector.nanos[inputElementNum];
+ } else {
- totalSeconds[outElementNum] = timestampColVector.totalSeconds[inputElementNum];
- nanos[outElementNum] = timestampColVector.nanos[inputElementNum];
+ // Only mark output NULL when input is NULL.
+ isNull[outputElementNum] = true;
+ noNulls = false;
+ }
}
// Simplify vector by brute-force flattening noNulls and isRepeating
@@ -229,8 +273,12 @@ public class IntervalDayTimeColumnVector extends ColumnVector {
}
/**
- * Set a row from a HiveIntervalDayTime.
- * We assume the entry has already been isRepeated adjusted.
+ * Set a field from a HiveIntervalDayTime.
+ *
+ * This is a FAST version that assumes the caller has checked to make sure the intervalDayTime
+ * is not null and elementNum is correctly adjusted for isRepeating. And, that the isNull entry
+ * has been set. Only the output entry fields will be set by this method.
+ *
* @param elementNum
* @param intervalDayTime
*/
@@ -240,7 +288,12 @@ public class IntervalDayTimeColumnVector extends ColumnVector {
}
/**
- * Set a row from the current value in the scratch interval day time.
+ * Set a field from the current value in the scratch interval day time.
+ *
+ * This is a FAST version that assumes the caller has checked to make sure the scratch interval
+ * day time is valid and elementNum is correctly adjusted for isRepeating. And, that the isNull
+ * entry has been set. Only the output entry fields will be set by this method.
+ *
* @param elementNum
*/
public void setFromScratchIntervalDayTime(int elementNum) {
@@ -260,47 +313,86 @@ public class IntervalDayTimeColumnVector extends ColumnVector {
// Copy the current object contents into the output. Only copy selected entries,
// as indicated by selectedInUse and the sel array.
+ @Override
public void copySelected(
- boolean selectedInUse, int[] sel, int size, IntervalDayTimeColumnVector output) {
+ boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) {
- // Output has nulls if and only if input has nulls.
- output.noNulls = noNulls;
+ IntervalDayTimeColumnVector output = (IntervalDayTimeColumnVector) outputColVector;
+ boolean[] outputIsNull = output.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
output.isRepeating = false;
// Handle repeating case
if (isRepeating) {
- output.totalSeconds[0] = totalSeconds[0];
- output.nanos[0] = nanos[0];
- output.isNull[0] = isNull[0];
+ if (noNulls || !isNull[0]) {
+ outputIsNull[0] = false;
+ output.totalSeconds[0] = totalSeconds[0];
+ output.nanos[0] = nanos[0];
+ } else {
+ outputIsNull[0] = true;
+ output.noNulls = false;
+ }
output.isRepeating = true;
return;
}
// Handle normal case
- // Copy data values over
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.totalSeconds[i] = totalSeconds[i];
- output.nanos[i] = nanos[i];
+ if (noNulls) {
+ if (selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ output.totalSeconds[i] = totalSeconds[i];
+ output.nanos[i] = nanos[i];
+ }
+ } else {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ output.totalSeconds[i] = totalSeconds[i];
+ output.nanos[i] = nanos[i];
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ for(int i = 0; i != size; i++) {
+ output.totalSeconds[i] = totalSeconds[i];
+ output.nanos[i] = nanos[i];
+ }
}
- }
- else {
- System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size);
- System.arraycopy(nanos, 0, output.nanos, 0, size);
- }
+ } else /* there are nulls in our column */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ output.noNulls = false;
- // Copy nulls over if needed
- if (!noNulls) {
if (selectedInUse) {
for (int j = 0; j < size; j++) {
int i = sel[j];
output.isNull[i] = isNull[i];
+ output.totalSeconds[i] = totalSeconds[i];
+ output.nanos[i] = nanos[i];
}
- }
- else {
+ } else {
System.arraycopy(isNull, 0, output.isNull, 0, size);
+ System.arraycopy(totalSeconds, 0, output.totalSeconds, 0, size);
+ System.arraycopy(nanos, 0, output.nanos, 0, size);
}
}
}
@@ -310,8 +402,8 @@ public class IntervalDayTimeColumnVector extends ColumnVector {
* @param intervalDayTime
*/
public void fill(HiveIntervalDayTime intervalDayTime) {
- noNulls = true;
isRepeating = true;
+ isNull[0] = false;
totalSeconds[0] = intervalDayTime.getTotalSeconds();
nanos[0] = intervalDayTime.getNanos();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
index 7ecb1e0..8cbcc02 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
@@ -49,28 +49,67 @@ public class ListColumnVector extends MultiValuedColumnVector {
child.flatten(useSelected, selected, size);
}
+ /**
+ * Set the element in this column vector from the given input vector.
+ *
+ * The inputElementNum will be adjusted to 0 if the input column has isRepeating set.
+ *
+ * On the other hand, the outputElementNum must have been adjusted to 0 in ADVANCE when the output
+ * has isRepeating set.
+ *
+ * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+ * supports the caller to do output NULL processing in advance that may cause the output results
+ * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+ *
+ * The inputColVector noNulls and isNull entry will be examined. The output will only
+ * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where
+ * inputElementNum may have been adjusted to 0 for isRepeating.
+ *
+ * If the input entry is NULL or out-of-range, the output will be marked as NULL.
+ * I.e. set output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+ * is the DecimalColumnVector which can find the input decimal does not fit in the output
+ * precision/scale.
+ *
+ * (Since we return immediately if the output entry is NULL, we have no need and do not mark
+ * the output entry to NOT NULL).
+ *
+ */
@Override
- public void setElement(int outElementNum, int inputElementNum,
- ColumnVector inputVector) {
- ListColumnVector input = (ListColumnVector) inputVector;
- if (input.isRepeating) {
+ public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+ // Invariants.
+ if (isRepeating && outputElementNum != 0) {
+ throw new RuntimeException("Output column number expected to be 0 when isRepeating");
+ }
+ if (inputColVector.isRepeating) {
inputElementNum = 0;
}
- if (!input.noNulls && input.isNull[inputElementNum]) {
- isNull[outElementNum] = true;
- noNulls = false;
- } else {
- isNull[outElementNum] = false;
+
+ // Do NOTHING if output is NULL.
+ if (!noNulls && isNull[outputElementNum]) {
+ return;
+ }
+
+ // CONCERN: isRepeating
+ if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+ ListColumnVector input = (ListColumnVector) inputColVector;
int offset = childCount;
int length = (int) input.lengths[inputElementNum];
int inputOffset = (int) input.offsets[inputElementNum];
- offsets[outElementNum] = offset;
+ offsets[outputElementNum] = offset;
childCount += length;
- lengths[outElementNum] = length;
+ lengths[outputElementNum] = length;
child.ensureSize(childCount, true);
for (int i = 0; i < length; ++i) {
- child.setElement(i + offset, inputOffset + i, input.child);
+ final int outputIndex = i + offset;
+ child.isNull[outputIndex] = false;
+ child.setElement(outputIndex, inputOffset + i, input.child);
}
+ } else {
+
+ // Only mark output NULL when input is NULL.
+ isNull[outputElementNum] = true;
+ noNulls = false;
}
}
@@ -116,4 +155,10 @@ public class ListColumnVector extends MultiValuedColumnVector {
}
}
+ @Override
+ public void copySelected(boolean selectedInUse, int[] sel, int size,
+ ColumnVector outputColVector) {
+ throw new RuntimeException("Not supported");
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a4689020/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
index 49e9184..443a076 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java
@@ -54,44 +54,78 @@ public class LongColumnVector extends ColumnVector {
// Copy the current object contents into the output. Only copy selected entries,
// as indicated by selectedInUse and the sel array.
+ @Override
public void copySelected(
- boolean selectedInUse, int[] sel, int size, LongColumnVector output) {
+ boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector) {
+
+ LongColumnVector output = (LongColumnVector) outputColVector;
+ boolean[] outputIsNull = output.isNull;
- // Output has nulls if and only if input has nulls.
- output.noNulls = noNulls;
+ // We do not need to do a column reset since we are carefully changing the output.
output.isRepeating = false;
// Handle repeating case
if (isRepeating) {
- output.vector[0] = vector[0];
- output.isNull[0] = isNull[0];
+ if (noNulls || !isNull[0]) {
+ outputIsNull[0] = false;
+ output.vector[0] = vector[0];
+ } else {
+ outputIsNull[0] = true;
+ output.noNulls = false;
+ }
output.isRepeating = true;
return;
}
// Handle normal case
- // Copy data values over
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.vector[i] = vector[i];
+ if (noNulls) {
+ if (selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ output.vector[i] = vector[i];
+ }
+ } else {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ output.vector[i] = vector[i];;
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ System.arraycopy(vector, 0, output.vector, 0, size);
}
- }
- else {
- System.arraycopy(vector, 0, output.vector, 0, size);
- }
+ } else /* there are nulls in our column */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ output.noNulls = false;
- // Copy nulls over if needed
- if (!noNulls) {
if (selectedInUse) {
for (int j = 0; j < size; j++) {
int i = sel[j];
output.isNull[i] = isNull[i];
+ output.vector[i] = vector[i];
}
- }
- else {
+ } else {
System.arraycopy(isNull, 0, output.isNull, 0, size);
+ System.arraycopy(vector, 0, output.vector, 0, size);
}
}
}
@@ -101,51 +135,81 @@ public class LongColumnVector extends ColumnVector {
public void copySelected(
boolean selectedInUse, int[] sel, int size, DoubleColumnVector output) {
- // Output has nulls if and only if input has nulls.
- output.noNulls = noNulls;
+ boolean[] outputIsNull = output.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the output.
output.isRepeating = false;
// Handle repeating case
if (isRepeating) {
- output.vector[0] = vector[0]; // automatic conversion to double is done here
- output.isNull[0] = isNull[0];
+ if (noNulls || !isNull[0]) {
+ outputIsNull[0] = false;
+ output.vector[0] = vector[0]; // automatic conversion to double is done here
+ } else {
+ outputIsNull[0] = true;
+ output.noNulls = false;
+ }
output.isRepeating = true;
return;
}
// Handle normal case
- // Copy data values over
- if (selectedInUse) {
- for (int j = 0; j < size; j++) {
- int i = sel[j];
- output.vector[i] = vector[i];
- }
- }
- else {
- for(int i = 0; i < size; ++i) {
- output.vector[i] = vector[i];
+ if (noNulls) {
+ if (selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.
+
+ if (!output.noNulls) {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes its mind.
+ outputIsNull[i] = false;
+ output.vector[i] = vector[i];
+ }
+ } else {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ output.vector[i] = vector[i];;
+ }
+ }
+ } else {
+ if (!output.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ output.noNulls = true;
+ }
+ System.arraycopy(vector, 0, output.vector, 0, size);
}
- }
+ } else /* there are NULLs in our column */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ output.noNulls = false;
- // Copy nulls over if needed
- if (!noNulls) {
if (selectedInUse) {
for (int j = 0; j < size; j++) {
int i = sel[j];
output.isNull[i] = isNull[i];
+ output.vector[i] = vector[i];
}
- }
- else {
+ } else {
System.arraycopy(isNull, 0, output.isNull, 0, size);
+ System.arraycopy(vector, 0, output.vector, 0, size);
}
}
}
// Fill the column vector with the provided value
public void fill(long value) {
- noNulls = true;
isRepeating = true;
+ isNull[0] = false;
vector[0] = value;
}
@@ -178,17 +242,52 @@ public class LongColumnVector extends ColumnVector {
flattenNoNulls(selectedInUse, sel, size);
}
+ /**
+ * Set the element in this column vector from the given input vector.
+ *
+ * Both the inputElementNum and outputElementNum must have been adjusted to 0 in ADVANCE
+ * when the input / output has isRepeating set.
+ *
+ * IMPORTANT: if the output entry is marked as NULL, this method will do NOTHING. This
+ * supports the caller to do output NULL processing in advance that may cause the output results
+ * operation to be ignored. Thus, make sure the output isNull entry is set in ADVANCE.
+ *
+ * The inputColVector noNulls and isNull entry will be examined. The output will only
+ * be set if the input is NOT NULL. I.e. noNulls || !isNull[inputElementNum] where
+ * inputElementNum may have been adjusted to 0 for isRepeating.
+ *
+ * If the input entry is NULL or out-of-range, the output will be marked as NULL.
+ * I.e. set output noNulls = false and isNull[outputElementNum] = true. An example of out-of-range
+ * is the DecimalColumnVector which can find the input decimal does not fit in the output
+ * precision/scale.
+ *
+ * (Since we return immediately if the output entry is NULL, we have no need and do not mark
+ * the output entry to NOT NULL).
+ *
+ */
@Override
- public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) {
- if (inputVector.isRepeating) {
+ public void setElement(int outputElementNum, int inputElementNum, ColumnVector inputColVector) {
+
+ // Invariants.
+ if (isRepeating && outputElementNum != 0) {
+ throw new RuntimeException("Output column number expected to be 0 when isRepeating");
+ }
+ if (inputColVector.isRepeating) {
inputElementNum = 0;
}
- if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) {
- isNull[outElementNum] = false;
- vector[outElementNum] =
- ((LongColumnVector) inputVector).vector[inputElementNum];
+
+ // Do NOTHING if output is NULL.
+ if (!noNulls && isNull[outputElementNum]) {
+ return;
+ }
+
+ if (inputColVector.noNulls || !inputColVector.isNull[inputElementNum]) {
+ vector[outputElementNum] =
+ ((LongColumnVector) inputColVector).vector[inputElementNum];
} else {
- isNull[outElementNum] = true;
+
+ // Only mark output NULL when input is NULL.
+ isNull[outputElementNum] = true;
noNulls = false;
}
}