Posted to commits@hive.apache.org by jd...@apache.org on 2018/04/16 23:35:25 UTC
[1/2] hive git commit: HIVE-18609: Results cache invalidation based on ACID table updates (Jason Dere, reviewed by GopalV)
Repository: hive
Updated Branches:
refs/heads/master 6afa544b8 -> ad20ff4b1
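
For context, the golden files added below all exercise the same cache-invalidation sequence. The following is a minimal sketch of that sequence, not the implementation: the queries are taken from the q files themselves, the comments paraphrase the EXPLAIN output recorded in the diff, and the note on write tracking is an assumption based on the JIRA title (invalidation driven by ACID table updates).

    -- ACID table: results over it are eligible for the query results cache
    create table tab1 (key string, value string)
      stored as orc tblproperties ('transactional'='true');
    insert into tab1 select * from default.src;

    select max(key) from tab1;           -- first run executes and populates the cache
    explain select max(key) from tab1;
    -- plan collapses to a single Fetch stage marked "Cached Query Result: true"

    insert into tab1 values ('88', 'val_88');
    -- assumption: this ACID write advances tab1's tracked write state,
    -- so the cached entry for tab1 is no longer valid
    explain select max(key) from tab1;
    -- plan is a full Map Reduce/Tez stage again; the cached result is not used

As the golden output below also shows, queries over non-transactional tables (e.g. default.src), or joins that touch one, never use the cache, since such tables can change without a tracked write.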
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/results/clientpositive/llap/results_cache_transactional.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/results_cache_transactional.q.out b/ql/src/test/results/clientpositive/llap/results_cache_transactional.q.out
new file mode 100644
index 0000000..a38ddfa
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/results_cache_transactional.q.out
@@ -0,0 +1,624 @@
+PREHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab1
+POSTHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab1
+PREHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab2
+POSTHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab2
+PREHOOK: query: insert into tab1 select * from default.src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tab1
+POSTHOOK: query: insert into tab1 select * from default.src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tab1
+POSTHOOK: Lineage: tab1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tab1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into tab2 select * from default.src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tab2
+POSTHOOK: query: insert into tab2 select * from default.src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tab2
+POSTHOOK: Lineage: tab2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tab2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain
+select max(key) from tab1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from tab1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(key)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select max(key) from tab1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from tab1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+98
+test.comment="Query on transactional table should use cache"
+PREHOOK: query: explain
+select max(key) from tab1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from tab1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select max(key) from tab1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+POSTHOOK: query: select max(key) from tab1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+98
+PREHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: tab2
+ Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 95 Data size: 17028 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+1028
+test.comment="Join on transactional tables, should use cache"
+PREHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Input: default@tab2
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Input: default@tab2
+1028
+PREHOOK: query: explain
+select max(key) from src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: max(key)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select max(key) from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98
+test.comment="Query on non-transactional table should not use cache"
+PREHOOK: query: explain
+select max(key) from src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: max(key)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select max(key) from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98
+PREHOOK: query: explain
+select count(*) from tab1 join src on (tab1.key = src.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join src on (tab1.key = src.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+1028
+test.comment="Join uses non-transactional table, should not use cache"
+PREHOOK: query: explain
+select count(*) from tab1 join src on (tab1.key = src.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join src on (tab1.key = src.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+1028
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/results/clientpositive/results_cache_invalidation.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/results_cache_invalidation.q.out b/ql/src/test/results/clientpositive/results_cache_invalidation.q.out
new file mode 100644
index 0000000..5f225a1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/results_cache_invalidation.q.out
@@ -0,0 +1,748 @@
+PREHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab1
+POSTHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab1
+PREHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab2
+POSTHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab2
+PREHOOK: query: insert into tab1 select * from default.src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tab1
+POSTHOOK: query: insert into tab1 select * from default.src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tab1
+POSTHOOK: Lineage: tab1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tab1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into tab2 select * from default.src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tab2
+POSTHOOK: query: insert into tab2 select * from default.src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tab2
+POSTHOOK: Lineage: tab2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tab2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+test.comment="Run queries to load into cache"
+PREHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(key) >= 0.0D) (type: boolean)
+ Statistics: Num rows: 30 Data size: 11548 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 30 Data size: 11548 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+500
+PREHOOK: query: explain
+select max(key) from tab2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from tab2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tab2
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(key)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select max(key) from tab2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from tab2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+98
+PREHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: tab2
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 100 Data size: 38533 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+1028
+test.comment="Q1 should now be able to use cache"
+PREHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+500
+test.comment="Q2 should now be able to use cache"
+PREHOOK: query: explain
+select max(key) from tab2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from tab2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select max(key) from tab2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from tab2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+98
+test.comment="Q3 should now be able to use cache"
+PREHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+1028
+PREHOOK: query: insert into tab1 values ('88', 'val_88')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tab1
+POSTHOOK: query: insert into tab1 values ('88', 'val_88')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tab1
+POSTHOOK: Lineage: tab1.key SCRIPT []
+POSTHOOK: Lineage: tab1.value SCRIPT []
+test.comment="Q1 should not use cache"
+PREHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(key) >= 0.0D) (type: boolean)
+ Statistics: Num rows: 36 Data size: 13840 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 36 Data size: 13840 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+501
+test.comment="Q2 should still use cache since tab2 not updated"
+PREHOOK: query: explain
+select max(key) from tab2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from tab2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select max(key) from tab2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from tab2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+98
+test.comment="Q3 should not use cache"
+PREHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: tab2
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 121 Data size: 46519 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+1028
+PREHOOK: query: insert into tab2 values ('88', 'val_88')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tab2
+POSTHOOK: query: insert into tab2 values ('88', 'val_88')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tab2
+POSTHOOK: Lineage: tab2.key SCRIPT []
+POSTHOOK: Lineage: tab2.value SCRIPT []
+test.comment="Q1 should use cache"
+PREHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+501
+test.comment="Q2 should not use cache"
+PREHOOK: query: explain
+select max(key) from tab2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from tab2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tab2
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(key)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select max(key) from tab2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from tab2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+98
+test.comment="Q3 should not use cache"
+PREHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: tab2
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 110 Data size: 42290 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 121 Data size: 46519 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+1029
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/results/clientpositive/results_cache_transactional.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/results_cache_transactional.q.out b/ql/src/test/results/clientpositive/results_cache_transactional.q.out
new file mode 100644
index 0000000..f2fac38
--- /dev/null
+++ b/ql/src/test/results/clientpositive/results_cache_transactional.q.out
@@ -0,0 +1,583 @@
+PREHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab1
+POSTHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab1
+PREHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab2
+POSTHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab2
+PREHOOK: query: insert into tab1 select * from default.src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tab1
+POSTHOOK: query: insert into tab1 select * from default.src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tab1
+POSTHOOK: Lineage: tab1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tab1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into tab2 select * from default.src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tab2
+POSTHOOK: query: insert into tab2 select * from default.src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tab2
+POSTHOOK: Lineage: tab2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tab2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain
+select max(key) from tab1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from tab1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(key)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select max(key) from tab1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from tab1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+98
+test.comment="Query on transactional table should use cache"
+PREHOOK: query: explain
+select max(key) from tab1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from tab1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select max(key) from tab1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from tab1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+98
+PREHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: tab2
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 100 Data size: 38533 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+1028
+test.comment="Join on transactional tables, should use cache"
+PREHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+1028
+PREHOOK: query: explain
+select max(key) from src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(key)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select max(key) from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98
+test.comment="Query on non-transactional table should not use cache"
+PREHOOK: query: explain
+select max(key) from src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from src
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(key)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select max(key) from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+98
+PREHOOK: query: explain
+select count(*) from tab1 join src on (tab1.key = src.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join src on (tab1.key = src.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+1028
+test.comment="Join uses non-transactional table, should not use cache"
+PREHOOK: query: explain
+select count(*) from tab1 join src on (tab1.key = src.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join src on (tab1.key = src.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 91 Data size: 35030 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join src on (tab1.key = src.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+1028
[2/2] hive git commit: HIVE-18609: Results cache invalidation based
on ACID table updates (Jason Dere, reviewed by GopalV)
Posted by jd...@apache.org.
HIVE-18609: Results cache invalidation based on ACID table updates (Jason Dere, reviewed by GopalV)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ad20ff4b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ad20ff4b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ad20ff4b
Branch: refs/heads/master
Commit: ad20ff4b1da9a20a74c5ef3c99b21ced9d756996
Parents: 6afa544
Author: Jason Dere <jd...@hortonworks.com>
Authored: Mon Apr 16 16:34:21 2018 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Mon Apr 16 16:34:21 2018 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/conf/HiveConf.java | 4 +
.../test/resources/testconfiguration.properties | 2 +
.../java/org/apache/hadoop/hive/ql/Driver.java | 7 +-
.../ql/cache/results/QueryResultsCache.java | 65 +-
.../org/apache/hadoop/hive/ql/io/AcidUtils.java | 16 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 55 +-
.../queries/clientpositive/results_cache_1.q | 1 +
.../queries/clientpositive/results_cache_2.q | 1 +
.../clientpositive/results_cache_capacity.q | 1 +
.../clientpositive/results_cache_empty_result.q | 1 +
.../clientpositive/results_cache_invalidation.q | 90 +++
.../clientpositive/results_cache_lifetime.q | 1 +
.../results_cache_quoted_identifiers.q | 1 +
.../clientpositive/results_cache_temptable.q | 1 +
.../results_cache_transactional.q | 57 ++
.../clientpositive/results_cache_with_masking.q | 1 +
.../llap/results_cache_invalidation.q.out | 793 +++++++++++++++++++
.../llap/results_cache_transactional.q.out | 624 +++++++++++++++
.../results_cache_invalidation.q.out | 748 +++++++++++++++++
.../results_cache_transactional.q.out | 583 ++++++++++++++
20 files changed, 3043 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index e533ee6..4da4e1d 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -4270,6 +4270,10 @@ public class HiveConf extends Configuration {
"If the query results cache is enabled. This will keep results of previously executed queries " +
"to be reused if the same query is executed again."),
+ HIVE_QUERY_RESULTS_CACHE_NONTRANSACTIONAL_TABLES_ENABLED("hive.query.results.cache.nontransactional.tables.enabled", false,
+ "If the query results cache is enabled for queries involving non-transactional tables. " +
+ "Users who enable this setting should be willing to tolerate some amount of stale results in the cache."),
+
HIVE_QUERY_RESULTS_CACHE_WAIT_FOR_PENDING_RESULTS("hive.query.results.cache.wait.for.pending.results", true,
"Should a query wait for the pending results of an already running query, " +
"in order to use the cached result when it becomes ready"),
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 2845ab6..2b37053 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -222,6 +222,8 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
ptf_streaming.q,\
results_cache_1.q,\
results_cache_empty_result.q,\
+ results_cache_invalidation.q,\
+ results_cache_transactional.q,\
sample1.q,\
selectDistinctStar.q,\
select_dummy_source.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
index a88453c..4acdd9b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
@@ -1977,9 +1977,14 @@ public class Driver implements IDriver {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SAVE_TO_RESULTS_CACHE);
+ ValidTxnWriteIdList txnWriteIdList = null;
+ if (plan.hasAcidResourcesInQuery()) {
+ txnWriteIdList = AcidUtils.getValidTxnWriteIdList(conf);
+ }
boolean savedToCache = QueryResultsCache.getInstance().setEntryValid(
cacheUsage.getCacheEntry(),
- plan.getFetchTask().getWork());
+ plan.getFetchTask().getWork(),
+ txnWriteIdList);
LOG.info("savedToCache: {}", savedToCache);
if (savedToCache) {
useFetchFromCache(cacheUsage.getCacheEntry());
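
The driver now records a transactional snapshot alongside the cached results. A hedged sketch of that decision in isolation; the helper name snapshotForCache is hypothetical, while AcidUtils.getValidTxnWriteIdList is the accessor added in this commit:

    import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.io.AcidUtils;

    final class CacheSaveSketch {
      // Capture the write-id snapshot only when the query actually read
      // ACID tables; a null snapshot means the entry has no transactional
      // state to validate against on later lookups.
      static ValidTxnWriteIdList snapshotForCache(HiveConf conf,
          boolean hasAcidResourcesInQuery) {
        return hasAcidResourcesInQuery
            ? AcidUtils.getValidTxnWriteIdList(conf)
            : null;
      }
    }
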
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
index b1a3646..90c8ec3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
@@ -43,6 +43,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.function.Supplier;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
@@ -52,17 +53,21 @@ import org.apache.hadoop.hive.common.metrics.common.Metrics;
import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
import org.apache.hadoop.hive.common.metrics.common.MetricsVariable;
+import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
+import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.Entity.Type;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
import org.apache.hadoop.hive.ql.parse.TableAccessInfo;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hive.common.util.TxnIdUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -76,10 +81,12 @@ public final class QueryResultsCache {
public static class LookupInfo {
private String queryText;
+ private Supplier<ValidTxnWriteIdList> txnWriteIdListProvider;
- public LookupInfo(String queryText) {
+ public LookupInfo(String queryText, Supplier<ValidTxnWriteIdList> txnWriteIdListProvider) {
super();
this.queryText = queryText;
+ this.txnWriteIdListProvider = txnWriteIdListProvider;
}
public String getQueryText() {
@@ -174,6 +181,7 @@ public final class QueryResultsCache {
private AtomicInteger readers = new AtomicInteger(0);
private ScheduledFuture<?> invalidationFuture = null;
private volatile CacheEntryStatus status = CacheEntryStatus.PENDING;
+ private ValidTxnWriteIdList txnWriteIdList;
public void releaseReader() {
int readerCount = 0;
@@ -389,15 +397,20 @@ public final class QueryResultsCache {
LOG.debug("QueryResultsCache lookup for query: {}", request.queryText);
- boolean foundPending = false;
+ boolean foundPending = false;
+ // Cannot remove entries while we currently hold the read lock, so keep track of them to delete later.
+ Set<CacheEntry> entriesToRemove = new HashSet<CacheEntry>();
Lock readLock = rwLock.readLock();
try {
+ // Note: ReentrantReadWriteLock does not allow upgrading a read lock to a write lock.
+ // Care must be taken while under the read lock to make sure we do not perform any
+ // actions which attempt to take a write lock.
readLock.lock();
Set<CacheEntry> candidates = queryMap.get(request.queryText);
if (candidates != null) {
CacheEntry pendingResult = null;
for (CacheEntry candidate : candidates) {
- if (entryMatches(request, candidate)) {
+ if (entryMatches(request, candidate, entriesToRemove)) {
CacheEntryStatus entryStatus = candidate.status;
if (entryStatus == CacheEntryStatus.VALID) {
result = candidate;
@@ -422,6 +435,11 @@ public final class QueryResultsCache {
readLock.unlock();
}
+ // Now that we have exited read lock it is safe to remove any invalid entries.
+ for (CacheEntry invalidEntry : entriesToRemove) {
+ removeEntry(invalidEntry);
+ }
+
LOG.debug("QueryResultsCache lookup result: {}", result);
incrementMetric(MetricsConstant.QC_LOOKUPS);
if (result != null) {
@@ -477,7 +495,7 @@ public final class QueryResultsCache {
* @param fetchWork
* @return
*/
- public boolean setEntryValid(CacheEntry cacheEntry, FetchWork fetchWork) {
+ public boolean setEntryValid(CacheEntry cacheEntry, FetchWork fetchWork, ValidTxnWriteIdList txnWriteIdList) {
String queryText = cacheEntry.getQueryText();
boolean dataDirMoved = false;
Path queryResultsPath = null;
@@ -527,6 +545,7 @@ public final class QueryResultsCache {
cacheEntry.size = resultSize;
this.cacheSize += resultSize;
cacheEntry.createTime = System.currentTimeMillis();
+ cacheEntry.txnWriteIdList = txnWriteIdList;
cacheEntry.setStatus(CacheEntryStatus.VALID);
// Mark this entry as being in use. Caller will need to release later.
@@ -601,7 +620,15 @@ public final class QueryResultsCache {
private static final float LRU_LOAD_FACTOR = 0.75f;
private static final CacheEntry[] EMPTY_CACHEENTRY_ARRAY = {};
- private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry) {
+ /**
+ * Check that the cache entry matches the lookupInfo.
+ * @param lookupInfo the lookup details (query text and current write-id snapshot) for this query
+ * @param entry the candidate cache entry being checked
+ * @param entriesToRemove Set of entries to be removed after exiting read lock section.
+ * If the entry is found to be invalid it will be added to this set.
+ * @return true if the entry matches the lookup and is still valid for use
+ */
+ private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry, Set<CacheEntry> entriesToRemove) {
QueryInfo queryInfo = entry.getQueryInfo();
for (ReadEntity readEntity : queryInfo.getInputs()) {
// Check that the tables used do not resolve to temp tables.
@@ -614,6 +641,34 @@ public final class QueryResultsCache {
tableUsed.getTableName());
return false;
}
+
+ // Has the table changed since the query was cached?
+ // For transactional tables, can compare the table writeIDs of the current/cached query.
+ if (AcidUtils.isTransactionalTable(tableUsed)) {
+ boolean writeIdCheckPassed = false;
+ String tableName = tableUsed.getFullyQualifiedName();
+ ValidTxnWriteIdList currentTxnWriteIdList = lookupInfo.txnWriteIdListProvider.get();
+ ValidWriteIdList currentWriteIdForTable =
+ currentTxnWriteIdList.getTableValidWriteIdList(tableName);
+ ValidWriteIdList cachedWriteIdForTable = entry.txnWriteIdList.getTableValidWriteIdList(tableName);
+
+ LOG.debug("Checking writeIds for table {}: currentWriteIdForTable {}, cachedWriteIdForTable {}",
+ tableName, currentWriteIdForTable, cachedWriteIdForTable);
+ if (currentWriteIdForTable != null && cachedWriteIdForTable != null) {
+ if (TxnIdUtils.checkEquivalentWriteIds(currentWriteIdForTable, cachedWriteIdForTable)) {
+ writeIdCheckPassed = true;
+ }
+ }
+
+ if (!writeIdCheckPassed) {
+ LOG.debug("Cached query no longer valid due to table {}", tableUsed.getFullyQualifiedName());
+ // We can invalidate the entry now, but calling removeEntry() requires a write lock
+ // and we may already have read lock taken now. Add to entriesToRemove to delete later.
+ entriesToRemove.add(entry);
+ entry.invalidate();
+ return false;
+ }
+ }
}
}
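
Two ideas in this hunk are worth calling out: write-id equivalence decides whether an entry is still valid, and removal of invalid entries is deferred because the read lock cannot be upgraded. A standalone sketch of the deferred-removal pattern using plain JDK types; Entry (the type parameter E), isStale, and the entries set are hypothetical stand-ins for CacheEntry, the write-id check, and queryMap:

    import java.util.HashSet;
    import java.util.Set;
    import java.util.concurrent.locks.ReentrantReadWriteLock;
    import java.util.function.Predicate;

    final class DeferredInvalidation<E> {
      private final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
      private final Set<E> entries = new HashSet<>();

      // Find the first entry that is still valid. Stale entries are only
      // collected while the read lock is held (ReentrantReadWriteLock does
      // not allow upgrading read -> write) and removed afterwards under the
      // write lock, mirroring entriesToRemove in the hunk above.
      E lookup(Predicate<E> isStale) {
        Set<E> toRemove = new HashSet<>();
        E result = null;
        rwLock.readLock().lock();
        try {
          for (E e : entries) {
            if (isStale.test(e)) {
              toRemove.add(e); // taking the write lock here would deadlock
            } else {
              result = e;
              break;
            }
          }
        } finally {
          rwLock.readLock().unlock();
        }
        rwLock.writeLock().lock();
        try {
          entries.removeAll(toRemove); // safe: read lock has been released
        } finally {
          rwLock.writeLock().unlock();
        }
        return result;
      }
    }
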
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 44a7496..2b1960c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -27,6 +27,8 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.common.HiveStatsUtils;
+import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -41,6 +43,8 @@ import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcRecordUpdater;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.Writer;
+import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
+import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
@@ -1508,11 +1512,19 @@ public class AcidUtils {
}
/**
- * Extract the ValidWriteIdList for the given table from the list of tables' ValidWriteIdList.
+ * Get the ValidTxnWriteIdList saved in the configuration.
*/
- public static ValidWriteIdList getTableValidWriteIdList(Configuration conf, String fullTableName) {
+ public static ValidTxnWriteIdList getValidTxnWriteIdList(Configuration conf) {
String txnString = conf.get(ValidTxnWriteIdList.VALID_TABLES_WRITEIDS_KEY);
ValidTxnWriteIdList validTxnList = new ValidTxnWriteIdList(txnString);
+ return validTxnList;
+ }
+
+ /**
+ * Extract the ValidWriteIdList for the given table from the list of tables' ValidWriteIdList.
+ */
+ public static ValidWriteIdList getTableValidWriteIdList(Configuration conf, String fullTableName) {
+ ValidTxnWriteIdList validTxnList = getValidTxnWriteIdList(conf);
return validTxnList.getTableValidWriteIdList(fullTableName);
}
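
The refactor splits snapshot parsing from per-table extraction, so code that checks several tables (such as the cache lookup) can parse the serialized list once. A small caller-side sketch, assuming the VALID_TABLES_WRITEIDS_KEY entry has already been populated in the session Configuration:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
    import org.apache.hadoop.hive.common.ValidWriteIdList;
    import org.apache.hadoop.hive.ql.io.AcidUtils;

    final class WriteIdLookupSketch {
      // Parse the whole snapshot once, then pull out per-table lists.
      // getTableValidWriteIdList can yield null for tables absent from the
      // snapshot, which callers must handle (as entryMatches does above).
      static ValidWriteIdList writeIdsFor(Configuration conf, String fullTableName) {
        ValidTxnWriteIdList snapshot = AcidUtils.getValidTxnWriteIdList(conf);
        return snapshot.getTableValidWriteIdList(fullTableName);
      }
    }
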
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 318b4cf..ebf89eb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -40,8 +40,10 @@ import java.util.Queue;
import java.util.Set;
import java.util.TreeSet;
import java.util.UUID;
+import java.util.function.Supplier;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
+import java.util.stream.Collectors;
import org.antlr.runtime.ClassicToken;
import org.antlr.runtime.CommonToken;
@@ -63,6 +65,8 @@ import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.ValidTxnList;
+import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
import org.apache.hadoop.hive.common.metrics.common.MetricsConstant;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -117,6 +121,7 @@ import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.exec.UnionOperator;
import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.hooks.Entity;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity.WriteType;
@@ -14522,7 +14527,33 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
QueryResultsCache.LookupInfo lookupInfo = null;
String queryString = getQueryStringForCache(astNode);
if (queryString != null) {
- lookupInfo = new QueryResultsCache.LookupInfo(queryString);
+ lookupInfo = new QueryResultsCache.LookupInfo(queryString,
+ new Supplier<ValidTxnWriteIdList>() {
+ ValidTxnWriteIdList cachedWriteIdList = null;
+ @Override
+ public ValidTxnWriteIdList get() {
+ if (cachedWriteIdList == null) {
+ // TODO: Once HIVE-18948 is in, should be able to retrieve writeIdList from the conf.
+ //cachedWriteIdList = AcidUtils.getValidTxnWriteIdList(conf);
+ //
+ List<String> transactionalTables = tablesFromReadEntities(inputs)
+ .stream()
+ .filter(table -> AcidUtils.isTransactionalTable(table))
+ .map(table -> table.getFullyQualifiedName())
+ .collect(Collectors.toList());
+ try {
+ String txnString = conf.get(ValidTxnList.VALID_TXNS_KEY);
+ cachedWriteIdList =
+ getTxnMgr().getValidWriteIds(transactionalTables, txnString);
+ } catch (Exception err) {
+ String msg = "Error while getting the txnWriteIdList for tables " + transactionalTables
+ + " and validTxnList " + conf.get(ValidTxnList.VALID_TXNS_KEY);
+ throw new RuntimeException(msg, err);
+ }
+ }
+ return cachedWriteIdList;
+ }
+ });
}
return lookupInfo;
}
@@ -14620,9 +14651,31 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return false;
}
+ if (!conf.getBoolVar(ConfVars.HIVE_QUERY_RESULTS_CACHE_NONTRANSACTIONAL_TABLES_ENABLED)) {
+ List<Table> nonTransactionalTables = getNonTransactionalTables();
+ if (nonTransactionalTables.size() > 0) {
+ LOG.info("Not eligible for results caching - query contains non-transactional tables {}",
+ nonTransactionalTables);
+ return false;
+ }
+ }
return true;
}
+ private static Set<Table> tablesFromReadEntities(Set<ReadEntity> readEntities) {
+ return readEntities.stream()
+ .filter(entity -> entity.getType() == Entity.Type.TABLE)
+ .map(entity -> entity.getTable())
+ .collect(Collectors.toSet());
+ }
+
+ private List<Table> getNonTransactionalTables() {
+ return tablesFromReadEntities(inputs)
+ .stream()
+ .filter(table -> !AcidUtils.isTransactionalTable(table))
+ .collect(Collectors.toList());
+ }
+
/**
* Check the query results cache to see if the query represented by the lookupInfo can be
* answered using the results cache. If the cache contains a suitable entry, the semantic analyzer
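
The anonymous Supplier registered in getLookupInfo above memoizes its result: the write-id list, which may require a metastore round trip via getTxnMgr().getValidWriteIds, is fetched lazily and at most once per lookup. A generic sketch of that pattern, single-threaded like the per-query analyzer context:

    import java.util.function.Supplier;

    final class MemoizingSupplier<T> implements Supplier<T> {
      private final Supplier<T> delegate;
      private boolean computed;
      private T value; // not thread-safe; adequate for per-query use

      MemoizingSupplier(Supplier<T> delegate) {
        this.delegate = delegate;
      }

      @Override
      public T get() {
        if (!computed) {
          value = delegate.get(); // the expensive fetch runs only once
          computed = true;
        }
        return value;
      }
    }

Deferring the fetch matters because a lookup for a query over purely non-transactional tables may never need the snapshot at all.
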
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/queries/clientpositive/results_cache_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/results_cache_1.q b/ql/src/test/queries/clientpositive/results_cache_1.q
index b4149fd..f05c793 100644
--- a/ql/src/test/queries/clientpositive/results_cache_1.q
+++ b/ql/src/test/queries/clientpositive/results_cache_1.q
@@ -4,6 +4,7 @@
--! qt:dataset:cbo_t1
set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=true;
explain
select count(*) from src a join src b on (a.key = b.key);
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/queries/clientpositive/results_cache_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/results_cache_2.q b/ql/src/test/queries/clientpositive/results_cache_2.q
index 2fbb381..d939b8e 100644
--- a/ql/src/test/queries/clientpositive/results_cache_2.q
+++ b/ql/src/test/queries/clientpositive/results_cache_2.q
@@ -1,6 +1,7 @@
--! qt:dataset:src
set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=true;
set hive.fetch.task.conversion=more;
-- Test 1: fetch task
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/queries/clientpositive/results_cache_capacity.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/results_cache_capacity.q b/ql/src/test/queries/clientpositive/results_cache_capacity.q
index c29ff29..2d4f03a 100644
--- a/ql/src/test/queries/clientpositive/results_cache_capacity.q
+++ b/ql/src/test/queries/clientpositive/results_cache_capacity.q
@@ -1,6 +1,7 @@
--! qt:dataset:src
set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=true;
-- Allow results cache to hold entries up to 125 bytes
-- The single row queries are small enough to fit in the cache (103 bytes)
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/queries/clientpositive/results_cache_empty_result.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/results_cache_empty_result.q b/ql/src/test/queries/clientpositive/results_cache_empty_result.q
index a18a6c0..51668e8 100644
--- a/ql/src/test/queries/clientpositive/results_cache_empty_result.q
+++ b/ql/src/test/queries/clientpositive/results_cache_empty_result.q
@@ -1,6 +1,7 @@
--! qt:dataset:src
set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=true;
explain
select count(*), key from src a where key < 0 group by key;
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/queries/clientpositive/results_cache_invalidation.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/results_cache_invalidation.q b/ql/src/test/queries/clientpositive/results_cache_invalidation.q
new file mode 100644
index 0000000..0ef5c66
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/results_cache_invalidation.q
@@ -0,0 +1,90 @@
+--! qt:dataset:src
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true');
+create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true');
+
+insert into tab1 select * from default.src;
+insert into tab2 select * from default.src;
+
+set hive.query.results.cache.enabled=true;
+
+set test.comment="Run queries to load into cache";
+set test.comment;
+
+-- Q1
+explain
+select count(*) from tab1 a where key >= 0;
+select count(*) from tab1 a where key >= 0;
+
+-- Q2
+explain
+select max(key) from tab2;
+select max(key) from tab2;
+
+-- Q3
+explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key);
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key);
+
+set test.comment="Q1 should now be able to use cache";
+set test.comment;
+explain
+select count(*) from tab1 a where key >= 0;
+select count(*) from tab1 a where key >= 0;
+
+set test.comment="Q2 should now be able to use cache";
+set test.comment;
+explain
+select max(key) from tab2;
+select max(key) from tab2;
+
+set test.comment="Q3 should now be able to use cache";
+set test.comment;
+explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key);
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key);
+
+-- Update tab1 which should invalidate Q1 and Q3.
+insert into tab1 values ('88', 'val_88');
+
+set test.comment="Q1 should not use cache";
+set test.comment;
+explain
+select count(*) from tab1 a where key >= 0;
+select count(*) from tab1 a where key >= 0;
+
+set test.comment="Q2 should still use cache since tab2 not updated";
+set test.comment;
+explain
+select max(key) from tab2;
+select max(key) from tab2;
+
+set test.comment="Q3 should not use cache";
+set test.comment;
+explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key);
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key);
+
+-- Update tab2 which should invalidate Q2 and Q3.
+insert into tab2 values ('88', 'val_88');
+
+set test.comment="Q1 should use cache";
+set test.comment;
+explain
+select count(*) from tab1 a where key >= 0;
+select count(*) from tab1 a where key >= 0;
+
+set test.comment="Q2 should not use cache";
+set test.comment;
+explain
+select max(key) from tab2;
+select max(key) from tab2;
+
+set test.comment="Q3 should not use cache";
+set test.comment;
+explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key);
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key);
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/queries/clientpositive/results_cache_lifetime.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/results_cache_lifetime.q b/ql/src/test/queries/clientpositive/results_cache_lifetime.q
index 1140404..0eb1c03 100644
--- a/ql/src/test/queries/clientpositive/results_cache_lifetime.q
+++ b/ql/src/test/queries/clientpositive/results_cache_lifetime.q
@@ -1,6 +1,7 @@
--! qt:dataset:src
set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=true;
set hive.query.results.cache.max.entry.lifetime=2;
-- This query used the cache from results_cache_1.q. Load it up.
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q b/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q
index 29595ed..09f8cc9 100644
--- a/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q
+++ b/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q
@@ -10,6 +10,7 @@ create table quoted1 (
insert into quoted1 select key, key, value, value from src;
set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=true;
explain
select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1;
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/queries/clientpositive/results_cache_temptable.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/results_cache_temptable.q b/ql/src/test/queries/clientpositive/results_cache_temptable.q
index fc41722..7c315cc 100644
--- a/ql/src/test/queries/clientpositive/results_cache_temptable.q
+++ b/ql/src/test/queries/clientpositive/results_cache_temptable.q
@@ -1,5 +1,6 @@
--! qt:dataset:src
set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=true;
create table rct (key string, value string);
load data local inpath '../../data/files/kv1.txt' overwrite into table rct;
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/queries/clientpositive/results_cache_transactional.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/results_cache_transactional.q b/ql/src/test/queries/clientpositive/results_cache_transactional.q
new file mode 100644
index 0000000..13fb848
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/results_cache_transactional.q
@@ -0,0 +1,57 @@
+--! qt:dataset:src
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true');
+create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true');
+
+insert into tab1 select * from default.src;
+insert into tab2 select * from default.src;
+
+set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=false;
+
+explain
+select max(key) from tab1;
+select max(key) from tab1;
+
+set test.comment="Query on transactional table should use cache";
+set test.comment;
+explain
+select max(key) from tab1;
+select max(key) from tab1;
+
+explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key);
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key);
+
+set test.comment="Join on transactional tables, should use cache";
+set test.comment;
+explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key);
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key);
+
+
+-- Non-transactional tables
+
+explain
+select max(key) from src;
+select max(key) from src;
+
+set test.comment="Query on non-transactional table should not use cache";
+set test.comment;
+explain
+select max(key) from src;
+select max(key) from src;
+
+explain
+select count(*) from tab1 join src on (tab1.key = src.key);
+select count(*) from tab1 join src on (tab1.key = src.key);
+
+set test.comment="Join uses non-transactional table, should not use cache";
+set test.comment;
+explain
+select count(*) from tab1 join src on (tab1.key = src.key);
+select count(*) from tab1 join src on (tab1.key = src.key);
+
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/queries/clientpositive/results_cache_with_masking.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/results_cache_with_masking.q b/ql/src/test/queries/clientpositive/results_cache_with_masking.q
index e9e7982..db27525 100644
--- a/ql/src/test/queries/clientpositive/results_cache_with_masking.q
+++ b/ql/src/test/queries/clientpositive/results_cache_with_masking.q
@@ -4,6 +4,7 @@ set hive.mapred.mode=nonstrict;
set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=true;
create table masking_test as select cast(key as int) as key, value from src;
http://git-wip-us.apache.org/repos/asf/hive/blob/ad20ff4b/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out b/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out
new file mode 100644
index 0000000..c76de92
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/results_cache_invalidation.q.out
@@ -0,0 +1,793 @@
+PREHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab1
+POSTHOOK: query: create table tab1 (key string, value string) stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab1
+PREHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tab2
+POSTHOOK: query: create table tab2 (key string, value string) stored as orc tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tab2
+PREHOOK: query: insert into tab1 select * from default.src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tab1
+POSTHOOK: query: insert into tab1 select * from default.src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tab1
+POSTHOOK: Lineage: tab1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tab1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert into tab2 select * from default.src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tab2
+POSTHOOK: query: insert into tab2 select * from default.src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tab2
+POSTHOOK: Lineage: tab2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: tab2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+test.comment="Run queries to load into cache"
+PREHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(key) >= 0.0D) (type: boolean)
+ Statistics: Num rows: 30 Data size: 5338 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 30 Data size: 5338 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+500
+PREHOOK: query: explain
+select max(key) from tab2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from tab2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab2
+ Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(key)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select max(key) from tab2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from tab2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+98
+PREHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: tab2
+ Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 95 Data size: 17028 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+1028
+test.comment="Q1 should now be able to use cache"
+PREHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+POSTHOOK: query: select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+500
+test.comment="Q2 should now be able to use cache"
+PREHOOK: query: explain
+select max(key) from tab2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from tab2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select max(key) from tab2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab2
+POSTHOOK: query: select max(key) from tab2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab2
+98
+test.comment="Q3 should now be able to use cache"
+PREHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Input: default@tab2
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Input: default@tab2
+1028
+PREHOOK: query: insert into tab1 values ('88', 'val_88')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tab1
+POSTHOOK: query: insert into tab1 values ('88', 'val_88')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tab1
+POSTHOOK: Lineage: tab1.key SCRIPT []
+POSTHOOK: Lineage: tab1.value SCRIPT []
+test.comment="Q1 should not use cache"
+PREHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (UDFToDouble(key) >= 0.0D) (type: boolean)
+ Statistics: Num rows: 36 Data size: 6383 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ Statistics: Num rows: 36 Data size: 6383 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+#### A masked pattern was here ####
+501
+test.comment="Q2 should still use cache since tab2 not updated"
+PREHOOK: query: explain
+select max(key) from tab2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from tab2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select max(key) from tab2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab2
+POSTHOOK: query: select max(key) from tab2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab2
+98
+test.comment="Q3 should not use cache"
+PREHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: tab2
+ Statistics: Num rows: 91 Data size: 16192 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 87 Data size: 15480 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 115 Data size: 20478 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+1028
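
The entry cached for this join records a snapshot over both tab1 and tab2, so a committed write to either table invalidates it. The single-row insert into tab2 below does exactly that: afterwards the Q2 and Q3 entries are stale, while Q1, which reads only tab1, remains usable. By contrast, a write to an unrelated table would leave all three entries intact (a sketch; tab3 is hypothetical and not part of this test):

    create table tab3 (key string, value string)
      stored as orc tblproperties ('transactional'='true');
    insert into tab3 values ('1', 'val_1');
    -- no cached entry reads tab3, so nothing is invalidated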
+PREHOOK: query: insert into tab2 values ('88', 'val_88')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tab2
+POSTHOOK: query: insert into tab2 values ('88', 'val_88')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tab2
+POSTHOOK: Lineage: tab2.key SCRIPT []
+POSTHOOK: Lineage: tab2.value SCRIPT []
+test.comment="Q1 should use cache"
+PREHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+ Cached Query Result: true
+
+PREHOOK: query: select count(*) from tab1 a where key >= 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+POSTHOOK: query: select count(*) from tab1 a where key >= 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+501
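
Q1 still hits the cache even though tab2 has changed, because invalidation is per entry and keyed to the tables that entry actually read. A follow-up write to tab1 would invalidate it, mirroring the earlier half of the test (a sketch; the inserted row is hypothetical):

    insert into tab1 values ('99', 'val_99');
    explain select count(*) from tab1 a where key >= 0;
    -- full Tez plan again: no "Cached Query Result: true" line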
+test.comment="Q2 should not use cache"
+PREHOOK: query: explain
+select max(key) from tab2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(key) from tab2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab2
+ Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: key
+ Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(key)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string)
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: max(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select max(key) from tab2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select max(key) from tab2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+98
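
The recomputed Q2 answer is unchanged: key is a string column, so max(key) is a lexicographic max and '98' still beats the newly inserted '88'. The point of the re-run is the snapshot check, not the value (a sketch of the comparison, using Hive's greatest UDF):

    select max(key) from tab2;       -- '98'
    select greatest('98', '88');     -- '98', since '9' > '8' as strings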
+test.comment="Q3 should not use cache"
+PREHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tab1
+ Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: tab2
+ Statistics: Num rows: 110 Data size: 19504 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 105 Data size: 18617 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col0 (type: string)
+ Statistics: Num rows: 115 Data size: 20478 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count()
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tab1
+PREHOOK: Input: default@tab2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from tab1 join tab2 on (tab1.key = tab2.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tab1
+POSTHOOK: Input: default@tab2
+#### A masked pattern was here ####
+1029
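
The join re-runs and returns 1029, one more than before: the new ('88', 'val_88') row in tab2 matches exactly one tab1 row on key. The test sticks to ACID tables throughout because only they carry the write-ID snapshots this validation needs; caching over non-transactional tables is guarded by a separate flag (off by default, shown here only as a pointer and assuming the HiveConf name below):

    -- plain (non-ACID) tables have no write-ID snapshot to validate:
    set hive.query.results.cache.nontransactional.tables.enabled=true;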