Posted to commits@hive.apache.org by rb...@apache.org on 2017/02/23 22:20:26 UTC

hive git commit: HIVE-15964: LLAP: Llap IO codepath not getting invoked due to file column id mismatch (Rajesh Balamohan, reviewed by Prasanth Jayachandran, Sergey Shelukhin)

Repository: hive
Updated Branches:
  refs/heads/master 53f033583 -> 539d3c626


HIVE-15964: LLAP: Llap IO codepath not getting invoked due to file column id mismatch (Rajesh Balamohan, reviewed by Prasanth Jayachandran, Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/539d3c62
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/539d3c62
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/539d3c62

Branch: refs/heads/master
Commit: 539d3c6264dc8ae462e906a390dcb1d45a63422e
Parents: 53f0335
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Fri Feb 24 03:50:09 2017 +0530
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Fri Feb 24 03:50:09 2017 +0530

----------------------------------------------------------------------
 .../hive/llap/io/api/impl/LlapRecordReader.java |  11 +-
 .../test/queries/clientpositive/llap_reader.q   |  40 +++++
 .../clientpositive/llap/llap_reader.q.out       | 167 +++++++++++++++++++
 .../results/clientpositive/llap_reader.q.out    |  86 ++++++++++
 4 files changed, 301 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/539d3c62/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index 9b1a905..d4e14a8 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -93,6 +93,8 @@ class LlapRecordReader
 
   private SchemaEvolution evolution;
 
+  private final boolean isAcidScan;
+
   public LlapRecordReader(JobConf job, FileSplit split, List<Integer> includedCols,
       String hostName, ColumnVectorProducer cvp, ExecutorService executor,
       InputFormat<?, ?> sourceInputFormat, Deserializer sourceSerDe, Reporter reporter)
@@ -139,7 +141,7 @@ class LlapRecordReader
       partitionValues = null;
     }
 
-    boolean isAcidScan = HiveConf.getBoolVar(jobConf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
+    isAcidScan = HiveConf.getBoolVar(jobConf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
     TypeDescription schema = OrcInputFormat.getDesiredRowTypeDescr(
         job, isAcidScan, Integer.MAX_VALUE);
 
@@ -169,8 +171,11 @@ class LlapRecordReader
 
   private boolean checkOrcSchemaEvolution() {
     for (int i = 0; i < columnCount; ++i) {
-      int colId = columnIds == null ? i : columnIds.get(i);
-      if (!evolution.isPPDSafeConversion(colId)) {
+      int projectedColId = columnIds == null ? i : columnIds.get(i);
+      // Adjust the projected column index to the file column index within the ORC struct.
+      // LLAP IO does not support ACID yet; when it does, this will be adjusted automatically.
+      int fileColId = OrcInputFormat.getRootColumn(!isAcidScan) + projectedColId + 1;
+      if (!evolution.isPPDSafeConversion(fileColId)) {
         LlapIoImpl.LOG.warn("Unsupported schema evolution! Disabling Llap IO for {}", split);
         return false;
       }
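
For context on the hunk above: ORC assigns file column id 0 to the top-level struct, so for a flat non-ACID file the i-th projected field lives at file column i + 1, and OrcInputFormat.getRootColumn(!isAcidScan) supplies the extra offset for the ACID wrapper struct. The following minimal, self-contained sketch is not the Hive implementation; the class name, the rootColumn helper, and its ACID offset value are made up purely to illustrate the mapping.

// Illustration only: projected (reader) column index -> ORC file column id.
public class OrcColumnIdMappingSketch {

  /**
   * Stand-in for OrcInputFormat.getRootColumn(!isAcidScan): the file column id
   * at which the row struct starts. For a plain (non-ACID) ORC file the row
   * struct is the top-level type, i.e. file column 0. The ACID offset used
   * here (6) is illustrative only, assuming the usual ACID metadata fields
   * plus the outer wrapper struct.
   */
  static int rootColumn(boolean isAcidScan) {
    return isAcidScan ? 6 : 0;
  }

  /** Maps a projected column index to the ORC file column id. */
  static int fileColumnId(int projectedColId, boolean isAcidScan) {
    // +1 skips the struct type itself, which occupies one file column id
    // before its fields.
    return rootColumn(isAcidScan) + projectedColId + 1;
  }

  public static void main(String[] args) {
    // For test(f1, f2, f3): projected ids 0, 1, 2 map to file columns 1, 2, 3,
    // which are the ids the fix now passes to isPPDSafeConversion.
    for (int i = 0; i < 3; i++) {
      System.out.println("projected " + i + " -> file column " + fileColumnId(i, false));
    }
  }
}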

http://git-wip-us.apache.org/repos/asf/hive/blob/539d3c62/ql/src/test/queries/clientpositive/llap_reader.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/llap_reader.q b/ql/src/test/queries/clientpositive/llap_reader.q
new file mode 100644
index 0000000..ac0624d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/llap_reader.q
@@ -0,0 +1,40 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.llap.io.enabled=true;
+SET hive.map.aggr=false;
+SET hive.exec.post.hooks=;
+
+CREATE TABLE test(f1 int, f2 int, f3 int) stored as orc;
+INSERT INTO TABLE test VALUES (1,1,1), (2,2,2), (3,3,3), (4,4,4);
+
+ALTER TABLE test CHANGE f1 f1 bigint;
+ALTER TABLE test CHANGE f2 f2 bigint;
+ALTER TABLE test CHANGE f3 f3 bigint;
+
+-- llap counters with data and meta cache
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter;
+SELECT count(f1) FROM test GROUP BY f1;
+SELECT count(f1) FROM test GROUP BY f1;
+
+SET hive.exec.post.hooks=;
+CREATE TABLE test_bigint(f1 bigint, f2 bigint, f3 bigint) stored as orc;
+INSERT OVERWRITE TABLE test_bigint select * from test;
+ALTER TABLE test_bigint CHANGE f1 f1 double;
+ALTER TABLE test_bigint CHANGE f2 f2 double;
+ALTER TABLE test_bigint CHANGE f3 f3 double;
+
+-- llap counters with meta cache alone
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter;
+select count(f1) from test_bigint group by f1;
+select count(f1) from test_bigint group by f1;
+
+
+-- Check with ACID table
+SET hive.exec.post.hooks=;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.support.concurrency=true;
+CREATE TABLE test_acid (f1 int, f2 int, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+INSERT INTO TABLE test_acid VALUES (1,1,'b1'), (2,2,'b2'), (3,3,'b3'), (4,4,'b4');
+
+-- should not have llap counters
+SET hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecTezSummaryPrinter;
+SELECT count(f1) FROM test_acid GROUP BY f1;
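
To spell out what this test expects: the two evolved non-ACID ORC tables should report LLAP IO counters, while the ACID table should not, since the LLAP IO path declines ACID scans (see the isAcidScan handling in the Java hunk above). The toy sketch below is not Hive code; the class and method names are invented for illustration and simply encode that expectation.

// Illustration only: when should the PostExecTezSummaryPrinter output include
// a "LLAP IO COUNTERS" section for the scans in this test?
public class LlapIoExpectationSketch {

  /**
   * LLAP IO is expected to engage only for non-ACID ORC scans whose schema
   * evolution is considered PPD-safe (as in the int -> bigint and
   * bigint -> double changes exercised above).
   */
  static boolean expectLlapIoCounters(boolean isAcidScan, boolean ppdSafeEvolution) {
    return !isAcidScan && ppdSafeEvolution;
  }

  public static void main(String[] args) {
    System.out.println(expectLlapIoCounters(false, true)); // test:        true
    System.out.println(expectLlapIoCounters(false, true)); // test_bigint: true
    System.out.println(expectLlapIoCounters(true, true));  // test_acid:   false
  }
}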

http://git-wip-us.apache.org/repos/asf/hive/blob/539d3c62/ql/src/test/results/clientpositive/llap/llap_reader.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llap_reader.q.out b/ql/src/test/results/clientpositive/llap/llap_reader.q.out
new file mode 100644
index 0000000..cc556a9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/llap_reader.q.out
@@ -0,0 +1,167 @@
+PREHOOK: query: CREATE TABLE test(f1 int, f2 int, f3 int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+PREHOOK: query: INSERT INTO TABLE test VALUES (1,1,1), (2,2,2), (3,3,3), (4,4,4)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test
+PREHOOK: query: ALTER TABLE test CHANGE f1 f1 bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test
+PREHOOK: query: ALTER TABLE test CHANGE f2 f2 bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test
+PREHOOK: query: ALTER TABLE test CHANGE f3 f3 bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test
+PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+   HDFS_BYTES_READ: 358
+   HDFS_BYTES_WRITTEN: 143
+   HDFS_READ_OPS: 6
+   HDFS_LARGE_READ_OPS: 0
+   HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+   CREATED_FILES: 1
+   DESERIALIZE_ERRORS: 0
+   RECORDS_IN_Map_1: 4
+   RECORDS_OUT_0: 4
+   RECORDS_OUT_INTERMEDIATE_Map_1: 4
+Stage-1 LLAP IO COUNTERS:
+   ALLOCATED_BYTES: 262144
+   ALLOCATED_USED_BYTES: 4
+   CACHE_MISS_BYTES: 7
+   METADATA_CACHE_MISS: 2
+   NUM_DECODED_BATCHES: 1
+   NUM_VECTOR_BATCHES: 1
+   ROWS_EMITTED: 4
+   SELECTED_ROWGROUPS: 1
+1
+1
+1
+1
+PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+   HDFS_BYTES_READ: 0
+   HDFS_BYTES_WRITTEN: 143
+   HDFS_READ_OPS: 2
+   HDFS_LARGE_READ_OPS: 0
+   HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+   CREATED_FILES: 1
+   DESERIALIZE_ERRORS: 0
+   RECORDS_IN_Map_1: 4
+   RECORDS_OUT_0: 4
+   RECORDS_OUT_INTERMEDIATE_Map_1: 4
+Stage-1 LLAP IO COUNTERS:
+   CACHE_HIT_BYTES: 7
+   CACHE_MISS_BYTES: 0
+   METADATA_CACHE_HIT: 2
+   NUM_DECODED_BATCHES: 1
+   NUM_VECTOR_BATCHES: 1
+   ROWS_EMITTED: 4
+   SELECTED_ROWGROUPS: 1
+1
+1
+1
+1
+PREHOOK: query: CREATE TABLE test_bigint(f1 bigint, f2 bigint, f3 bigint) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: INSERT OVERWRITE TABLE test_bigint select * from test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: ALTER TABLE test_bigint CHANGE f1 f1 double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test_bigint
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: ALTER TABLE test_bigint CHANGE f2 f2 double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test_bigint
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: ALTER TABLE test_bigint CHANGE f3 f3 double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test_bigint
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: select count(f1) from test_bigint group by f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_bigint
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+   HDFS_BYTES_READ: 595
+   HDFS_BYTES_WRITTEN: 143
+   HDFS_READ_OPS: 6
+   HDFS_LARGE_READ_OPS: 0
+   HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+   CREATED_FILES: 1
+   DESERIALIZE_ERRORS: 0
+   RECORDS_IN_Map_1: 4
+   RECORDS_OUT_0: 4
+   RECORDS_OUT_INTERMEDIATE_Map_1: 4
+Stage-1 LLAP IO COUNTERS:
+   METADATA_CACHE_MISS: 1
+1
+1
+1
+1
+PREHOOK: query: select count(f1) from test_bigint group by f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_bigint
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+   HDFS_BYTES_READ: 323
+   HDFS_BYTES_WRITTEN: 143
+   HDFS_READ_OPS: 4
+   HDFS_LARGE_READ_OPS: 0
+   HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+   CREATED_FILES: 1
+   DESERIALIZE_ERRORS: 0
+   RECORDS_IN_Map_1: 4
+   RECORDS_OUT_0: 4
+   RECORDS_OUT_INTERMEDIATE_Map_1: 4
+Stage-1 LLAP IO COUNTERS:
+   METADATA_CACHE_HIT: 1
+1
+1
+1
+1
+PREHOOK: query: CREATE TABLE test_acid (f1 int, f2 int, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_acid
+PREHOOK: query: INSERT INTO TABLE test_acid VALUES (1,1,'b1'), (2,2,'b2'), (3,3,'b3'), (4,4,'b4')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_acid
+PREHOOK: query: SELECT count(f1) FROM test_acid GROUP BY f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_acid
+#### A masked pattern was here ####
+Stage-1 FILE SYSTEM COUNTERS:
+   HDFS_BYTES_READ: 1567
+   HDFS_BYTES_WRITTEN: 143
+   HDFS_READ_OPS: 12
+   HDFS_LARGE_READ_OPS: 0
+   HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+   CREATED_FILES: 1
+   DESERIALIZE_ERRORS: 0
+   RECORDS_IN_Map_1: 4
+   RECORDS_OUT_0: 4
+   RECORDS_OUT_INTERMEDIATE_Map_1: 4
+1
+1
+1
+1

http://git-wip-us.apache.org/repos/asf/hive/blob/539d3c62/ql/src/test/results/clientpositive/llap_reader.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap_reader.q.out b/ql/src/test/results/clientpositive/llap_reader.q.out
new file mode 100644
index 0000000..dcbd3aa
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap_reader.q.out
@@ -0,0 +1,86 @@
+PREHOOK: query: CREATE TABLE test(f1 int, f2 int, f3 int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+PREHOOK: query: INSERT INTO TABLE test VALUES (1,1,1), (2,2,2), (3,3,3), (4,4,4)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test
+PREHOOK: query: ALTER TABLE test CHANGE f1 f1 bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test
+PREHOOK: query: ALTER TABLE test CHANGE f2 f2 bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test
+PREHOOK: query: ALTER TABLE test CHANGE f3 f3 bigint
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test
+PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+1
+1
+1
+1
+PREHOOK: query: SELECT count(f1) FROM test GROUP BY f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+1
+1
+1
+1
+PREHOOK: query: CREATE TABLE test_bigint(f1 bigint, f2 bigint, f3 bigint) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: INSERT OVERWRITE TABLE test_bigint select * from test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: ALTER TABLE test_bigint CHANGE f1 f1 double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test_bigint
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: ALTER TABLE test_bigint CHANGE f2 f2 double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test_bigint
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: ALTER TABLE test_bigint CHANGE f3 f3 double
+PREHOOK: type: ALTERTABLE_RENAMECOL
+PREHOOK: Input: default@test_bigint
+PREHOOK: Output: default@test_bigint
+PREHOOK: query: select count(f1) from test_bigint group by f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_bigint
+#### A masked pattern was here ####
+1
+1
+1
+1
+PREHOOK: query: select count(f1) from test_bigint group by f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_bigint
+#### A masked pattern was here ####
+1
+1
+1
+1
+PREHOOK: query: CREATE TABLE test_acid (f1 int, f2 int, val string) clustered by (val) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_acid
+PREHOOK: query: INSERT INTO TABLE test_acid VALUES (1,1,'b1'), (2,2,'b2'), (3,3,'b3'), (4,4,'b4')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@test_acid
+PREHOOK: query: SELECT count(f1) FROM test_acid GROUP BY f1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_acid
+#### A masked pattern was here ####
+1
+1
+1
+1