You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2020/03/02 08:48:18 UTC

[hive] branch master updated: HIVE-22583: LLAP cache always misses with non-vectorized serde readers such as OpenCSV (Adam Szita, reviewed by Slim Bougerra)

This is an automated email from the ASF dual-hosted git repository.

szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new bdd99f1  HIVE-22583: LLAP cache always misses with non-vectorized serde readers such as OpenCSV (Adam Szita, reviewed by Slim Bougerra)
bdd99f1 is described below

commit bdd99f1441967011cd1b74555ff6471e2b04dac1
Author: Adam Szita <sz...@cloudera.com>
AuthorDate: Wed Dec 4 16:25:42 2019 +0100

    HIVE-22583: LLAP cache always misses with non-vectorized serde readers such as OpenCSV (Adam Szita, reviewed by Slim Bougerra)
---
 .../test/resources/testconfiguration.properties    |  1 +
 .../llap/io/encoded/SerDeEncodedDataReader.java    |  4 ++
 ql/src/test/queries/clientpositive/csv_llap.q      | 17 ++++++++
 .../results/clientpositive/llap/csv_llap.q.out     | 46 ++++++++++++++++++++++
 .../results/clientpositive/llap/llap_io_etl.q.out  |  2 +-
 5 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 18697cc..375e22c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -539,6 +539,7 @@ minillaplocal.query.files=\
   correlationoptimizer2.q,\
   correlationoptimizer4.q,\
   correlationoptimizer6.q,\
+  csv_llap.q,\
   default_constraint.q,\
   disable_merge_for_bucketing.q,\
   cross_prod_1.q,\
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
index c9e9c02..f489dda 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
@@ -773,6 +773,10 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
       // Note that we cache each slice separately. We could cache them together at the end, but
       // then we won't be able to pass them to users without inc-refing explicitly.
       ColumnEncoding[] encodings = sd.getEncodings();
+      // Force creation of cache data entry for root (struct) column if not present.
+      if (encodings[0] != null && sd.getData()[0] == null) {
+        createArrayToCache(sd, 0, null);
+      }
       for (int i = 0; i < encodings.length; ++i) {
         // Make data consistent with encodings, don't store useless information.
         if (sd.getData()[i] == null) {
diff --git a/ql/src/test/queries/clientpositive/csv_llap.q b/ql/src/test/queries/clientpositive/csv_llap.q
new file mode 100644
index 0000000..c262c92
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/csv_llap.q
@@ -0,0 +1,17 @@
+CREATE EXTERNAL TABLE csv_llap_test (ts int, id string, b1 boolean, b2 boolean, b3 boolean, b4 boolean)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.OpenCSVSerde'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+LOAD DATA LOCAL INPATH '../../data/files/small_csv.csv' INTO TABLE csv_llap_test;
+--location '../../data/files/small_csv.csv';
+
+SELECT MIN(ts) FROM csv_llap_test;
+
+set hive.llap.io.cache.only=true;
+-- With cache-only mode enabled, any LLAP cache miss from here on throws an exception, so the query below must be served from the cache.
+
+SELECT MIN(ts) FROM csv_llap_test;
diff --git a/ql/src/test/results/clientpositive/llap/csv_llap.q.out b/ql/src/test/results/clientpositive/llap/csv_llap.q.out
new file mode 100644
index 0000000..1bf6d09
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/csv_llap.q.out
@@ -0,0 +1,46 @@
+PREHOOK: query: CREATE EXTERNAL TABLE csv_llap_test (ts int, id string, b1 boolean, b2 boolean, b3 boolean, b4 boolean)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.OpenCSVSerde'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@csv_llap_test
+POSTHOOK: query: CREATE EXTERNAL TABLE csv_llap_test (ts int, id string, b1 boolean, b2 boolean, b3 boolean, b4 boolean)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.OpenCSVSerde'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@csv_llap_test
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/small_csv.csv' INTO TABLE csv_llap_test
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@csv_llap_test
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/small_csv.csv' INTO TABLE csv_llap_test
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@csv_llap_test
+PREHOOK: query: SELECT MIN(ts) FROM csv_llap_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@csv_llap_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(ts) FROM csv_llap_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@csv_llap_test
+#### A masked pattern was here ####
+00117
+PREHOOK: query: SELECT MIN(ts) FROM csv_llap_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@csv_llap_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(ts) FROM csv_llap_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@csv_llap_test
+#### A masked pattern was here ####
+00117
diff --git a/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out b/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out
index 1a967fa..a527469 100644
--- a/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out
@@ -180,7 +180,7 @@ Stage-1 HIVE COUNTERS:
    RECORDS_OUT_OPERATOR_TS_0: 2
    TOTAL_TABLE_ROWS_WRITTEN: 2
 Stage-1 LLAP IO COUNTERS:
-   CACHE_MISS_BYTES: 244
+   CACHE_HIT_BYTES: 244
    NUM_DECODED_BATCHES: 1
    NUM_VECTOR_BATCHES: 1
    ROWS_EMITTED: 2