You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2020/03/02 08:48:18 UTC
[hive] branch master updated: HIVE-22583: LLAP cache always misses with non-vectorized serde readers such as OpenCSV (Adam Szita, reviewed by Slim Bougerra)
This is an automated email from the ASF dual-hosted git repository.
szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new bdd99f1 HIVE-22583: LLAP cache always misses with non-vectorized serde readers such as OpenCSV (Adam Szita, reviewed by Slim Bougerra)
bdd99f1 is described below
commit bdd99f1441967011cd1b74555ff6471e2b04dac1
Author: Adam Szita <sz...@cloudera.com>
AuthorDate: Wed Dec 4 16:25:42 2019 +0100
HIVE-22583: LLAP cache always misses with non-vectorized serde readers such as OpenCSV (Adam Szita, reviewed by Slim Bougerra)
---
.../test/resources/testconfiguration.properties | 1 +
.../llap/io/encoded/SerDeEncodedDataReader.java | 4 ++
ql/src/test/queries/clientpositive/csv_llap.q | 17 ++++++++
.../results/clientpositive/llap/csv_llap.q.out | 46 ++++++++++++++++++++++
.../results/clientpositive/llap/llap_io_etl.q.out | 2 +-
5 files changed, 69 insertions(+), 1 deletion(-)
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 18697cc..375e22c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -539,6 +539,7 @@ minillaplocal.query.files=\
correlationoptimizer2.q,\
correlationoptimizer4.q,\
correlationoptimizer6.q,\
+ csv_llap.q,\
default_constraint.q,\
disable_merge_for_bucketing.q,\
cross_prod_1.q,\
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
index c9e9c02..f489dda 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
@@ -773,6 +773,10 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
// Note that we cache each slice separately. We could cache them together at the end, but
// then we won't be able to pass them to users without inc-refing explicitly.
ColumnEncoding[] encodings = sd.getEncodings();
+ // Force creation of cache data entry for root (struct) column if not present.
+ if (encodings[0] != null && sd.getData()[0] == null) {
+ createArrayToCache(sd, 0, null);
+ }
for (int i = 0; i < encodings.length; ++i) {
// Make data consistent with encodings, don't store useless information.
if (sd.getData()[i] == null) {
diff --git a/ql/src/test/queries/clientpositive/csv_llap.q b/ql/src/test/queries/clientpositive/csv_llap.q
new file mode 100644
index 0000000..c262c92
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/csv_llap.q
@@ -0,0 +1,17 @@
+CREATE EXTERNAL TABLE csv_llap_test (ts int, id string, b1 boolean, b2 boolean, b3 boolean, b4 boolean)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+
+LOAD DATA LOCAL INPATH '../../data/files/small_csv.csv' INTO TABLE csv_llap_test;
+--location '../../data/files/small_csv.csv';
+
+SELECT MIN(ts) FROM csv_llap_test;
+
+set hive.llap.io.cache.only=true;
+--an exception would be thrown from here on for cache miss
+
+SELECT MIN(ts) FROM csv_llap_test;
diff --git a/ql/src/test/results/clientpositive/llap/csv_llap.q.out b/ql/src/test/results/clientpositive/llap/csv_llap.q.out
new file mode 100644
index 0000000..1bf6d09
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/csv_llap.q.out
@@ -0,0 +1,46 @@
+PREHOOK: query: CREATE EXTERNAL TABLE csv_llap_test (ts int, id string, b1 boolean, b2 boolean, b3 boolean, b4 boolean)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@csv_llap_test
+POSTHOOK: query: CREATE EXTERNAL TABLE csv_llap_test (ts int, id string, b1 boolean, b2 boolean, b3 boolean, b4 boolean)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@csv_llap_test
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/small_csv.csv' INTO TABLE csv_llap_test
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@csv_llap_test
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/small_csv.csv' INTO TABLE csv_llap_test
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@csv_llap_test
+PREHOOK: query: SELECT MIN(ts) FROM csv_llap_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@csv_llap_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(ts) FROM csv_llap_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@csv_llap_test
+#### A masked pattern was here ####
+00117
+PREHOOK: query: SELECT MIN(ts) FROM csv_llap_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@csv_llap_test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(ts) FROM csv_llap_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@csv_llap_test
+#### A masked pattern was here ####
+00117
diff --git a/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out b/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out
index 1a967fa..a527469 100644
--- a/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_io_etl.q.out
@@ -180,7 +180,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_TS_0: 2
TOTAL_TABLE_ROWS_WRITTEN: 2
Stage-1 LLAP IO COUNTERS:
- CACHE_MISS_BYTES: 244
+ CACHE_HIT_BYTES: 244
NUM_DECODED_BATCHES: 1
NUM_VECTOR_BATCHES: 1
ROWS_EMITTED: 2