You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by dk...@apache.org on 2023/01/20 08:51:19 UTC

[hive] branch master updated: HIVE-26928: LlapIoImpl::getParquetFooterBuffersFromCache throws exception when metadata cache is disabled (Simhadri Govindappa, reviewed by Denys Kuzmenko)

This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 2cdd5f2ce7b HIVE-26928: LlapIoImpl::getParquetFooterBuffersFromCache throws exception when metadata cache is disabled (Simhadri Govindappa, reviewed by Denys Kuzmenko)
2cdd5f2ce7b is described below

commit 2cdd5f2ce7bc6ea08f73b3737a58c06398aa5910
Author: SimhadriGovindappa <si...@gmail.com>
AuthorDate: Fri Jan 20 14:21:08 2023 +0530

    HIVE-26928: LlapIoImpl::getParquetFooterBuffersFromCache throws exception when metadata cache is disabled (Simhadri Govindappa, reviewed by Denys Kuzmenko)
    
    Closes #3962
---
 .../mr/hive/vector/HiveVectorizedReader.java       |   2 +-
 .../test/queries/positive/llap_cache_disabled.q    |  48 +++++++++
 .../results/positive/llap_cache_disabled.q.out     | 116 +++++++++++++++++++++
 .../org/apache/hadoop/hive/llap/io/api/LlapIo.java |   2 +
 .../hadoop/hive/llap/io/api/impl/LlapIoImpl.java   |   5 +
 5 files changed, 172 insertions(+), 1 deletion(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java
index e3d206189af..02ba73a476e 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java
@@ -222,7 +222,7 @@ public class HiveVectorizedReader {
 
     MemoryBufferOrBuffers footerData = null;
     if (HiveConf.getBoolVar(job, HiveConf.ConfVars.LLAP_IO_ENABLED, LlapProxy.isDaemon()) &&
-        LlapProxy.getIo() != null) {
+        LlapProxy.getIo() != null && LlapProxy.getIo().usingLowLevelCache()) {
       LlapProxy.getIo().initCacheOnlyInputFormat(inputFormat);
       footerData = LlapProxy.getIo().getParquetFooterBuffersFromCache(path, job, fileId);
     }
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/llap_cache_disabled.q b/iceberg/iceberg-handler/src/test/queries/positive/llap_cache_disabled.q
new file mode 100644
index 00000000000..6e7a157567a
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/llap_cache_disabled.q
@@ -0,0 +1,48 @@
+-- Mask random uuid
+--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+
+set hive.llap.io.memory.mode=none;
+set hive.llap.io.enabled=true;
+set hive.vectorized.execution.enabled=true;
+
+CREATE EXTERNAL TABLE llap_items_parquet (itemid INT, price INT, category STRING, name STRING, description STRING) STORED BY ICEBERG STORED AS PARQUET;
+INSERT INTO llap_items_parquet VALUES
+(0, 35000,  'Sedan',     'Model 3', 'Standard range plus'),
+(1, 45000,  'Sedan',     'Model 3', 'Long range'),
+(2, 50000,  'Sedan',     'Model 3', 'Performance'),
+(3, 48000,  'Crossover', 'Model Y', 'Long range'),
+(4, 55000,  'Crossover', 'Model Y', 'Performance'),
+(5, 83000,  'Sports',    'Model S', 'Long range'),
+(6, 123000, 'Sports',   'Model S', 'Plaid');
+
+
+
+CREATE EXTERNAL TABLE llap_orders_parquet (orderid INT, quantity INT, itemid INT, tradets TIMESTAMP) PARTITIONED BY (p1 STRING, p2 STRING) STORED BY ICEBERG STORED AS PARQUET;
+INSERT INTO llap_orders_parquet VALUES
+(0, 48, 5, timestamp('2000-06-04 19:55:46.129'), 'EU', 'DE'),
+(1, 12, 6, timestamp('2007-06-24 19:23:22.829'), 'US', 'TX'),
+(2, 76, 4, timestamp('2018-02-19 23:43:51.995'), 'EU', 'DE'),
+(3, 91, 5, timestamp('2000-07-15 09:09:11.587'), 'US', 'NJ'),
+(4, 18, 6, timestamp('2007-12-02 22:30:39.302'), 'EU', 'ES'),
+(5, 71, 5, timestamp('2010-02-08 20:31:23.430'), 'EU', 'DE'),
+(6, 78, 3, timestamp('2016-02-22 20:37:37.025'), 'EU', 'FR'),
+(7, 88, 0, timestamp('2020-03-26 18:47:40.611'), 'EU', 'FR'),
+(8, 87, 4, timestamp('2003-02-20 00:48:09.139'), 'EU', 'ES'),
+(9, 60, 6, timestamp('2012-08-28 01:35:54.283'), 'EU', 'IT'),
+(10, 24, 5, timestamp('2015-03-28 18:57:50.069'), 'US', 'NY'),
+(11, 42, 2, timestamp('2012-06-27 01:13:32.350'), 'EU', 'UK'),
+(12, 37, 4, timestamp('2020-08-09 01:18:50.153'), 'US', 'NY'),
+(13, 52, 1, timestamp('2019-09-04 01:46:19.558'), 'EU', 'UK'),
+(14, 96, 3, timestamp('2019-03-05 22:00:03.020'), 'US', 'NJ'),
+(15, 18, 3, timestamp('2001-09-11 00:14:12.687'), 'EU', 'FR'),
+(16, 46, 0, timestamp('2013-08-31 02:16:17.878'), 'EU', 'UK'),
+(17, 26, 5, timestamp('2001-02-01 20:05:32.317'), 'EU', 'FR'),
+(18, 68, 5, timestamp('2009-12-29 08:44:08.048'), 'EU', 'ES'),
+(19, 54, 6, timestamp('2015-08-15 01:59:22.177'), 'EU', 'HU'),
+(20, 10, 0, timestamp('2018-05-06 12:56:12.789'), 'US', 'CA');
+
+-- select query with LLAP IO enabled but the LLAP cache disabled (hive.llap.io.memory.mode=none)
+SELECT i.name, i.description, SUM(o.quantity) FROM llap_items_parquet i JOIN llap_orders_parquet o ON i.itemid = o.itemid  WHERE p1 = 'EU' and i.price >= 50000 GROUP BY i.name, i.description;
+
+set hive.llap.io.memory.mode=cache;
+SELECT i.name, i.description, SUM(o.quantity) FROM llap_items_parquet i JOIN llap_orders_parquet o ON i.itemid = o.itemid  WHERE p1 = 'EU' and i.price >= 50000 GROUP BY i.name, i.description;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap_cache_disabled.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap_cache_disabled.q.out
new file mode 100644
index 00000000000..ae111b83329
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap_cache_disabled.q.out
@@ -0,0 +1,116 @@
+PREHOOK: query: CREATE EXTERNAL TABLE llap_items_parquet (itemid INT, price INT, category STRING, name STRING, description STRING) STORED BY ICEBERG STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@llap_items_parquet
+POSTHOOK: query: CREATE EXTERNAL TABLE llap_items_parquet (itemid INT, price INT, category STRING, name STRING, description STRING) STORED BY ICEBERG STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@llap_items_parquet
+PREHOOK: query: INSERT INTO llap_items_parquet VALUES
+(0, 35000,  'Sedan',     'Model 3', 'Standard range plus'),
+(1, 45000,  'Sedan',     'Model 3', 'Long range'),
+(2, 50000,  'Sedan',     'Model 3', 'Performance'),
+(3, 48000,  'Crossover', 'Model Y', 'Long range'),
+(4, 55000,  'Crossover', 'Model Y', 'Performance'),
+(5, 83000,  'Sports',    'Model S', 'Long range'),
+(6, 123000, 'Sports',   'Model S', 'Plaid')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@llap_items_parquet
+POSTHOOK: query: INSERT INTO llap_items_parquet VALUES
+(0, 35000,  'Sedan',     'Model 3', 'Standard range plus'),
+(1, 45000,  'Sedan',     'Model 3', 'Long range'),
+(2, 50000,  'Sedan',     'Model 3', 'Performance'),
+(3, 48000,  'Crossover', 'Model Y', 'Long range'),
+(4, 55000,  'Crossover', 'Model Y', 'Performance'),
+(5, 83000,  'Sports',    'Model S', 'Long range'),
+(6, 123000, 'Sports',   'Model S', 'Plaid')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@llap_items_parquet
+PREHOOK: query: CREATE EXTERNAL TABLE llap_orders_parquet (orderid INT, quantity INT, itemid INT, tradets TIMESTAMP) PARTITIONED BY (p1 STRING, p2 STRING) STORED BY ICEBERG STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@llap_orders_parquet
+POSTHOOK: query: CREATE EXTERNAL TABLE llap_orders_parquet (orderid INT, quantity INT, itemid INT, tradets TIMESTAMP) PARTITIONED BY (p1 STRING, p2 STRING) STORED BY ICEBERG STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@llap_orders_parquet
+PREHOOK: query: INSERT INTO llap_orders_parquet VALUES
+(0, 48, 5, timestamp('2000-06-04 19:55:46.129'), 'EU', 'DE'),
+(1, 12, 6, timestamp('2007-06-24 19:23:22.829'), 'US', 'TX'),
+(2, 76, 4, timestamp('2018-02-19 23:43:51.995'), 'EU', 'DE'),
+(3, 91, 5, timestamp('2000-07-15 09:09:11.587'), 'US', 'NJ'),
+(4, 18, 6, timestamp('2007-12-02 22:30:39.302'), 'EU', 'ES'),
+(5, 71, 5, timestamp('2010-02-08 20:31:23.430'), 'EU', 'DE'),
+(6, 78, 3, timestamp('2016-02-22 20:37:37.025'), 'EU', 'FR'),
+(7, 88, 0, timestamp('2020-03-26 18:47:40.611'), 'EU', 'FR'),
+(8, 87, 4, timestamp('2003-02-20 00:48:09.139'), 'EU', 'ES'),
+(9, 60, 6, timestamp('2012-08-28 01:35:54.283'), 'EU', 'IT'),
+(10, 24, 5, timestamp('2015-03-28 18:57:50.069'), 'US', 'NY'),
+(11, 42, 2, timestamp('2012-06-27 01:13:32.350'), 'EU', 'UK'),
+(12, 37, 4, timestamp('2020-08-09 01:18:50.153'), 'US', 'NY'),
+(13, 52, 1, timestamp('2019-09-04 01:46:19.558'), 'EU', 'UK'),
+(14, 96, 3, timestamp('2019-03-05 22:00:03.020'), 'US', 'NJ'),
+(15, 18, 3, timestamp('2001-09-11 00:14:12.687'), 'EU', 'FR'),
+(16, 46, 0, timestamp('2013-08-31 02:16:17.878'), 'EU', 'UK'),
+(17, 26, 5, timestamp('2001-02-01 20:05:32.317'), 'EU', 'FR'),
+(18, 68, 5, timestamp('2009-12-29 08:44:08.048'), 'EU', 'ES'),
+(19, 54, 6, timestamp('2015-08-15 01:59:22.177'), 'EU', 'HU'),
+(20, 10, 0, timestamp('2018-05-06 12:56:12.789'), 'US', 'CA')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@llap_orders_parquet
+POSTHOOK: query: INSERT INTO llap_orders_parquet VALUES
+(0, 48, 5, timestamp('2000-06-04 19:55:46.129'), 'EU', 'DE'),
+(1, 12, 6, timestamp('2007-06-24 19:23:22.829'), 'US', 'TX'),
+(2, 76, 4, timestamp('2018-02-19 23:43:51.995'), 'EU', 'DE'),
+(3, 91, 5, timestamp('2000-07-15 09:09:11.587'), 'US', 'NJ'),
+(4, 18, 6, timestamp('2007-12-02 22:30:39.302'), 'EU', 'ES'),
+(5, 71, 5, timestamp('2010-02-08 20:31:23.430'), 'EU', 'DE'),
+(6, 78, 3, timestamp('2016-02-22 20:37:37.025'), 'EU', 'FR'),
+(7, 88, 0, timestamp('2020-03-26 18:47:40.611'), 'EU', 'FR'),
+(8, 87, 4, timestamp('2003-02-20 00:48:09.139'), 'EU', 'ES'),
+(9, 60, 6, timestamp('2012-08-28 01:35:54.283'), 'EU', 'IT'),
+(10, 24, 5, timestamp('2015-03-28 18:57:50.069'), 'US', 'NY'),
+(11, 42, 2, timestamp('2012-06-27 01:13:32.350'), 'EU', 'UK'),
+(12, 37, 4, timestamp('2020-08-09 01:18:50.153'), 'US', 'NY'),
+(13, 52, 1, timestamp('2019-09-04 01:46:19.558'), 'EU', 'UK'),
+(14, 96, 3, timestamp('2019-03-05 22:00:03.020'), 'US', 'NJ'),
+(15, 18, 3, timestamp('2001-09-11 00:14:12.687'), 'EU', 'FR'),
+(16, 46, 0, timestamp('2013-08-31 02:16:17.878'), 'EU', 'UK'),
+(17, 26, 5, timestamp('2001-02-01 20:05:32.317'), 'EU', 'FR'),
+(18, 68, 5, timestamp('2009-12-29 08:44:08.048'), 'EU', 'ES'),
+(19, 54, 6, timestamp('2015-08-15 01:59:22.177'), 'EU', 'HU'),
+(20, 10, 0, timestamp('2018-05-06 12:56:12.789'), 'US', 'CA')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@llap_orders_parquet
+PREHOOK: query: SELECT i.name, i.description, SUM(o.quantity) FROM llap_items_parquet i JOIN llap_orders_parquet o ON i.itemid = o.itemid  WHERE p1 = 'EU' and i.price >= 50000 GROUP BY i.name, i.description
+PREHOOK: type: QUERY
+PREHOOK: Input: default@llap_items_parquet
+PREHOOK: Input: default@llap_orders_parquet
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT i.name, i.description, SUM(o.quantity) FROM llap_items_parquet i JOIN llap_orders_parquet o ON i.itemid = o.itemid  WHERE p1 = 'EU' and i.price >= 50000 GROUP BY i.name, i.description
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@llap_items_parquet
+POSTHOOK: Input: default@llap_orders_parquet
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Model 3	Performance	42
+Model S	Long range	213
+Model S	Plaid	132
+Model Y	Performance	163
+PREHOOK: query: SELECT i.name, i.description, SUM(o.quantity) FROM llap_items_parquet i JOIN llap_orders_parquet o ON i.itemid = o.itemid  WHERE p1 = 'EU' and i.price >= 50000 GROUP BY i.name, i.description
+PREHOOK: type: QUERY
+PREHOOK: Input: default@llap_items_parquet
+PREHOOK: Input: default@llap_orders_parquet
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT i.name, i.description, SUM(o.quantity) FROM llap_items_parquet i JOIN llap_orders_parquet o ON i.itemid = o.itemid  WHERE p1 = 'EU' and i.price >= 50000 GROUP BY i.name, i.description
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@llap_items_parquet
+POSTHOOK: Input: default@llap_orders_parquet
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Model 3	Performance	42
+Model S	Long range	213
+Model S	Plaid	132
+Model Y	Performance	163
diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/LlapIo.java b/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/LlapIo.java
index c650288a93a..f1b8c17b727 100644
--- a/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/LlapIo.java
+++ b/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/LlapIo.java
@@ -116,4 +116,6 @@ public interface LlapIo<T> {
    */
   void loadDataIntoCache(LlapDaemonProtocolProtos.CacheEntryList metadata);
 
+  boolean usingLowLevelCache();
+
 }
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
index 4634c4639f0..0344b252c54 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
@@ -537,4 +537,9 @@ public class LlapIoImpl implements LlapIo<VectorizedRowBatch>, LlapIoDebugDump {
     }
   }
 
+  @Override
+  public boolean usingLowLevelCache() {
+    return useLowLevelCache;
+  }
+
 }