You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ay...@apache.org on 2023/04/05 09:35:52 UTC

[hive] branch master updated: HIVE-27185: Iceberg: Cache iceberg table while loading for stats. (#4165). (Ayush Saxena, reviewed by Denys Kuzmenko)

This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 920c7e17082 HIVE-27185: Iceberg: Cache iceberg table while loading for stats. (#4165). (Ayush Saxena, reviewed by Denys Kuzmenko)
920c7e17082 is described below

commit 920c7e17082e7cfcafa06dac12159386912fb2d7
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Wed Apr 5 15:05:44 2023 +0530

    HIVE-27185: Iceberg: Cache iceberg table while loading for stats. (#4165). (Ayush Saxena, reviewed by Denys Kuzmenko)
---
 .../src/java/org/apache/hadoop/hive/conf/HiveConf.java   |  4 ++++
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java       | 16 ++++++++++++++--
 .../apache/hadoop/hive/ql/session/SessionStateUtil.java  |  5 ++---
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 9f61c132bde..8b666164212 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -6292,6 +6292,10 @@ public class HiveConf extends Configuration {
     return getVar(conf, ConfVars.HIVEQUERYSTRING, EncoderDecoderFactory.URL_ENCODER_DECODER);
   }
 
+  public static String getQueryId(Configuration conf) {
+    return getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "");
+  }
+
   public void setQueryString(String query) {
     setQueryString(this, query);
   }
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 80943f97e95..bcadebbf4c0 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.metastore.api.LockType;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.Context.Operation;
+import org.apache.hadoop.hive.ql.QueryState;
 import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableDesc;
 import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
 import org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc;
@@ -315,8 +316,8 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
   @Override
   public Map<String, String> getBasicStatistics(Partish partish) {
     org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
-    TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
-    Table table = Catalogs.loadTable(conf, tableDesc.getProperties());
+    // Skip the cache if the query modified rows or has no query state, as the values could have changed.
+    Table table = getTable(hmsTable);
     String statsSource = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USE_STATS_FROM).toLowerCase();
     Map<String, String> stats = Maps.newHashMap();
     switch (statsSource) {
@@ -349,6 +350,17 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     return stats;
   }
 
+  private Table getTable(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+    Table table;
+    final Optional<QueryState> queryState = SessionStateUtil.getQueryState(conf);
+    if (!queryState.isPresent() || queryState.get().getNumModifiedRows() > 0) {
+      table = IcebergTableUtil.getTable(conf, hmsTable.getTTable(), true);
+    } else {
+      table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+    }
+    return table;
+  }
+
   /**
    * No need for exclusive locks when writing, since Iceberg tables use optimistic concurrency when writing
    * and only lock the table during the commit operation.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionStateUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionStateUtil.java
index b7791fe3625..f831f0920b6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionStateUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionStateUtil.java
@@ -115,9 +115,8 @@ public class SessionStateUtil {
     return addResource(conf, COMMIT_INFO_PREFIX + tableName, newCommitInfoMap);
   }
 
-  private static Optional<QueryState> getQueryState(Configuration conf) {
-    return Optional.ofNullable(SessionState.get())
-        .map(session -> session.getQueryState(conf.get(HiveConf.ConfVars.HIVEQUERYID.varname, "")));
+  public static Optional<QueryState> getQueryState(Configuration conf) {
+    return Optional.ofNullable(SessionState.get()).map(ss -> ss.getQueryState(HiveConf.getQueryId(conf)));
   }
 
   /**