You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ay...@apache.org on 2023/04/05 09:35:52 UTC
[hive] branch master updated: HIVE-27185: Iceberg: Cache iceberg table while loading for stats. (#4165). (Ayush Saxena, reviewed by Denys Kuzmenko)
This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 920c7e17082 HIVE-27185: Iceberg: Cache iceberg table while loading for stats. (#4165). (Ayush Saxena, reviewed by Denys Kuzmenko)
920c7e17082 is described below
commit 920c7e17082e7cfcafa06dac12159386912fb2d7
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Wed Apr 5 15:05:44 2023 +0530
HIVE-27185: Iceberg: Cache iceberg table while loading for stats. (#4165). (Ayush Saxena, reviewed by Denys Kuzmenko)
---
.../src/java/org/apache/hadoop/hive/conf/HiveConf.java | 4 ++++
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 16 ++++++++++++++--
.../apache/hadoop/hive/ql/session/SessionStateUtil.java | 5 ++---
3 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 9f61c132bde..8b666164212 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -6292,6 +6292,10 @@ public class HiveConf extends Configuration {
return getVar(conf, ConfVars.HIVEQUERYSTRING, EncoderDecoderFactory.URL_ENCODER_DECODER);
}
+ public static String getQueryId(Configuration conf) {
+ return getVar(conf, HiveConf.ConfVars.HIVEQUERYID, "");
+ }
+
public void setQueryString(String query) {
setQueryString(this, query);
}
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 80943f97e95..bcadebbf4c0 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.metastore.api.LockType;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.Context.Operation;
+import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableDesc;
import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
import org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc;
@@ -315,8 +316,8 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
@Override
public Map<String, String> getBasicStatistics(Partish partish) {
org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
- TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
- Table table = Catalogs.loadTable(conf, tableDesc.getProperties());
+ // For write queries where rows got modified, don't fetch from cache as values could have changed.
+ Table table = getTable(hmsTable);
String statsSource = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_USE_STATS_FROM).toLowerCase();
Map<String, String> stats = Maps.newHashMap();
switch (statsSource) {
@@ -349,6 +350,17 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
return stats;
}
+ private Table getTable(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+ Table table;
+ final Optional<QueryState> queryState = SessionStateUtil.getQueryState(conf);
+ if (!queryState.isPresent() || queryState.get().getNumModifiedRows() > 0) {
+ table = IcebergTableUtil.getTable(conf, hmsTable.getTTable(), true);
+ } else {
+ table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+ }
+ return table;
+ }
+
/**
* No need for exclusive locks when writing, since Iceberg tables use optimistic concurrency when writing
* and only lock the table during the commit operation.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionStateUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionStateUtil.java
index b7791fe3625..f831f0920b6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionStateUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionStateUtil.java
@@ -115,9 +115,8 @@ public class SessionStateUtil {
return addResource(conf, COMMIT_INFO_PREFIX + tableName, newCommitInfoMap);
}
- private static Optional<QueryState> getQueryState(Configuration conf) {
- return Optional.ofNullable(SessionState.get())
- .map(session -> session.getQueryState(conf.get(HiveConf.ConfVars.HIVEQUERYID.varname, "")));
+ public static Optional<QueryState> getQueryState(Configuration conf) {
+ return Optional.ofNullable(SessionState.get()).map(ss -> ss.getQueryState(HiveConf.getQueryId(conf)));
}
/**