You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "ASF GitHub Bot (Jira)" <ji...@apache.org> on 2021/09/21 07:43:00 UTC

[jira] [Work logged] (HIVE-24776) Reduce HMS DB calls during stats updates

     [ https://issues.apache.org/jira/browse/HIVE-24776?focusedWorklogId=653408&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-653408 ]

ASF GitHub Bot logged work on HIVE-24776:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 21/Sep/21 07:42
            Start Date: 21/Sep/21 07:42
    Worklog Time Spent: 10m 
      Work Description: maheshk114 commented on a change in pull request #2636:
URL: https://github.com/apache/hive/pull/2636#discussion_r712748333



##########
File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
##########
@@ -9700,8 +9705,7 @@ private void writeMPartitionColumnStatistics(Table table, Partition partition,
       Map<String, MPartitionColumnStatistics> oldStats = getPartitionColStats(table, statsDesc
           .getPartName(), colNames, colStats.getEngine());
 
-      MPartition mPartition = getMPartition(
-          catName, statsDesc.getDbName(), statsDesc.getTableName(), partVals, mTable);
+      //MPartition mPartition = convertToMPart(partition,mTable,false);
       if (partition == null) {

Review comment:
       this check can be moved up.

##########
File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
##########
@@ -9687,10 +9688,14 @@ private void writeMPartitionColumnStatistics(Table table, Partition partition,
       List<ColumnStatisticsObj> statsObjs = colStats.getStatsObj();
       ColumnStatisticsDesc statsDesc = colStats.getStatsDesc();
       String catName = statsDesc.isSetCatName() ? statsDesc.getCatName() : getDefaultCatalog(conf);
-      MTable mTable = ensureGetMTable(catName, statsDesc.getDbName(), statsDesc.getTableName());
-      Table table = convertToTable(mTable);
-      Partition partition = convertToPart(getMPartition(
-          catName, statsDesc.getDbName(), statsDesc.getTableName(), partVals, mTable), false);
+      if(table == null) {
+        MTable mTable = ensureGetMTable(catName, statsDesc.getDbName(), statsDesc.getTableName());
+        table = convertToTable(mTable);
+      }
+      //MTable mTable = ensureGetMTable(catName, statsDesc.getDbName(), statsDesc.getTableName());
+      MTable mTable = convertToMTable(table);

Review comment:
       convertToMTable is redundant in case table is null

##########
File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
##########
@@ -75,6 +75,8 @@
 
 import com.google.common.annotations.VisibleForTesting;
 
+import javax.servlet.http.Part;

Review comment:
       Why is this import required?

##########
File path: standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestObjectStore.java
##########
@@ -691,9 +691,9 @@ private void createPartitionedTable(boolean withPrivileges, boolean withStatisti
 
         ColumnStatisticsObj partStats = new ColumnStatisticsObj("test_part_col", "int", data);
         statsObjList.add(partStats);
-
+        //Table tbl = objectStore.getTable(tbl1.getCatName(),tbl1.getDbName(),tbl1.getTableName());

Review comment:
       remove this line

##########
File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
##########
@@ -2197,18 +2199,26 @@ private void updatePartitionColumnStatisticsInCache(ColumnStatistics colStats, M
     sharedCache.updatePartitionColStatsInCache(catName, dbName, tblName, partVals, colStats.getStatsObj());
   }
 
-  @Override public Map<String, String> updatePartitionColumnStatistics(ColumnStatistics colStats, List<String> partVals,
+  @Override
+  public Map<String, String> updatePartitionColumnStatistics(Table table,
+      ColumnStatistics colStats, List<String> partVals,
       String validWriteIds, long writeId)
       throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException {
     Map<String, String> newParams =
-        rawStore.updatePartitionColumnStatistics(colStats, partVals, validWriteIds, writeId);
+        rawStore.updatePartitionColumnStatistics(table, colStats, partVals, validWriteIds, writeId);
     // in case of event based cache update, cache is updated during commit txn
     if (newParams != null && !canUseEvents) {
       updatePartitionColumnStatisticsInCache(colStats, newParams, partVals);
     }
     return newParams;
   }
 
+  @Override public Map<String, String> updatePartitionColumnStatistics(ColumnStatistics statsObj, List<String> partVals,
+      String validWriteIds, long writeId)
+      throws NoSuchObjectException, MetaException, InvalidObjectException, InvalidInputException {
+    return updatePartitionColumnStatistics(null, statsObj, partVals, validWriteIds, writeId);

Review comment:
       Why not call the rawStore API without the table?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

            Worklog Id:     (was: 653408)
    Remaining Estimate: 0h
            Time Spent: 10m

> Reduce HMS DB calls during stats updates
> ----------------------------------------
>
>                 Key: HIVE-24776
>                 URL: https://issues.apache.org/jira/browse/HIVE-24776
>             Project: Hive
>          Issue Type: Improvement
>            Reporter: Rajesh Balamohan
>            Priority: Major
>          Time Spent: 10m
>  Remaining Estimate: 0h
>
>  When adding a large number of partitions (100s/1000s) to a table, it ends up making lots of getTable calls that are not needed.
> Lines mentioned below may vary slightly in apache-master. 
> {noformat}
> 	at org.datanucleus.api.jdo.JDOPersistenceManager.jdoRetrieve(JDOPersistenceManager.java:620)
> 	at org.datanucleus.api.jdo.JDOPersistenceManager.retrieve(JDOPersistenceManager.java:637)
> 	at org.datanucleus.api.jdo.JDOPersistenceManager.retrieve(JDOPersistenceManager.java:646)
> 	at org.apache.hadoop.hive.metastore.ObjectStore.getMTable(ObjectStore.java:2112)
> 	at org.apache.hadoop.hive.metastore.ObjectStore.getMTable(ObjectStore.java:2150)
> 	at org.apache.hadoop.hive.metastore.ObjectStore.ensureGetMTable(ObjectStore.java:4578)
> 	at org.apache.hadoop.hive.metastore.ObjectStore.ensureGetTable(ObjectStore.java:4588)
> 	at org.apache.hadoop.hive.metastore.ObjectStore.updatePartitionColumnStatistics(ObjectStore.java:9264)
> 	at sun.reflect.GeneratedMethodAccessor92.invoke(Unknown Source)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:498)
> 	at org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:97)
> 	at com.sun.proxy.$Proxy27.updatePartitionColumnStatistics(Unknown Source)
> 	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.updatePartitonColStatsInternal(HiveMetaStore.java:6679)
> 	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.updatePartColumnStatsWithMerge(HiveMetaStore.java:8655)
> 	at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.set_aggr_stats_for(HiveMetaStore.java:8592)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:498)
> 	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invokeInternal(RetryingHMSHandler.java:147)
> 	at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:108)
> 	at com.sun.proxy.$Proxy28.set_aggr_stats_for(Unknown Source)
> 	at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Processor$set_aggr_stats_for.getResult(ThriftHiveMetastore.java:19060)
> 	at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Processor$set_aggr_stats_for.getResult(ThriftHiveMetastore.java:19044)
> 	at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
> 	at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
>  {noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)