You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by se...@apache.org on 2018/12/05 21:54:46 UTC

[3/8] trafodion git commit: [TRAFODION-3234] Refactor hive meta calls to be less resource intensive to support hive partitions if needed

[TRAFODION-3234] Refactor hive meta calls to be less resource intensive to support hive partitions if needed

There was yet another method that called the HiveMetaStoreClient.listPartitions API. This method is
called whenever a Hive table entry is picked from the NATable cache.

Replaced the listPartitions call with the Trafodion-specific HDFSClient.getHiveTableMaxModificationTs
call, which is less expensive.

HiveMetaStoreClient is now held in a ThreadLocal variable so that HiveMetaStoreClient methods
can be invoked in a thread-safe manner.


Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/bfd7a8f6
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/bfd7a8f6
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/bfd7a8f6

Branch: refs/heads/master
Commit: bfd7a8f6210bf27ed8c0b1ecd209971abf47f31b
Parents: 923ca57
Author: selvaganesang <se...@esgyn.com>
Authored: Mon Nov 26 23:50:13 2018 +0000
Committer: selvaganesang <se...@esgyn.com>
Committed: Fri Nov 30 00:20:22 2018 +0000

----------------------------------------------------------------------
 core/sql/executor/ExExeUtilGet.cpp              |  2 +-
 core/sql/executor/hiveHook.cpp                  | 41 +++++--------
 core/sql/optimizer/NATable.cpp                  | 12 ++--
 core/sql/optimizer/hiveHook.h                   | 10 ++--
 core/sql/sqlcomp/CmpDescribe.cpp                |  2 +-
 .../main/java/org/trafodion/sql/HiveClient.java | 61 +++++++++++++++-----
 core/sql/ustat/hs_la.cpp                        |  2 +-
 7 files changed, 74 insertions(+), 56 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafodion/blob/bfd7a8f6/core/sql/executor/ExExeUtilGet.cpp
----------------------------------------------------------------------
diff --git a/core/sql/executor/ExExeUtilGet.cpp b/core/sql/executor/ExExeUtilGet.cpp
index 080072e..09a199a 100644
--- a/core/sql/executor/ExExeUtilGet.cpp
+++ b/core/sql/executor/ExExeUtilGet.cpp
@@ -6217,7 +6217,7 @@ short ExExeUtilHiveMDaccessTcb::work()
             while (i < tblNames_.entries())
               {
                 hiveMD_->getTableDesc(schForHive_, tblNames_[i]->c_str(), 
-                      0, FALSE, FALSE, FALSE /*dont read partn info*/);
+                      FALSE, FALSE, FALSE /*dont read partn info*/);
                 i++;
               }
 

http://git-wip-us.apache.org/repos/asf/trafodion/blob/bfd7a8f6/core/sql/executor/hiveHook.cpp
----------------------------------------------------------------------
diff --git a/core/sql/executor/hiveHook.cpp b/core/sql/executor/hiveHook.cpp
index 5922599..284ba5c 100644
--- a/core/sql/executor/hiveHook.cpp
+++ b/core/sql/executor/hiveHook.cpp
@@ -299,7 +299,6 @@ struct hive_tbl_desc* HiveMetaData::getFakedTableDesc(const char* tblName)
 
 struct hive_tbl_desc* HiveMetaData::getTableDesc( const char* schemaName,
                                                  const char* tblName,
-                                                 Int64 expirationTS,
                                                  NABoolean validateOnly,
                                                  NABoolean rereadFromMD,
                                                  NABoolean readPartnInfo)
@@ -310,8 +309,8 @@ struct hive_tbl_desc* HiveMetaData::getTableDesc( const char* schemaName,
 
       if ( !(strcmp(ptr->tblName_, tblName)
              ||strcmp(ptr->schName_, schemaName))) {
-        if (validate(ptr->tblID_, ptr->redeftime(), schemaName, tblName))
-          return ptr;
+        if ((NOT rereadFromMD) && (validate(ptr)))
+           return ptr;
         else {
           // table changed, delete it and re-read below
           if (tbl_ == ptr)
@@ -367,25 +366,21 @@ struct hive_tbl_desc* HiveMetaData::getTableDesc( const char* schemaName,
    return hiveTableDesc;
 }
 
-NABoolean HiveMetaData::validate(Int32 tableId, Int64 redefTS, 
-                                 const char* schName, const char* tblName)
+NABoolean HiveMetaData::validate(hive_tbl_desc *hDesc)
 {
-   NABoolean result = FALSE;
-
-   // validate creation timestamp
-
    Int64 currentRedefTime = 0;
-   HVC_RetCode retCode = HiveClient_JNI::getRedefTime(schName, tblName, 
+   HVC_RetCode retCode = HiveClient_JNI::getRedefTime(hDesc->schName_, hDesc->tblName_,
                                                  currentRedefTime);
    if ((retCode != HVC_OK) && (retCode != HVC_DONE)) {
      return recordError((Int32)retCode, "HiveClient_JNI::getRedefTime()");
    }
-   if ((retCode == HVC_DONE) || (currentRedefTime != redefTS))
-     return result;
-   else
-     return TRUE;
-
-  return result;
+   if ((retCode == HVC_DONE) || (currentRedefTime != hDesc->redeftime())) 
+     return FALSE;
+  
+   // object has been validated a short time ago
+   hDesc->setRedeftime(currentRedefTime);
+   
+   return TRUE;
 }
 
 hive_tbl_desc::hive_tbl_desc(NAHeap *heap, Int32 tblID, const char* name, const char* schName, 
@@ -400,11 +395,11 @@ hive_tbl_desc::hive_tbl_desc(NAHeap *heap, Int32 tblID, const char* name, const
      : heap_(heap), tblID_(tblID), 
        viewOriginalText_(NULL), viewExpandedText_(NULL),
        sd_(sd), tblParams_(tp),
-       creationTS_(creationTS), pkey_(pk), next_(NULL)
+       creationTS_(creationTS), 
+       redefineTS_(-1), pkey_(pk), next_(NULL)
 {  
   tblName_ = strduph(name, heap_);
   schName_ = strduph(schName, heap_);
-  validationTS_ = JULIANTIMESTAMP();
 
   if (owner)
     owner_ = strduph(owner, heap_);
@@ -575,14 +570,8 @@ Int32 hive_tbl_desc::getSortColNum(const char* name)
 Int64 hive_tbl_desc::redeftime()
 {
   Int64 result = creationTS_;
-
-  struct hive_sd_desc* sd = sd_;
-
-  while (sd) {
-    if (sd->creationTS_ > result)
-      result = sd->creationTS_;
-    sd = sd->next_;
-  }
+  if (redefineTS_ !=  -1)
+      result = redefineTS_;
   return result;
 }
 

http://git-wip-us.apache.org/repos/asf/trafodion/blob/bfd7a8f6/core/sql/optimizer/NATable.cpp
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/NATable.cpp b/core/sql/optimizer/NATable.cpp
index 2149f31..23e4354 100644
--- a/core/sql/optimizer/NATable.cpp
+++ b/core/sql/optimizer/NATable.cpp
@@ -7592,10 +7592,12 @@ NATable * NATableDB::get(const ExtendedQualName* key, BindWA* bindWA, NABoolean
               if (objName.getUnqualifiedSchemaNameAsAnsiString() == defSchema)
                 sName = hiveMetaDB_->getDefaultSchemaName();
 
-              // validate Hive table timestamps
-              if (!hiveMetaDB_->validate(cachedNATable->getHiveTableId(),
-                                         cachedNATable->getRedefTime(),
-                                         sName.data(), tName.data()))
+              // validate Hive table timestamps to check if there is change
+              // in directory timestamp
+              if (hiveMetaDB_->getTableDesc(sName,
+                                            tName,
+                                            TRUE /*validate only*/,
+                                            (CmpCommon::getDefault(TRAF_RELOAD_NATABLE_CACHE) == DF_ON), TRUE) == NULL)
                 removeEntry = TRUE;
 
               // validate HDFS stats and update them in-place, if needed
@@ -8448,7 +8450,7 @@ NATable * NATableDB::get(CorrName& corrName, BindWA * bindWA,
          htbl = hiveMetaDB_->getFakedTableDesc(tableNameInt);
        else
          htbl = hiveMetaDB_->getTableDesc(schemaNameInt, tableNameInt,
-                0, FALSE,
+                FALSE,
                 // reread Hive Table Desc from MD.
                 (CmpCommon::getDefault(TRAF_RELOAD_NATABLE_CACHE) == DF_ON),
                 TRUE);

http://git-wip-us.apache.org/repos/asf/trafodion/blob/bfd7a8f6/core/sql/optimizer/hiveHook.h
----------------------------------------------------------------------
diff --git a/core/sql/optimizer/hiveHook.h b/core/sql/optimizer/hiveHook.h
index 9bcc7f9..3f4448d 100644
--- a/core/sql/optimizer/hiveHook.h
+++ b/core/sql/optimizer/hiveHook.h
@@ -242,9 +242,8 @@ struct hive_tbl_desc
   char* owner_;
   char* tableType_;
   Int64 creationTS_;
-  // last time we validated the redefinition time
-  Int64 validationTS_;
-
+  // redefineTS_ = hdfs modification time of the hive files too
+  Int64 redefineTS_;
   // next 2 fields are used if hive object is a view.
   // Contents are populated during HiveMetaData::getTableDesc.
   // original text is what was used at view creation time.
@@ -312,6 +311,7 @@ struct hive_tbl_desc
   Int32 getSortColNum(const char* name);
 
   Int64 redeftime();
+  void setRedeftime(Int64 redeftime) { redefineTS_ = redeftime; }
 };
 
 class HiveMetaData
@@ -325,7 +325,6 @@ public:
   
   struct hive_tbl_desc* getTableDesc(const char* schemaName,
                                      const char* tblName,
-                                     Int64 expirationTS,
                                      NABoolean validateOnly,
                                      // force to reread from Hive MD
                                      NABoolean rereadFromMD,
@@ -333,8 +332,7 @@ public:
   struct hive_tbl_desc* getFakedTableDesc(const char* tblName);
   
   // validate a cached hive table descriptor
-  NABoolean validate(Int32 tableId, Int64 redefTS, 
-                     const char* schName, const char* tblName);
+  NABoolean validate(hive_tbl_desc *hDesc);
   
   // iterator over all tables in a Hive schema (default)
   // or iterate over all schemas in the Hive metadata

http://git-wip-us.apache.org/repos/asf/trafodion/blob/bfd7a8f6/core/sql/sqlcomp/CmpDescribe.cpp
----------------------------------------------------------------------
diff --git a/core/sql/sqlcomp/CmpDescribe.cpp b/core/sql/sqlcomp/CmpDescribe.cpp
index 79492ee..4660b8e 100644
--- a/core/sql/sqlcomp/CmpDescribe.cpp
+++ b/core/sql/sqlcomp/CmpDescribe.cpp
@@ -2374,7 +2374,7 @@ short CmpDescribeHiveTable (
    // identifiers at a later point, or wait until Hive supports delimited identifiers
   schemaNameInt.toLower();
   tableNameInt.toLower();
-  hive_tbl_desc* htd = md->getTableDesc(schemaNameInt, tableNameInt, 0, FALSE,
+  hive_tbl_desc* htd = md->getTableDesc(schemaNameInt, tableNameInt, FALSE,
                 // reread Hive Table Desc from MD.
                 (CmpCommon::getDefault(TRAF_RELOAD_NATABLE_CACHE) == DF_ON),
                 TRUE);

http://git-wip-us.apache.org/repos/asf/trafodion/blob/bfd7a8f6/core/sql/src/main/java/org/trafodion/sql/HiveClient.java
----------------------------------------------------------------------
diff --git a/core/sql/src/main/java/org/trafodion/sql/HiveClient.java b/core/sql/src/main/java/org/trafodion/sql/HiveClient.java
index 152be60..5423efe 100644
--- a/core/sql/src/main/java/org/trafodion/sql/HiveClient.java
+++ b/core/sql/src/main/java/org/trafodion/sql/HiveClient.java
@@ -22,6 +22,7 @@
 package org.trafodion.sql;
 
 import java.io.IOException;
+import java.io.FileNotFoundException;
 import java.util.HashMap;
 import java.util.ArrayList;
 import java.util.List;
@@ -92,7 +93,8 @@ public class HiveClient {
     private final String lockPath="/trafodion/traflock";
 
     private static HiveConf hiveConf = null;
-    private static HiveMetaStoreClient hmsClient  ;
+    private static ThreadLocal<HiveMetaStoreClient> hiveMetaClient  ;
+    private static HiveMetaStoreClient hmsClient;
     private static String ddlTimeConst = null;
 
     private static Statement stmt = null;
@@ -104,20 +106,32 @@ public class HiveClient {
          confFile = System.getenv("TRAF_CONF") + "/log4j.sql.config";
          PropertyConfigurator.configure(confFile);
          hiveConf = new HiveConf();
+         hiveMetaClient = new ThreadLocal<HiveMetaStoreClient>();
          try {
-             hmsClient = new HiveMetaStoreClient(hiveConf, null);
+             hmsClient = getHiveMetaClient();
              ddlTimeConst = getDDLTimeConstant();
          } catch (MetaException me)
          {
              throw new RuntimeException("Checked MetaException from HiveClient static block");
          }
     }
-
+ 
     public static boolean init() 
-    {
-       return true;
+    { 
+        return true;
     }
 
+   private static HiveMetaStoreClient getHiveMetaClient() throws org.apache.hadoop.hive.metastore.api.MetaException 
+   {
+       HiveMetaStoreClient ts_hmsClient;
+       ts_hmsClient = hiveMetaClient.get(); 
+       if (ts_hmsClient == null) {
+          ts_hmsClient = new HiveMetaStoreClient(hiveConf, null);
+          hiveMetaClient.set(ts_hmsClient);
+       }
+       return ts_hmsClient;
+   }  
+
     public static boolean close() 
     {	
         hmsClient.close();	
@@ -128,19 +142,20 @@ public class HiveClient {
         throws MetaException, TException, UnknownDBException 
     {
         if (logger.isDebugEnabled()) logger.debug("HiveClient.exists(" + schName + " , " + tblName + ") called.");
-        boolean result = hmsClient.tableExists(schName, tblName);
+        boolean result = getHiveMetaClient().tableExists(schName, tblName);
         return result;
     }
 
 
     public static long getRedefTime(String schName, String tblName)
-        throws MetaException, TException, ClassCastException, NullPointerException, NumberFormatException 
+        throws MetaException, TException, IOException
     {
         Table table;
+        long modificationTime;
         if (logger.isDebugEnabled()) logger.debug("HiveClient.getRedefTime(" + schName + " , " + 
                      tblName + ") called.");
         try {
-            table = hmsClient.getTable(schName, tblName);
+            table = getHiveMetaClient().getTable(schName, tblName);
             if (logger.isDebugEnabled()) logger.debug("getTable returns null for " + schName + "." + tblName + ".");
             if (table == null)
                 return 0;
@@ -149,7 +164,6 @@ public class HiveClient {
             if (logger.isDebugEnabled()) logger.debug("Hive table no longer exists.");
             return 0;
         }
-
         long redefTime = table.getCreateTime();
         if (table.getParameters() != null){
             // those would be used without reflection
@@ -160,13 +174,28 @@ public class HiveClient {
             if (rfTime != null)
                 redefTime = Long.parseLong(rfTime);
         }
+        // Get the lastest partition/file timestamp 
+        int numPartKeys = table.getPartitionKeysSize();
+        String rootDir = table.getSd().getLocation();
+        long dirTime = 0;
+        if (rootDir != null) {
+           try {
+              dirTime = HDFSClient.getHiveTableMaxModificationTs(rootDir, numPartKeys);
+           } catch (FileNotFoundException e) {
+           // ignore this exception
+           }
+        }
+        if (dirTime > redefTime)
+           modificationTime = dirTime;
+        else
+           modificationTime = redefTime;
         if (logger.isDebugEnabled()) logger.debug("RedefTime is " + redefTime);
-        return redefTime ;
+        return modificationTime;
     }
 
     public static Object[] getAllSchemas() throws MetaException 
     {
-        List<String> schemaList = (hmsClient.getAllDatabases());
+        List<String> schemaList = (getHiveMetaClient().getAllDatabases());
         if (schemaList != null)
            return schemaList.toArray();
         else
@@ -177,11 +206,11 @@ public class HiveClient {
         throws MetaException, TException 
     {
         try {
-        Database db = hmsClient.getDatabase(schName);
+        Database db = getHiveMetaClient().getDatabase(schName);
         if (db == null)
             return null;
 
-        List<String> tableList = hmsClient.getAllTables(schName);
+        List<String> tableList = getHiveMetaClient().getAllTables(schName);
         if (tableList != null)
            return tableList.toArray();
         else
@@ -281,7 +310,7 @@ public class HiveClient {
   {
      Table table;
      try {
-        table = hmsClient.getTable(schName, tblName);
+        table = getHiveMetaClient().getTable(schName, tblName);
      } catch (NoSuchObjectException x) {
          return false; 
      } 
@@ -340,7 +369,7 @@ public class HiveClient {
            i++;
         }
         if (readPartn) {
-           List<String> partNamesList = hmsClient.listPartitionNames(schName, tblName, (short)-1);
+           List<String> partNamesList = getHiveMetaClient().listPartitionNames(schName, tblName, (short)-1);
            if (partNamesList != null) {
               partNames = new String[partNamesList.size()];
               partNames = partNamesList.toArray(partNames); 
@@ -348,7 +377,7 @@ public class HiveClient {
               partKeyValues = new String[partNames.length][];
               for (i = 0; i < partNames.length; i++) {
                   partKeyValues[i] = new String[numPartKeys];
-                  partKeyValues[i] = (String[])hmsClient.partitionNameToVals(partNames[i]).toArray(partKeyValues[i]); 
+                  partKeyValues[i] = (String[])getHiveMetaClient().partitionNameToVals(partNames[i]).toArray(partKeyValues[i]); 
               }
            }
         } 

http://git-wip-us.apache.org/repos/asf/trafodion/blob/bfd7a8f6/core/sql/ustat/hs_la.cpp
----------------------------------------------------------------------
diff --git a/core/sql/ustat/hs_la.cpp b/core/sql/ustat/hs_la.cpp
index 96cebe8..f945985 100644
--- a/core/sql/ustat/hs_la.cpp
+++ b/core/sql/ustat/hs_la.cpp
@@ -949,7 +949,7 @@ NABoolean HSHiveTableDef::objExists(NABoolean createExternalTable)
     hiveTblDesc_ = hiveMetaDB->getFakedTableDesc(obj.data());
   else
     hiveTblDesc_ = hiveMetaDB->getTableDesc(sch.data(), obj.data(),
-                0, FALSE,
+                FALSE,
                 // reread Hive Table Desc from MD.
                 (CmpCommon::getDefault(TRAF_RELOAD_NATABLE_CACHE) == DF_ON),
                 TRUE);