You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by hz...@apache.org on 2017/07/26 16:02:57 UTC

[4/5] incubator-trafodion git commit: [TRAFODION-2628] Runtime work for elasticity

[TRAFODION-2628] Runtime work for elasticity

Add a CPU array, similar to the one used in the compiler, to the CLI
globals. When the compiler did not specify a CPU (node) id for an ESP,
make sure we don't pick a node id that no longer exists.

Made a similar change for the SSMP.

Also got rid of some dead or now obsolete code.


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/8f5fd239
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/8f5fd239
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/8f5fd239

Branch: refs/heads/master
Commit: 8f5fd239f04166c0e59689a1659020b3ab460d6f
Parents: 5605fbb
Author: Hans Zeller <hz...@apache.org>
Authored: Wed Jul 12 22:46:05 2017 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Wed Jul 12 22:46:05 2017 +0000

----------------------------------------------------------------------
 core/sql/cli/Globals.cpp          | 13 ++-------
 core/sql/cli/Globals.h            | 27 ++++++++++++------
 core/sql/common/ComRtUtils.cpp    | 51 +++++++++++++++-------------------
 core/sql/common/ComRtUtils.h      | 28 ++++---------------
 core/sql/executor/ex_frag_rt.cpp  | 23 +++++++++++----
 core/sql/executor/ex_frag_rt.h    |  1 +
 core/sql/runtimestats/ssmpipc.cpp | 20 ++++++-------
 7 files changed, 78 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/cli/Globals.cpp
----------------------------------------------------------------------
diff --git a/core/sql/cli/Globals.cpp b/core/sql/cli/Globals.cpp
index 95fb000..f9678ec 100644
--- a/core/sql/cli/Globals.cpp
+++ b/core/sql/cli/Globals.cpp
@@ -187,18 +187,9 @@ void CliGlobals::init( NABoolean espProcess,
 
   ComRtGetProcessPriority(myPriority_);
   savedPriority_ = (short)myPriority_;
-  myNumSegs_ = 0;
-  myNumCpus_ = 0;
-  SEGMENT_INFO * segs = new(&executorMemory_) SEGMENT_INFO[MAX_NO_OF_SEGMENTS];
-  ComRtGetSegsInfo(segs, MAX_NO_OF_SEGMENTS, myNumSegs_,
-           (NAHeap *)&executorMemory_);
-  for (Lng32 i = 0; i < myNumSegs_; i++)
-    {
-      myNumCpus_ += segs[i].noOfCpus_;
-    }
-  NADELETEARRAY(segs, MAX_NO_OF_SEGMENTS, SEGMENT_INFO, &executorMemory_);
+  myNumCpus_ = ComRtGetCPUArray(cpuArray_, (NAHeap *)&executorMemory_);
 
-   // create global structures for IPC environment
+  // create global structures for IPC environment
 #if !(defined(__SSCP) || defined(__SSMP))
 
   // check if Measure is enabled and allocate Measure process counters.

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/cli/Globals.h
----------------------------------------------------------------------
diff --git a/core/sql/cli/Globals.h b/core/sql/cli/Globals.h
index c0dc1b7..d2ad4c7 100644
--- a/core/sql/cli/Globals.h
+++ b/core/sql/cli/Globals.h
@@ -362,11 +362,23 @@ inline
 
   Lng32 myNodeNumber() { return myNodeNumber_; };
   Int64 myStartTime() { return myStartTime_; };
-  // following methods are unused in seaquest
-  //LCOV_EXCL_START
-  Lng32 myNumSegments() { return myNumSegs_; };
+
+  // number of CPUs (Linux nodes) in the cluster
   Lng32 myNumCpus() { return myNumCpus_; };
-  //LCOV_EXCL_STOP
+  // an array of myNumCpus_ node ids, the index into
+  // this array is a "logical" CPU id (0 ... myNumCpus_-1)
+  // and the content of the array is a physical id,
+  // a node id (nid) of a node that actually exists
+  // (but still may be down)
+  const Int32 *myCPUArray() { return cpuArray_; }
+  // return the physical node id for a logical id,
+  // the physical id is always >= the logical id
+  Int32 mapLogicalToPhysicalCPUNum(Int32 ix)
+                          { return cpuArray_[ix % myNumCpus_]; }
+  // are logical/physical node ids the same?
+  NABoolean nodeIdsAreContiguous()
+           { return cpuArray_[myNumCpus_-1] == myNumCpus_ - 1; }
+
   IpcPriority myPriority() { return myPriority_; }
   void setMyPriority(IpcPriority p) { myPriority_= p; }
   //LCOV_EXCL_START
@@ -562,11 +574,10 @@ private:
   pid_t myPin_;
   Lng32  myNodeNumber_;
 
-  // number of segments on this system
-  Lng32 myNumSegs_;
-
-  // number of configured cpus on this system
+  // number of configured cpus (Linux nodes) on this system
+  // and an array with their node ids (nids)
   Lng32 myNumCpus_;
+  Int32 *cpuArray_;
 
   IpcPriority myPriority_;
   NABoolean priorityChanged_;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/common/ComRtUtils.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/ComRtUtils.cpp b/core/sql/common/ComRtUtils.cpp
index 387c609..e82c23c 100644
--- a/core/sql/common/ComRtUtils.cpp
+++ b/core/sql/common/ComRtUtils.cpp
@@ -812,22 +812,15 @@ char * ComRtGetIsoMappingName()
   return (char*)CharInfo::getCharSetName((CharInfo::CharSet)ime);
 }
 
-
-#ifdef _DEBUG
-#endif 
-
-NABoolean ComRtIsNeoSystem(void)
-{
-   return TRUE;
-}
-
-Lng32 ComRtGetSegsInfo(SEGMENT_INFO *segs, Lng32 maxNoSegs, Lng32 &noOfSegs, 
-				NAHeap *heap)
+Int32 ComRtGetCPUArray(Int32 *&cpuArray, NAHeap *heap)
 {
   Int32 nodeCount = 0;
+  Int32 configuredNodeCount=0;
   Int32 nodeMax = 0;
   MS_Mon_Node_Info_Entry_Type *nodeInfo = NULL;
 
+  cpuArray = NULL;
+
   // Get the number of nodes to know how much info space to allocate
   Int32 error = msg_mon_get_node_info(&nodeCount, 0, NULL);
   if (error != 0)
@@ -836,36 +829,38 @@ Lng32 ComRtGetSegsInfo(SEGMENT_INFO *segs, Lng32 maxNoSegs, Lng32 &noOfSegs,
      return 0;
 
   // Allocate the space for node info entries
-  nodeInfo = new (heap) MS_Mon_Node_Info_Entry_Type[nodeCount];
+  nodeInfo = new(heap) MS_Mon_Node_Info_Entry_Type[nodeCount];
+  cpuArray = new(heap) Int32[nodeCount];
 
-  if (!nodeInfo)
+  if (!nodeInfo || !cpuArray)
      return 0;
   // Get the node info
-  memset(nodeInfo, 0, sizeof(nodeInfo));
+  memset(nodeInfo, 0, sizeof(MS_Mon_Node_Info_Entry_Type) * nodeCount);
   nodeMax = nodeCount;
   error = msg_mon_get_node_info(&nodeCount, nodeMax, nodeInfo);
   if (error != 0)
   { 
      NADELETEBASIC(nodeInfo, heap);
+     NADELETEBASIC(cpuArray, heap);
+     cpuArray = NULL;
      return 0;
   }
-  Int32 i;
-  Int32 j=0;
-  for (i = 0 ; i < nodeCount && j < maxNoSegs ; i++)
+
+  if (nodeCount > nodeMax)
+    // very unlikely, could happen if a node just got added
+    nodeCount = nodeMax;
+
+  for (Int32 i = 0; i < nodeCount; i++)
   {
      if (!nodeInfo[i].spare_node)
-     {
-        segs[j].segName_[0] = '\0';
-        segs[j].segNo_ = nodeInfo[i].nid;
-        segs[j].noOfCpus_ = 1;
-        segs[j].cpuStatus_ = 0x8000;
-        segs[j].nodeDown_ = FALSE;
-        j++;
-     }
+       {
+         cpuArray[configuredNodeCount] = nodeInfo[i].nid;
+         configuredNodeCount++;
+       }
   }
-  noOfSegs = j;
-  NADELETEARRAY(nodeInfo, nodeCount, MS_Mon_Node_Info_Entry_Type, heap);
-  return i;
+
+  NADELETEBASIC(nodeInfo, heap);
+  return configuredNodeCount;
 }
 
 NABoolean ComRtGetCpuStatus(char *nodeName, short cpuNum)

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/common/ComRtUtils.h
----------------------------------------------------------------------
diff --git a/core/sql/common/ComRtUtils.h b/core/sql/common/ComRtUtils.h
index e22a8ab..165b6b4 100644
--- a/core/sql/common/ComRtUtils.h
+++ b/core/sql/common/ComRtUtils.h
@@ -53,7 +53,6 @@ using namespace std;
 #endif // NA_64BIT
 
 #define MAX_SEGMENT_NAME_LEN  255
-#define MAX_NO_OF_SEGMENTS    256
 #define PROCESSNAME_STRING_LEN    40
 #define PROGRAM_NAME_LEN    64
 #define BDR_CLUSTER_NAME_LEN 24
@@ -266,28 +265,13 @@ Lng32 extract_SMDLocation(
 Lng32 validate_SMDLocation(
       char *SMDLocation); /* in */
 
-#ifdef _DEBUG
-// -----------------------------------------------------------------------
-// Print an NT memory map (debug only)
-// -----------------------------------------------------------------------
-#if (MSC_VER >= 1300)
-void ComRtDisplayVirtualMemoryMap(std::ostream* outstream);
-#else
-void ComRtDisplayVirtualMemoryMap(ostream* outstream);
-#endif
-#endif
-NABoolean ComRtIsNeoSystem(void);
+// allocate and populate an array with entries for all the configured
+// CPUs (Trafodion node ids) and return the number of CPUs. Usually,
+// the array will contain node ids 0 ... n-1, but sometimes there may
+// be holes in the assigned node ids, when CPUs (Linux nodes) get
+// removed from the cluster.
+Int32 ComRtGetCPUArray(Int32 *&cpuArray, NAHeap *heap);
 
-typedef struct SEGMENT_INFO
-{
-  char segName_[MAX_SEGMENT_NAME_LEN+1];
-  Lng32 segNo_;
-  Lng32 noOfCpus_;
-  Lng32 cpuStatus_;
-  NABoolean nodeDown_;
-} SEGMENT_INFO;
-Lng32 ComRtGetSegsInfo(SEGMENT_INFO *segs, Lng32 maxNoSegs, Lng32 &noOfSegs,
-		NAHeap *heap);
 NABoolean ComRtGetCpuStatus(char *nodeName, short cpuNum);
 Lng32 ComRtTransIdToText(Int64 transId, char *buf, short len);
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/executor/ex_frag_rt.cpp
----------------------------------------------------------------------
diff --git a/core/sql/executor/ex_frag_rt.cpp b/core/sql/executor/ex_frag_rt.cpp
index 9ea5cf9..f468fd6 100644
--- a/core/sql/executor/ex_frag_rt.cpp
+++ b/core/sql/executor/ex_frag_rt.cpp
@@ -2860,13 +2860,8 @@ ExEspDbEntry *ExEspManager::shareEsp(
   if (*creatingEsp == NULL) // Nowaited Creation of an ESP is not in progress
   {
     nowaitDepth = env_->getCCMaxWaitDepthLow();
-    IpcCpuNum nextCpu = 
-      (lastAssignedCpu_ == (maxCpuNum_ -1)) ? 0 : lastAssignedCpu_ + 1;
-
     if ( cpuNum == IPC_CPU_DONT_CARE )
-      // nextCpu now points to the CPU we should use
-      cpuNum = lastAssignedCpu_ = nextCpu;
-
+      cpuNum = getRoundRobinCPU();
 
     // look up the cache for esp to share
     NABoolean espServerError = FALSE;
@@ -3253,6 +3248,22 @@ ExEspDbEntry *ExEspManager::getEspFromCache(LIST(ExEspDbEntry *) &alreadyAssigne
   return result;
 }
 
+IpcCpuNum ExEspManager::getRoundRobinCPU()
+{
+  IpcCpuNum logCPUNum;
+  CliGlobals *cliGlobals = GetCliGlobals();
+
+  // lastAssignedCpu_ contains a "logical" CPU number (contiguous
+  // values 0 ... n-1). We increment this number by 1 (mod n) for
+  // every call.
+  logCPUNum        =
+  lastAssignedCpu_ = (lastAssignedCpu_ + 1) % cliGlobals->myNumCpus();
+
+  // now map this logical number to an actual node id, in case we have
+  // "holes" in the node ids (nodes were removed dynamically)
+  return cliGlobals->mapLogicalToPhysicalCPUNum(logCPUNum);
+}
+
 void ExEspManager::releaseEsp(ExEspDbEntry *esp, NABoolean verifyEsp,
                  NABoolean prevState)
 {

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/executor/ex_frag_rt.h
----------------------------------------------------------------------
diff --git a/core/sql/executor/ex_frag_rt.h b/core/sql/executor/ex_frag_rt.h
index 362dcc4..9dc6ee5 100644
--- a/core/sql/executor/ex_frag_rt.h
+++ b/core/sql/executor/ex_frag_rt.h
@@ -565,6 +565,7 @@ class ExEspManager
 				  Int32 nowaitDepth,
                                   NABoolean &espServerError,
                                   NABoolean soloFragment);
+  IpcCpuNum getRoundRobinCPU();
 
   // ESP state tracing >>
   enum EspStateEnum

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/runtimestats/ssmpipc.cpp
----------------------------------------------------------------------
diff --git a/core/sql/runtimestats/ssmpipc.cpp b/core/sql/runtimestats/ssmpipc.cpp
index 1d7b0e5..6c24dbe 100755
--- a/core/sql/runtimestats/ssmpipc.cpp
+++ b/core/sql/runtimestats/ssmpipc.cpp
@@ -288,23 +288,23 @@ SsmpGlobals::~SsmpGlobals()
 
 ULng32 SsmpGlobals::allocateServers()
 {
-  SEGMENT_INFO segInfo[MAX_NO_OF_SEGMENTS];
-  Lng32 noOfSegs = 0;
-  Int32 i, j;
-
   // Attempt connect to all SSCPs
   if (sscpServerClass_ == NULL)
   {
-    noOfSegs = ComRtGetSegsInfo(segInfo, MAX_NO_OF_SEGMENTS, noOfSegs,
-		heap_);
-    if (noOfSegs == 0)
+    Int32 noOfNodes;
+    Int32 *cpuArray = NULL;
+
+    noOfNodes = ComRtGetCPUArray(cpuArray, heap_);
+
+    if (noOfNodes == 0)
       return 0;
-    statsGlobals_->setNodesInCluster(noOfSegs);
+    statsGlobals_->setNodesInCluster(noOfNodes);
     sscpServerClass_ = new(heap_) IpcServerClass(ipcEnv_, IPC_SQLSSCP_SERVER, IPC_USE_PROCESS);
-    for (i = 0 ; i < noOfSegs ; i++)
+    for (Int32 i = 0 ; i < noOfNodes ; i++)
     {
-      allocateServer(NULL, 0, segInfo[i].segNo_);
+      allocateServer(NULL, 0, cpuArray[i]);
     }
+    NADELETEBASIC(cpuArray, heap_);
   }
   else
   {