You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by hz...@apache.org on 2017/07/26 16:02:57 UTC
[4/5] incubator-trafodion git commit: [TRAFODION-2628] Runtime work for elasticity
[TRAFODION-2628] Runtime work for elasticity
Adding a CPU array, similar to what's used in the compiler, to the CLI
globals. Make sure that, when the compiler didn't specify a CPU (node)
id for an ESP, we don't pick a node id that no longer exists.
Made a similar change for the SSMP.
Also got rid of some dead or now obsolete code.
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/8f5fd239
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/8f5fd239
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/8f5fd239
Branch: refs/heads/master
Commit: 8f5fd239f04166c0e59689a1659020b3ab460d6f
Parents: 5605fbb
Author: Hans Zeller <hz...@apache.org>
Authored: Wed Jul 12 22:46:05 2017 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Wed Jul 12 22:46:05 2017 +0000
----------------------------------------------------------------------
core/sql/cli/Globals.cpp | 13 ++-------
core/sql/cli/Globals.h | 27 ++++++++++++------
core/sql/common/ComRtUtils.cpp | 51 +++++++++++++++-------------------
core/sql/common/ComRtUtils.h | 28 ++++---------------
core/sql/executor/ex_frag_rt.cpp | 23 +++++++++++----
core/sql/executor/ex_frag_rt.h | 1 +
core/sql/runtimestats/ssmpipc.cpp | 20 ++++++-------
7 files changed, 78 insertions(+), 85 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/cli/Globals.cpp
----------------------------------------------------------------------
diff --git a/core/sql/cli/Globals.cpp b/core/sql/cli/Globals.cpp
index 95fb000..f9678ec 100644
--- a/core/sql/cli/Globals.cpp
+++ b/core/sql/cli/Globals.cpp
@@ -187,18 +187,9 @@ void CliGlobals::init( NABoolean espProcess,
ComRtGetProcessPriority(myPriority_);
savedPriority_ = (short)myPriority_;
- myNumSegs_ = 0;
- myNumCpus_ = 0;
- SEGMENT_INFO * segs = new(&executorMemory_) SEGMENT_INFO[MAX_NO_OF_SEGMENTS];
- ComRtGetSegsInfo(segs, MAX_NO_OF_SEGMENTS, myNumSegs_,
- (NAHeap *)&executorMemory_);
- for (Lng32 i = 0; i < myNumSegs_; i++)
- {
- myNumCpus_ += segs[i].noOfCpus_;
- }
- NADELETEARRAY(segs, MAX_NO_OF_SEGMENTS, SEGMENT_INFO, &executorMemory_);
+ myNumCpus_ = ComRtGetCPUArray(cpuArray_, (NAHeap *)&executorMemory_);
- // create global structures for IPC environment
+ // create global structures for IPC environment
#if !(defined(__SSCP) || defined(__SSMP))
// check if Measure is enabled and allocate Measure process counters.
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/cli/Globals.h
----------------------------------------------------------------------
diff --git a/core/sql/cli/Globals.h b/core/sql/cli/Globals.h
index c0dc1b7..d2ad4c7 100644
--- a/core/sql/cli/Globals.h
+++ b/core/sql/cli/Globals.h
@@ -362,11 +362,23 @@ inline
Lng32 myNodeNumber() { return myNodeNumber_; };
Int64 myStartTime() { return myStartTime_; };
- // following methods are unused in seaquest
- //LCOV_EXCL_START
- Lng32 myNumSegments() { return myNumSegs_; };
+
+ // number of CPUs (Linux nodes) in the cluster
Lng32 myNumCpus() { return myNumCpus_; };
- //LCOV_EXCL_STOP
+ // an array of myNumCpus_ node ids, the index into
+ // this array is a "logical" CPU id (0 ... myNumCpus_-1)
+ // and the content of the array is a physical id,
+ // a node id (nid) of a node that actually exists
+ // (but still may be down)
+ const Int32 *myCPUArray() { return cpuArray_; }
+ // return the physical node id for a logical id,
+ // the physical id is always >= the logical id
+ Int32 mapLogicalToPhysicalCPUNum(Int32 ix)
+ { return cpuArray_[ix % myNumCpus_]; }
+ // are logical/physical node ids the same?
+ NABoolean nodeIdsAreContiguous()
+ { return cpuArray_[myNumCpus_-1] == myNumCpus_ - 1; }
+
IpcPriority myPriority() { return myPriority_; }
void setMyPriority(IpcPriority p) { myPriority_= p; }
//LCOV_EXCL_START
@@ -562,11 +574,10 @@ private:
pid_t myPin_;
Lng32 myNodeNumber_;
- // number of segments on this system
- Lng32 myNumSegs_;
-
- // number of configured cpus on this system
+ // number of configured cpus (Linux nodes) on this system
+ // and an array with their node ids (nids)
Lng32 myNumCpus_;
+ Int32 *cpuArray_;
IpcPriority myPriority_;
NABoolean priorityChanged_;
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/common/ComRtUtils.cpp
----------------------------------------------------------------------
diff --git a/core/sql/common/ComRtUtils.cpp b/core/sql/common/ComRtUtils.cpp
index 387c609..e82c23c 100644
--- a/core/sql/common/ComRtUtils.cpp
+++ b/core/sql/common/ComRtUtils.cpp
@@ -812,22 +812,15 @@ char * ComRtGetIsoMappingName()
return (char*)CharInfo::getCharSetName((CharInfo::CharSet)ime);
}
-
-#ifdef _DEBUG
-#endif
-
-NABoolean ComRtIsNeoSystem(void)
-{
- return TRUE;
-}
-
-Lng32 ComRtGetSegsInfo(SEGMENT_INFO *segs, Lng32 maxNoSegs, Lng32 &noOfSegs,
- NAHeap *heap)
+Int32 ComRtGetCPUArray(Int32 *&cpuArray, NAHeap *heap)
{
Int32 nodeCount = 0;
+ Int32 configuredNodeCount=0;
Int32 nodeMax = 0;
MS_Mon_Node_Info_Entry_Type *nodeInfo = NULL;
+ cpuArray = NULL;
+
// Get the number of nodes to know how much info space to allocate
Int32 error = msg_mon_get_node_info(&nodeCount, 0, NULL);
if (error != 0)
@@ -836,36 +829,38 @@ Lng32 ComRtGetSegsInfo(SEGMENT_INFO *segs, Lng32 maxNoSegs, Lng32 &noOfSegs,
return 0;
// Allocate the space for node info entries
- nodeInfo = new (heap) MS_Mon_Node_Info_Entry_Type[nodeCount];
+ nodeInfo = new(heap) MS_Mon_Node_Info_Entry_Type[nodeCount];
+ cpuArray = new(heap) Int32[nodeCount];
- if (!nodeInfo)
+ if (!nodeInfo || !cpuArray)
return 0;
// Get the node info
- memset(nodeInfo, 0, sizeof(nodeInfo));
+ memset(nodeInfo, 0, sizeof(MS_Mon_Node_Info_Entry_Type) * nodeCount);
nodeMax = nodeCount;
error = msg_mon_get_node_info(&nodeCount, nodeMax, nodeInfo);
if (error != 0)
{
NADELETEBASIC(nodeInfo, heap);
+ NADELETEBASIC(cpuArray, heap);
+ cpuArray = NULL;
return 0;
}
- Int32 i;
- Int32 j=0;
- for (i = 0 ; i < nodeCount && j < maxNoSegs ; i++)
+
+ if (nodeCount > nodeMax)
+ // very unlikely, could happen if a node just got added
+ nodeCount = nodeMax;
+
+ for (Int32 i = 0; i < nodeCount; i++)
{
if (!nodeInfo[i].spare_node)
- {
- segs[j].segName_[0] = '\0';
- segs[j].segNo_ = nodeInfo[i].nid;
- segs[j].noOfCpus_ = 1;
- segs[j].cpuStatus_ = 0x8000;
- segs[j].nodeDown_ = FALSE;
- j++;
- }
+ {
+ cpuArray[configuredNodeCount] = nodeInfo[i].nid;
+ configuredNodeCount++;
+ }
}
- noOfSegs = j;
- NADELETEARRAY(nodeInfo, nodeCount, MS_Mon_Node_Info_Entry_Type, heap);
- return i;
+
+ NADELETEBASIC(nodeInfo, heap);
+ return configuredNodeCount;
}
NABoolean ComRtGetCpuStatus(char *nodeName, short cpuNum)
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/common/ComRtUtils.h
----------------------------------------------------------------------
diff --git a/core/sql/common/ComRtUtils.h b/core/sql/common/ComRtUtils.h
index e22a8ab..165b6b4 100644
--- a/core/sql/common/ComRtUtils.h
+++ b/core/sql/common/ComRtUtils.h
@@ -53,7 +53,6 @@ using namespace std;
#endif // NA_64BIT
#define MAX_SEGMENT_NAME_LEN 255
-#define MAX_NO_OF_SEGMENTS 256
#define PROCESSNAME_STRING_LEN 40
#define PROGRAM_NAME_LEN 64
#define BDR_CLUSTER_NAME_LEN 24
@@ -266,28 +265,13 @@ Lng32 extract_SMDLocation(
Lng32 validate_SMDLocation(
char *SMDLocation); /* in */
-#ifdef _DEBUG
-// -----------------------------------------------------------------------
-// Print an NT memory map (debug only)
-// -----------------------------------------------------------------------
-#if (MSC_VER >= 1300)
-void ComRtDisplayVirtualMemoryMap(std::ostream* outstream);
-#else
-void ComRtDisplayVirtualMemoryMap(ostream* outstream);
-#endif
-#endif
-NABoolean ComRtIsNeoSystem(void);
+// allocate and populate an array with entries for all the configured
+// CPUs (Trafodion node ids) and return the number of CPUs. Usually,
+// the array will contain node ids 0 ... n-1, but sometimes there may
+// be holes in the assigned node ids, when CPUs (Linux nodes) get
+// removed from the cluster.
+Int32 ComRtGetCPUArray(Int32 *&cpuArray, NAHeap *heap);
-typedef struct SEGMENT_INFO
-{
- char segName_[MAX_SEGMENT_NAME_LEN+1];
- Lng32 segNo_;
- Lng32 noOfCpus_;
- Lng32 cpuStatus_;
- NABoolean nodeDown_;
-} SEGMENT_INFO;
-Lng32 ComRtGetSegsInfo(SEGMENT_INFO *segs, Lng32 maxNoSegs, Lng32 &noOfSegs,
- NAHeap *heap);
NABoolean ComRtGetCpuStatus(char *nodeName, short cpuNum);
Lng32 ComRtTransIdToText(Int64 transId, char *buf, short len);
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/executor/ex_frag_rt.cpp
----------------------------------------------------------------------
diff --git a/core/sql/executor/ex_frag_rt.cpp b/core/sql/executor/ex_frag_rt.cpp
index 9ea5cf9..f468fd6 100644
--- a/core/sql/executor/ex_frag_rt.cpp
+++ b/core/sql/executor/ex_frag_rt.cpp
@@ -2860,13 +2860,8 @@ ExEspDbEntry *ExEspManager::shareEsp(
if (*creatingEsp == NULL) // Nowaited Creation of an ESP is not in progress
{
nowaitDepth = env_->getCCMaxWaitDepthLow();
- IpcCpuNum nextCpu =
- (lastAssignedCpu_ == (maxCpuNum_ -1)) ? 0 : lastAssignedCpu_ + 1;
-
if ( cpuNum == IPC_CPU_DONT_CARE )
- // nextCpu now points to the CPU we should use
- cpuNum = lastAssignedCpu_ = nextCpu;
-
+ cpuNum = getRoundRobinCPU();
// look up the cache for esp to share
NABoolean espServerError = FALSE;
@@ -3253,6 +3248,22 @@ ExEspDbEntry *ExEspManager::getEspFromCache(LIST(ExEspDbEntry *) &alreadyAssigne
return result;
}
+IpcCpuNum ExEspManager::getRoundRobinCPU()
+{
+ IpcCpuNum logCPUNum;
+ CliGlobals *cliGlobals = GetCliGlobals();
+
+ // lastAssignedCpu_ contains a "logical" CPU number (contiguous
+ // values 0 ... n-1). We increment this number by 1 (mod n) for
+ // every call.
+ logCPUNum =
+ lastAssignedCpu_ = (lastAssignedCpu_ + 1) % cliGlobals->myNumCpus();
+
+ // now map this logical number to an actual node id, in case we have
+ // "holes" in the node ids (nodes were removed dynamically)
+ return cliGlobals->mapLogicalToPhysicalCPUNum(logCPUNum);
+}
+
void ExEspManager::releaseEsp(ExEspDbEntry *esp, NABoolean verifyEsp,
NABoolean prevState)
{
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/executor/ex_frag_rt.h
----------------------------------------------------------------------
diff --git a/core/sql/executor/ex_frag_rt.h b/core/sql/executor/ex_frag_rt.h
index 362dcc4..9dc6ee5 100644
--- a/core/sql/executor/ex_frag_rt.h
+++ b/core/sql/executor/ex_frag_rt.h
@@ -565,6 +565,7 @@ class ExEspManager
Int32 nowaitDepth,
NABoolean &espServerError,
NABoolean soloFragment);
+ IpcCpuNum getRoundRobinCPU();
// ESP state tracing >>
enum EspStateEnum
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8f5fd239/core/sql/runtimestats/ssmpipc.cpp
----------------------------------------------------------------------
diff --git a/core/sql/runtimestats/ssmpipc.cpp b/core/sql/runtimestats/ssmpipc.cpp
index 1d7b0e5..6c24dbe 100755
--- a/core/sql/runtimestats/ssmpipc.cpp
+++ b/core/sql/runtimestats/ssmpipc.cpp
@@ -288,23 +288,23 @@ SsmpGlobals::~SsmpGlobals()
ULng32 SsmpGlobals::allocateServers()
{
- SEGMENT_INFO segInfo[MAX_NO_OF_SEGMENTS];
- Lng32 noOfSegs = 0;
- Int32 i, j;
-
// Attempt connect to all SSCPs
if (sscpServerClass_ == NULL)
{
- noOfSegs = ComRtGetSegsInfo(segInfo, MAX_NO_OF_SEGMENTS, noOfSegs,
- heap_);
- if (noOfSegs == 0)
+ Int32 noOfNodes;
+ Int32 *cpuArray = NULL;
+
+ noOfNodes = ComRtGetCPUArray(cpuArray, heap_);
+
+ if (noOfNodes == 0)
return 0;
- statsGlobals_->setNodesInCluster(noOfSegs);
+ statsGlobals_->setNodesInCluster(noOfNodes);
sscpServerClass_ = new(heap_) IpcServerClass(ipcEnv_, IPC_SQLSSCP_SERVER, IPC_USE_PROCESS);
- for (i = 0 ; i < noOfSegs ; i++)
+ for (Int32 i = 0 ; i < noOfNodes ; i++)
{
- allocateServer(NULL, 0, segInfo[i].segNo_);
+ allocateServer(NULL, 0, cpuArray[i]);
}
+ NADELETEBASIC(cpuArray, heap_);
}
else
{