You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by su...@apache.org on 2017/05/31 18:47:03 UTC

[02/50] [abbrv] incubator-trafodion git commit: Fixes to node up, persist info, etc.

Fixes to node up, persist info, etc.


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/adbef87b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/adbef87b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/adbef87b

Branch: refs/heads/master
Commit: adbef87bb7326c727d13ffd9387dfdfe48384fe2
Parents: 99042bd
Author: Zalo Correa <za...@esgyn.com>
Authored: Wed Aug 10 18:07:49 2016 -0700
Committer: Zalo Correa <za...@esgyn.com>
Committed: Wed Aug 10 18:07:49 2016 -0700

----------------------------------------------------------------------
 core/sqf/monitor/linux/cluster.cxx         |   4 +-
 core/sqf/monitor/linux/commaccept.cxx      |  19 ---
 core/sqf/monitor/linux/lnode.cxx           | 151 +++++++++++++++++-
 core/sqf/monitor/linux/lnode.h             |  11 ++
 core/sqf/monitor/linux/monitor.cxx         |   9 ++
 core/sqf/monitor/linux/pnode.cxx           | 193 ++++--------------------
 core/sqf/monitor/linux/pnode.h             |  11 +-
 core/sqf/monitor/linux/reqprocinfo.cxx     |  23 +--
 core/sqf/monitor/linux/reqqueue.cxx        |   2 +-
 core/sqf/monitor/linux/shell.cxx           |  14 --
 core/sqf/sql/scripts/bats/sqconfig.monitor |  12 +-
 core/sqf/sql/scripts/bats/sqconfig.seabed  |  12 +-
 12 files changed, 223 insertions(+), 238 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/adbef87b/core/sqf/monitor/linux/cluster.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/cluster.cxx b/core/sqf/monitor/linux/cluster.cxx
index fe7b08d..5a46cf8 100644
--- a/core/sqf/monitor/linux/cluster.cxx
+++ b/core/sqf/monitor/linux/cluster.cxx
@@ -2840,7 +2840,7 @@ bool CCluster::ReinitializeConfigCluster( bool nodeAdded, int pnid )
     {
         CClusterConfig *clusterConfig = Nodes->GetClusterConfig();
         configPNodesCount_ = clusterConfig->GetPNodesCount();
-        Nodes->UpdateCluster( &indexToPnid_ );
+        Nodes->UpdateCluster();
     }
 
     if (trace_settings & (TRACE_INIT | TRACE_REQUEST))
@@ -2892,8 +2892,8 @@ void CCluster::InitializeConfigCluster( void )
         // (for virtual nodes physical node equals "rank" (previously set))
         MyPNID = clusterConfig->GetPNid( Node_name );
     }
-    Nodes->AddNodes( );
 
+    Nodes->AddNodes( );
     MyNode = Nodes->GetNode(MyPNID);
     Nodes->SetupCluster( &Node, &LNode, &indexToPnid_ );
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/adbef87b/core/sqf/monitor/linux/commaccept.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/commaccept.cxx b/core/sqf/monitor/linux/commaccept.cxx
index 91db931..2855cd3 100644
--- a/core/sqf/monitor/linux/commaccept.cxx
+++ b/core/sqf/monitor/linux/commaccept.cxx
@@ -191,25 +191,6 @@ bool CCommAccept::sendNodeInfoSock( int sockFd )
                     sizeof(nodeInfo[i].syncPort));
             nodeInfo[i].pnid = node->GetPNid();
             nodeInfo[i].creatorPNid = (nodeInfo[i].pnid == MyPNID) ? MyPNID : -1;
-
-            if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
-            {
-                trace_printf( "%s@%d - Preparing port info for new monitor\n"
-                            , method_name, __LINE__);
-                for (int i=0; i<pnodeCount; i++)
-                {
-                    trace_printf( "Port info for pnid=%d\n"
-                                  "        nodeInfo[%d].nodeName=%s\n"
-                                  "        nodeInfo[%d].commPort=%s\n"
-                                  "        nodeInfo[%d].syncPort=%s\n"
-                                  "        nodeInfo[%d].creatorPNid=%d\n"
-                                , nodeInfo[i].pnid
-                                , i, nodeInfo[i].nodeName
-                                , i, nodeInfo[i].commPort
-                                , i, nodeInfo[i].syncPort
-                                , i, nodeInfo[i].creatorPNid );
-                }
-            }
         }
         else
         {

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/adbef87b/core/sqf/monitor/linux/lnode.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/lnode.cxx b/core/sqf/monitor/linux/lnode.cxx
index 31b9682..6e81da3 100644
--- a/core/sqf/monitor/linux/lnode.cxx
+++ b/core/sqf/monitor/linux/lnode.cxx
@@ -969,8 +969,10 @@ void CLNode::Up( void )
 }
 
 CLNodeContainer::CLNodeContainer(CNode *node)
-                :LastNid(0)
+                :LNode(NULL)
+                ,LastNid(0)
                 ,lnodesCount_(0)
+                ,indexToNid_(NULL)
                 ,node_(node)
                 ,head_(NULL)
                 ,tail_(NULL)
@@ -1175,6 +1177,153 @@ CLNode *CLNodeContainer::GetLNode(int nid)
     return lnode;
 }
 
+CLNode *CLNodeContainer::GetLNode( char *process_name, CProcess **process,
+                                  bool checkstate, bool backupOk )
+{
+    CLNode *lnode = head_;
+    CNode *node = lnode ? lnode->GetNode() : NULL;
+    CProcess *p_process;
+    CLNode *b_lnode = NULL;
+    CProcess *b_process = NULL;
+    const char method_name[] = "CLNodeContainer::GetLNode";
+    TRACE_ENTRY;
+
+    // Initialize return value
+    *process = NULL;
+
+    while (node)
+    {
+        if ( !node->IsSpareNode() && 
+             (node->GetState() == State_Up ||
+              node->GetState() == State_Shutdown) )
+        {
+            *process = node->CProcessContainer::GetProcess(process_name, checkstate);
+            if (*process)
+            { 
+                p_process = *process;
+                if (trace_settings & (TRACE_REQUEST_DETAIL | TRACE_PROCESS_DETAIL))
+                    trace_printf("%s@%d - process %s (%d, %d), backup=%d, backupOk=%d\n",
+                                 method_name, __LINE__,
+                                 p_process->GetName(), p_process->GetNid(),
+                                 p_process->GetPid(),  p_process->IsBackup(),
+                                 backupOk);
+                if (!p_process->IsBackup())
+                {
+                    lnode = LNode[p_process->GetNid()];
+                    break;
+                }
+                else
+                {
+                    // Save backup process and lnode
+                    b_process = *process;
+                    b_lnode = LNode[b_process->GetNid()];
+                }
+            }
+        }
+        lnode = lnode->GetNext ();
+        node = lnode ? lnode->GetNode() : NULL;
+    }
+
+    if ( !*process && backupOk )
+    {
+        // We did not find the primary and it's ok to return the backup
+        *process = b_process;
+        lnode = b_lnode;
+    }
+
+    TRACE_EXIT;
+    return lnode;
+}
+
+CLNode *CLNodeContainer::GetLNodeByMap(int index )
+{
+    const char method_name[] = "CNodeContainer::GetLNodeByMap";
+    TRACE_ENTRY;
+
+    CClusterConfig *clusterConfig = Nodes->GetClusterConfig();
+    CLNode *lnode = NULL;
+    
+    if( index >= 0 && index < clusterConfig->GetLNodesCount() )
+    {
+        lnode = LNode[indexToNid_[index]];
+    }
+
+    TRACE_EXIT;
+    return lnode;
+}
+
+int CLNodeContainer::GetNidIndex( int nid )
+{
+    const char method_name[] = "CNodeContainer::GetNidIndex";
+    TRACE_ENTRY;
+
+    CClusterConfig *clusterConfig = Nodes->GetClusterConfig();
+
+    for (int i = 0; i <  clusterConfig->GetLNodesCount(); i++ )
+    {
+        if (LNode[i]->GetNid() == nid)
+        {
+            return(i);
+        }
+    }
+
+    TRACE_EXIT;
+    return(-1);
+}
+
+CLNode *CLNodeContainer::GetLNodeNext( int nid, bool checkstate )
+{
+    const char method_name[] = "CLNodeContainer::GetLNodeNext";
+    TRACE_ENTRY;
+
+    CClusterConfig *clusterConfig = Nodes->GetClusterConfig();
+    CLNode *lnode = NULL;
+
+    for (int i = (nid+1); i <  clusterConfig->GetLNodesCount(); i++ )
+    {
+        lnode = LNode[i];
+        if ( lnode )
+        {
+            if ( lnode->GetNid() > nid )
+            {
+                if (checkstate && lnode->GetState() == State_Up)
+                {
+                    break; // found it
+                }
+                else
+                {
+                    break; // found it
+                }
+            }
+        }
+    }
+    
+    if ( lnode == NULL )
+    {
+        for (int i = 0; i < clusterConfig->GetLNodesCount(); i++ )
+        {
+            lnode = LNode[i];
+            if ( lnode )
+            {
+                if ( lnode->GetNid() <= nid )
+                {
+                    if (checkstate && lnode->GetState() == State_Up)
+                    {
+                        break; // found it
+                    }
+                    else
+                    {
+                        break; // found it
+                    }
+                }
+            }
+        }
+    }
+
+    TRACE_EXIT;
+    return lnode;
+}
+
 bool CLNodeContainer::IsMyNode( int nid )
 {
     bool found = false;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/adbef87b/core/sqf/monitor/linux/lnode.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/lnode.h b/core/sqf/monitor/linux/lnode.h
index 9575c2a..6b1dba7 100644
--- a/core/sqf/monitor/linux/lnode.h
+++ b/core/sqf/monitor/linux/lnode.h
@@ -153,6 +153,7 @@ private:
     int            eyecatcher_;      // Debuggging aid -- leave as first
                                      // member variable of the class
 public:
+    CLNode  **LNode;      // array of logical node objects
     int     LastNid;      // Last node selected for process startup
 
     CLNodeContainer( CNode *node );
@@ -167,7 +168,16 @@ public:
     void    CheckForPendingCreates( CProcess *process=NULL );
     inline  CLNode *GetFirstLNode( void ) { return ( head_ ); }
     inline  CLNode *GetLastLNode( void ) { return ( tail_ ); }
+
     CLNode *GetLNode( int nid );
+    CLNode *GetLNode( char *process_name, CProcess **process,
+                      bool checkstate=true, bool backupOk=false );
+    CLNode *GetLNodeByMap( int index );
+    CLNode *GetLNodeNext( int nid, bool checkstate=true );
+
+    inline  int    GetNidByMap( int index ) { return ( indexToNid_[index] ); }
+    int     GetNidIndex( int nid );
+
     inline  CNode *GetNode( void ) { return ( node_ ); }
     inline  int    GetLNodesCount( void ) { return ( lnodesCount_ ); }
     bool    IsMyNode( int nid );
@@ -181,6 +191,7 @@ protected:
     inline  void SetLNodesCount( int lnodesCount ) { lnodesCount_ = lnodesCount; }
 
     int      lnodesCount_; // # of logical nodes in this container
+    int     *indexToNid_;  // map of configuration entries to LNode[nid]
 
 private:
     CNode   *node_;        // physical node of this container or 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/adbef87b/core/sqf/monitor/linux/monitor.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/monitor.cxx b/core/sqf/monitor/linux/monitor.cxx
index 8ba0540..34995f4 100755
--- a/core/sqf/monitor/linux/monitor.cxx
+++ b/core/sqf/monitor/linux/monitor.cxx
@@ -544,6 +544,15 @@ char * CMonitor::ProcCopy(char *bufPtr, CProcess *process)
     procObj->argc = process->argc();
     procObj->creation_time = process->GetCreationTime();
 
+
+    if (trace_settings & (TRACE_REQUEST | TRACE_INIT | TRACE_RECOVERY))
+            trace_printf( "%s@%d - Packing process %s (%d,%d:%d)\n"
+                        , method_name, __LINE__
+                        , process->GetName()
+                        , process->GetNid()
+                        , process->GetPid()
+                        , process->GetVerifier() );
+
     char * stringData = &procObj->stringData;
 
     // Copy the program name

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/adbef87b/core/sqf/monitor/linux/pnode.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnode.cxx b/core/sqf/monitor/linux/pnode.cxx
index 13f5d1d..5fdafc7 100644
--- a/core/sqf/monitor/linux/pnode.cxx
+++ b/core/sqf/monitor/linux/pnode.cxx
@@ -1411,9 +1411,8 @@ void CNode::StartSMServiceProcess( void )
 CNodeContainer::CNodeContainer( void )
                :CLNodeContainer(NULL)
                ,Node(NULL)
-               ,LNode(NULL)
-               ,indexToPnid_(NULL)
                ,pnodeCount_(0)
+               ,indexToPnid_(NULL)
                ,clusterConfig_(NULL)
                ,head_(NULL)
                ,tail_(NULL)
@@ -1440,9 +1439,11 @@ CNodeContainer::CNodeContainer( void )
         Node[i] = NULL;
         indexToPnid_[i] = -1;
     }
+    indexToNid_ = new int[clusterConfig_->GetLNodesConfigMax()];
     for (int i = 0; i < clusterConfig_->GetLNodesConfigMax(); i++ )
     {
         LNode[i] = NULL;
+        indexToNid_[i] = -1;
     }
 
     TRACE_EXIT;
@@ -1969,13 +1970,15 @@ void CNodeContainer::UnpackNodeMappings( intBuffPtr_t &buffer, int nodeMapCount
         pnidConfig = *buffer++;
         pnid = *buffer++;
 
-        Nodes->AddLNodes( Nodes->GetNode(pnid), Nodes->GetNode(pnidConfig) );
-
         if (trace_settings & ( TRACE_INIT || TRACE_RECOVERY || TRACE_REQUEST_DETAIL) )
             trace_printf("%s@%d - Unpacking node mapping, pnidConfig=%d, pnid=%d \n",
                         method_name, __LINE__, pnidConfig, pnid);
+
+        Nodes->AddLNodes( Nodes->GetNode(pnid), Nodes->GetNode(pnidConfig) );
     }
 
+    UpdateCluster();
+
     TRACE_EXIT;
     return;
 }
@@ -2360,134 +2363,6 @@ void CNodeContainer::DeleteNodeLNodes( CNode *node )
     TRACE_EXIT;
 }
 
-CLNode *CNodeContainer::GetLNode(int nid)
-{
-    const char method_name[] = "CNodeContainer::GetLNode";
-    TRACE_ENTRY;
-
-    CLNode *lnode = GetFirstLNode();
-    while (lnode)
-    {
-        if ( lnode->GetNid() == nid )
-        { 
-            break;
-        }
-        lnode = lnode->GetNext();
-    }
-
-    TRACE_EXIT;
-    return lnode;
-}
-
-CLNode *CNodeContainer::GetLNodeNext( int nid, bool checkstate )
-{
-    const char method_name[] = "CLNodeContainer::GetLNodeNext";
-    TRACE_ENTRY;
-
-    CLNode *lnode = NULL;
-
-    for (int i = (nid+1); i <  clusterConfig_->GetLNodesCount(); i++ )
-    {
-        lnode = LNode[i];
-        if ( lnode )
-        {
-            if ( lnode->GetNid() > nid )
-            {
-                if (checkstate && lnode->GetState() == State_Up)
-                {
-                    break; // found it
-                }
-                else
-                {
-                    break; // found it
-                }
-            }
-        }
-    }
-    
-    if ( lnode == NULL )
-    {
-        for (int i = 0; i < clusterConfig_->GetLNodesCount(); i++ )
-        {
-            lnode = LNode[i];
-            if ( lnode )
-            {
-                if ( lnode->GetNid() <= nid )
-                {
-                    if (checkstate && lnode->GetState() == State_Up)
-                    {
-                        break; // found it
-                    }
-                    else
-                    {
-                        break; // found it
-                    }
-                }
-            }
-        }
-    }
-
-    TRACE_EXIT;
-    return lnode;
-}
-
-CLNode *CNodeContainer::GetLNode( char *process_name, CProcess **process,
-                                  bool checkstate, bool backupOk )
-{
-    CLNode *lnode = NULL;
-    CNode *node = head_;
-    CProcess *p_process;
-    CLNode *b_lnode = NULL;
-    CProcess *b_process = NULL;
-    const char method_name[] = "CNodeContainer::GetLNode";
-    TRACE_ENTRY;
-
-    // Initialize return value
-    *process = NULL;
-
-    while (node)
-    {
-        if ( !node->IsSpareNode() && 
-             (node->GetState() == State_Up ||
-              node->GetState() == State_Shutdown) )
-        {
-            *process = node->CProcessContainer::GetProcess(process_name, checkstate);
-            if (*process)
-            { 
-                p_process = *process;
-                if (trace_settings & (TRACE_REQUEST_DETAIL | TRACE_PROCESS_DETAIL))
-                    trace_printf("%s@%d - process %s (%d, %d), backup=%d, backupOk=%d\n",
-                                 method_name, __LINE__,
-                                 p_process->GetName(), p_process->GetNid(),
-                                 p_process->GetPid(),  p_process->IsBackup(),
-                                 backupOk);
-                if (!p_process->IsBackup())
-                {
-                    lnode = LNode[p_process->GetNid()];
-                    break;
-                }
-                else
-                {
-                    // Save backup process and lnode
-                    b_process = *process;
-                    b_lnode = LNode[b_process->GetNid()];
-                }
-            }
-        }
-        node = node->GetNext ();
-    }
-
-    if ( !*process && backupOk )
-    {
-        // We did not find the primary and it's ok to return the backup
-        *process = b_process;
-        lnode = b_lnode;
-    }
-
-    TRACE_EXIT;
-    return lnode;
-}
-
 int CNodeContainer::GetFirstNid( void )
 {
     const char method_name[] = "CNodeContainer::GetFirstNid";
@@ -3063,7 +2938,7 @@ struct internal_msg_def *CNodeContainer::PopMsg( struct sync_buffer_def *recvBuf
     const char method_name[] = "CNodeContainer::PopMsg";
     TRACE_ENTRY;
 
-    if ( recvBuf->msgInfo.msg_count )
+    if ( recvBuf->msgInfo.msg_count > 0 )
     {
         msg = (struct internal_msg_def *)&recvBuf->msg[recvBuf->msgInfo.msg_offset];
         recvBuf->msgInfo.msg_count --;
@@ -3220,7 +3095,6 @@ void CNodeContainer::SetupCluster( CNode ***pnode_list, CLNode ***lnode_list, in
     {
         if (node)
         {
-            indexToPnid_[i] = node->GetPNid();
             if (trace_settings & TRACE_INIT)
                 trace_printf( "%s@%d - Node %s (pnid=%d, zid=%d, state=%s) is Spare=%d\n"
                             , method_name, __LINE__
@@ -3254,6 +3128,8 @@ void CNodeContainer::SetupCluster( CNode ***pnode_list, CLNode ***lnode_list, in
         }
     }
 
+    UpdateCluster();
+
     TRACE_EXIT;
 }
 
@@ -3451,53 +3327,42 @@ CLNode *CNodeContainer::NextPossibleLNode( CProcess *requester, ZoneType type, i
     return lnode;
 }
 
-void CNodeContainer::UpdateCluster( int **indexToPnid )
+void CNodeContainer::UpdateCluster( void )
 {
     const char method_name[] = "CNodeContainer::UpdateCluster";
     TRACE_ENTRY;
 
-    *indexToPnid = indexToPnid_;
-
-    if (trace_settings & (TRACE_INIT | TRACE_REQUEST))
-    {
-        for ( int i = 0; i < GetPNodesCount(); i++ )
-        {
-            trace_printf( "%s@%d - indexToPnid_[%d]=%d\n"
-                        , method_name, __LINE__, i, indexToPnid_[i]);
-        }
-    }
+    CLNode *lnode;
+    CNode  *node;
 
     for (int i = 0; i < clusterConfig_->GetPNodesConfigMax(); i++ )
     {
         indexToPnid_[i] = -1;
     }
 
+    node = GetFirstNode();
     // Refresh the index to pnid map
-    CNode *node = GetFirstNode();
     for ( int i = 0; node && i < GetPNodesCount(); i++, node = node->GetNext() )
     {
-        if (node)
-        {
-            indexToPnid_[i] = node->GetPNid();
-            if (trace_settings & (TRACE_INIT | TRACE_REQUEST))
-            {
-                trace_printf( "%s@%d - Node %s (pnid=%d, zid=%d, state=%s) is Spare=%d\n"
-                            , method_name, __LINE__
-                            , node->GetName()
-                            , node->GetPNid()
-                            , node->GetZone()
-                            , StateString(node->GetState())
-                            , node->IsSpareNode());
-            }
-        }
+        indexToPnid_[i] = node->GetPNid();
+        if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST))
+            trace_printf( "%s@%d - indexToPnid_[%d]=%d\n"
+                        , method_name, __LINE__
+                        , i
+                        , indexToPnid_[i]);
     }
 
-    if (trace_settings & (TRACE_INIT | TRACE_REQUEST))
+    // Refresh the index to nid map
+    lnode = GetFirstLNode();
+    for ( int i = 0; lnode && i < GetLNodesCount(); i++, lnode = lnode->GetNext() )
     {
-        for ( int i = 0; i < GetPNodesCount(); i++ )
+        indexToNid_[i] = lnode->GetNid();
+        if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST))
         {
-            trace_printf( "%s@%d - indexToPnid_[%d]=%d\n"
-                        , method_name, __LINE__, i, indexToPnid_[i]);
+            trace_printf( "%s@%d - indexToNid_[%d]=%d\n"
+                        , method_name, __LINE__
+                        , i
+                        , indexToNid_[i]);
         }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/adbef87b/core/sqf/monitor/linux/pnode.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnode.h b/core/sqf/monitor/linux/pnode.h
index d151a6c..8c3a5de 100644
--- a/core/sqf/monitor/linux/pnode.h
+++ b/core/sqf/monitor/linux/pnode.h
@@ -61,8 +61,6 @@ private:
 
 public:
     CNode   **Node;          // array of physical node objects
-    CLNode  **LNode;         // array of logical node objects
-    int      *indexToPnid_;  // map of configuration entries to Node[pnid]
 
 
     CNodeContainer( void );
@@ -85,10 +83,6 @@ public:
     int     GetFirstNid( void );
     int     GetNextNid( int nid );
     inline CNode *GetFirstNode( void ) { return ( head_ ); }
-    CLNode *GetLNode( int nid );
-    CLNode *GetLNode( char *process_name, CProcess **process,
-                      bool checkstate=true, bool backupOk=false );
-    CLNode *GetLNodeNext( int nid, bool checkstate=true );
     CNode  *GetNode( char *name );
     CNode  *GetNode( int pnid );
     CNode  *GetNodeByMap( int index );
@@ -148,12 +142,13 @@ public:
     void    UnpackSpareNodesList( intBuffPtr_t &buffer, int spareNodesCount );
     void    UnpackZids( intBuffPtr_t &buffer );
 
-    void    UpdateCluster( int **indexToPnid );
+    void    UpdateCluster( void );
 
 protected:
 
 private:
-    int         pnodeCount_;  // # of physical node objects in array
+    int     pnodeCount_;    // # of physical node objects in array
+    int    *indexToPnid_;   // map of configuration entries to Node[pnid]
     CClusterConfig *clusterConfig_;  // 'sqconfig.db' objects
     NodesList  spareNodesList_; // current spare physical nodes list
     NodesList  spareNodesConfigList_; // configured spare physical nodes list

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/adbef87b/core/sqf/monitor/linux/reqprocinfo.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/reqprocinfo.cxx b/core/sqf/monitor/linux/reqprocinfo.cxx
index 9bbabad..f7eeefe 100644
--- a/core/sqf/monitor/linux/reqprocinfo.cxx
+++ b/core/sqf/monitor/linux/reqprocinfo.cxx
@@ -136,13 +136,13 @@ CProcess * CExtProcInfoBase::ProcessInfo_GetProcess (int &nid, bool getDataForAl
                 }
             }
             lnode = lnode->GetNext();
+            nid = lnode ? lnode->GetNid() : nid;
         }
     } while (getDataForAllNodes && lnode);
 
     return(NULL);
 }
 
-
 // Information for more than one process is being requested.  Iterate
 // through the process list and return process information for processes
 // meeting the requested criteria.
@@ -152,7 +152,9 @@ int CExtProcInfoBase::ProcessInfo_BuildReply(CProcess *process,
                                      bool getDataForAllNodes,
                                      char *pattern)
 {
-    int currentNode = (process != 0) ? process->GetNid() : Nodes->GetLNodesConfigMax();
+    int currentIndex = (process != 0) 
+            ? Nodes->GetNidIndex( process->GetNid() )
+            : Nodes->GetLNodesCount();
     bool moreToRetrieve;
     bool copy = true;
     bool reg = false;
@@ -202,22 +204,25 @@ int CExtProcInfoBase::ProcessInfo_BuildReply(CProcess *process,
                 // of whether there is more data remaining.
                 msg->u.reply.u.process_info.more_data
                     = (process != 0)
-                    || (++currentNode < Nodes->GetLNodesConfigMax());
+                    || (++currentIndex < Nodes->GetLNodesCount());
                 return count;
             }
         }
 
         moreToRetrieve = false;
-        if (getDataForAllNodes && ++currentNode < Nodes->GetLNodesConfigMax())
+        if (getDataForAllNodes && ++currentIndex < Nodes->GetLNodesCount())
         {   // Start retrieving process data for next node.  We ask
-            // ProcessInfo_GetProcess for the first process on
-            // "currentNode" which has just been incremented.  Note
+            // ProcessInfo_GetProcess for the first process on lnode of
+            // "currentIndex" which has just been incremented.  Note
             // that it is possible there are no processes on that node
             // so ProcessInfo_GetProcess will return a process on the
-            // first node it finds and "currentNode" will be updated
-            // to be the node number where the process resides.
+            // first node it finds and "currentIndex" will be updated
+            // to be the node index number where the process resides.
 
-            process = ProcessInfo_GetProcess(currentNode, getDataForAllNodes);
+            int nid = Nodes->GetNidByMap( currentIndex );
+            if (nid == -1) break;
+            process = ProcessInfo_GetProcess( nid, getDataForAllNodes);
+            currentIndex = Nodes->GetNidIndex( nid );
             moreToRetrieve = true;
         }
     } while (moreToRetrieve);

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/adbef87b/core/sqf/monitor/linux/reqqueue.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/reqqueue.cxx b/core/sqf/monitor/linux/reqqueue.cxx
index 2962f6a..196f490 100644
--- a/core/sqf/monitor/linux/reqqueue.cxx
+++ b/core/sqf/monitor/linux/reqqueue.cxx
@@ -2466,7 +2466,7 @@ void CIntSnapshotReq::performRequest()
 
     // estimate size of snapshot buffer
     // about 100 bytes per process, 1.5 times total
-    int procSize = Nodes->ProcessCount() * 1.5 * 100;
+    int procSize = Nodes->ProcessCount() * 1.75 * 100;
     int spareNodeSize = Nodes->GetSpareNodesList()->size() * sizeof(int); // pnids
 
     if (trace_settings & (TRACE_REQUEST | TRACE_INIT | TRACE_RECOVERY))

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/adbef87b/core/sqf/monitor/linux/shell.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/shell.cxx b/core/sqf/monitor/linux/shell.cxx
index 58d1a9d..bc4fc74 100644
--- a/core/sqf/monitor/linux/shell.cxx
+++ b/core/sqf/monitor/linux/shell.cxx
@@ -3620,20 +3620,6 @@ void node_info( int nid )
 
                         for (i=0; i < msg->u.reply.u.node_info.num_returned; i++)
                         {
-#if 0
-                            CPNodeConfig *pConfig = ClusterConfig.GetPNodeConfig (msg->u.reply.u.node_info.node[i].pnid);
-                            if (pConfig != NULL)
-                            {
-                                string downNodeToFind = " ";
-                                downNodeToFind += pConfig->GetName();
-                                downNodeToFind += " ";
-
-                                if  ((downNodeList != NULL) && strstr(downNodeString.c_str(),downNodeToFind.c_str()))
-                                {
-                                    continue; // We do not want to consider this node since it is in our exclude list
-                                }
-                            }
-#endif
                             if ( last_nid != -1 )
                             {
                                 if ( (msg->u.reply.u.node_info.node[i].pnid != 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/adbef87b/core/sqf/sql/scripts/bats/sqconfig.monitor
----------------------------------------------------------------------
diff --git a/core/sqf/sql/scripts/bats/sqconfig.monitor b/core/sqf/sql/scripts/bats/sqconfig.monitor
index 52e8ada..bb9f02b 100644
--- a/core/sqf/sql/scripts/bats/sqconfig.monitor
+++ b/core/sqf/sql/scripts/bats/sqconfig.monitor
@@ -23,6 +23,8 @@ begin node
 _virtualnodes 6
 end node
 
+begin persist
+
 #
 # The folling process types are persistent and have special process management:
 #    DTM
@@ -35,8 +37,6 @@ end node
 #    PERSIST
 #
 
-begin persist
-
 PERSIST_PROCESS_KEYS = DTM,TMID,SSCP,SSMP,PSD,WDG,QMN
 
 DTM_PROCESS_NAME     = $TM%nid+
@@ -87,12 +87,4 @@ WDG_STDOUT           = stdout_WDG%nid
 WDG_PERSIST_RETRIES  = 10,60
 WDG_PERSIST_ZONES    = %zid
 
-QMN_PROCESS_NAME    = $ZQM0000
-QMN_PROCESS_TYPE    = PERSIST
-QMN_PROGRAM_NAME    = tdm_arkqmm
-QMN_REQUIRES_DTM    = Y
-QMN_STDOUT          = stdout_ZQM0000
-QMN_PERSIST_RETRIES = 10,60
-QMN_PERSIST_ZONES   = %zid+
-
 end persist

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/adbef87b/core/sqf/sql/scripts/bats/sqconfig.seabed
----------------------------------------------------------------------
diff --git a/core/sqf/sql/scripts/bats/sqconfig.seabed b/core/sqf/sql/scripts/bats/sqconfig.seabed
index e5c2748..05729e8 100644
--- a/core/sqf/sql/scripts/bats/sqconfig.seabed
+++ b/core/sqf/sql/scripts/bats/sqconfig.seabed
@@ -23,6 +23,8 @@ begin node
 _virtualnodes 4
 end node
 
+begin persist
+
 #
 # The folling process types are persistent and have special process management:
 #    DTM
@@ -35,8 +37,6 @@ end node
 #    PERSIST
 #
 
-begin persist
-
 PERSIST_PROCESS_KEYS = DTM,TMID,SSCP,SSMP,PSD,WDG,QMN
 
 DTM_PROCESS_NAME     = $TM%nid+
@@ -87,12 +87,4 @@ WDG_STDOUT           = stdout_WDG%nid
 WDG_PERSIST_RETRIES  = 10,60
 WDG_PERSIST_ZONES    = %zid
 
-QMN_PROCESS_NAME    = $ZQM0000
-QMN_PROCESS_TYPE    = PERSIST
-QMN_PROGRAM_NAME    = tdm_arkqmm
-QMN_REQUIRES_DTM    = Y
-QMN_STDOUT          = stdout_ZQM0000
-QMN_PERSIST_RETRIES = 10,60
-QMN_PERSIST_ZONES   = %zid+
-
 end persist