You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by su...@apache.org on 2017/09/19 02:56:24 UTC

[1/2] incubator-trafodion git commit: [TRAFODION-2746] Fixed various problems detected in large clusters (> 30)

Repository: incubator-trafodion
Updated Branches:
  refs/heads/master aeb9ef223 -> 8e2ba64dd


[TRAFODION-2746] Fixed various problems detected in large clusters (> 30)


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/19555630
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/19555630
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/19555630

Branch: refs/heads/master
Commit: 19555630d5c0d63e8a8ea1e02f92545da983cb35
Parents: cd54195
Author: Zalo Correa <za...@esgyn.com>
Authored: Fri Sep 15 17:02:48 2017 -0700
Committer: Zalo Correa <za...@esgyn.com>
Committed: Fri Sep 15 17:02:48 2017 -0700

----------------------------------------------------------------------
 core/sqf/monitor/linux/cluster.cxx    | 10 ++++------
 core/sqf/monitor/linux/redirector.cxx | 22 +++++++++++++++++-----
 core/sqf/monitor/linux/reqqueue.cxx   | 16 +++++++++-------
 3 files changed, 30 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/19555630/core/sqf/monitor/linux/cluster.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/cluster.cxx b/core/sqf/monitor/linux/cluster.cxx
index 585ff0a..c18c969 100644
--- a/core/sqf/monitor/linux/cluster.cxx
+++ b/core/sqf/monitor/linux/cluster.cxx
@@ -346,18 +346,16 @@ void CCluster::NodeReady( CNode *spareNode )
         lnode->Up();
     }
 
-    ResetIntegratingPNid();
     spareNode->SetActivatingSpare( false );
-
     if ( MyNode->IsCreator() )
     {
         MyNode->SetCreator( false, -1, -1 );
     }
+    ResetIntegratingPNid();
 
     TRACE_EXIT;
 }
 
-
 // Assigns a new TMLeader if given pnid is same as TmLeaderNid 
 // TmLeader is a logical node num. 
 // pnid has gone down, so if that node was previously the TM leader, a new one needs to be chosen.
@@ -804,11 +802,11 @@ void CCluster::HardNodeDown (int pnid, bool communicate_state)
         {
             if ( node->GetPNid() == integratingPNid_ )
             {
-                ResetIntegratingPNid();
                 if ( MyNode->IsCreator() )
                 {
                     MyNode->SetCreator( false, -1, -1 );
                 }
+                ResetIntegratingPNid();
             }
             node->KillAllDown();
             node->SetState( State_Down ); 
@@ -1425,11 +1423,11 @@ int CCluster::HardNodeUp( int pnid, char *node_name )
                 }
             }
 
-            ResetIntegratingPNid();
             if ( MyNode->IsCreator() )
             {
                 MyNode->SetCreator( false, -1, -1 );
             }
+            ResetIntegratingPNid();
 
             if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
                trace_printf( "%s@%d" " - New monitor %s, pnid=%d, state=%s, spare=%d\n"
@@ -7541,7 +7539,7 @@ int CCluster::ReceiveSock(char *buf, int size, int sockFd)
             }
             else
             {
-                sizeCount -= received;
+                sizeCount -= readCount;
                 readAgain = true;
             }
         }

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/19555630/core/sqf/monitor/linux/redirector.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/redirector.cxx b/core/sqf/monitor/linux/redirector.cxx
index fc0bc7b..b3780cc 100644
--- a/core/sqf/monitor/linux/redirector.cxx
+++ b/core/sqf/monitor/linux/redirector.cxx
@@ -1271,11 +1271,23 @@ void CRedirectStderr::handleOutput(ssize_t count, char *buffer)
     if ( buf )
     {
         memset(buf, 0, buf_size);
-        ssize_t size = snprintf(buf, 
-                                (buf_size<MON_EVENT_BUF_SIZE)?buf_size:MON_EVENT_BUF_SIZE, 
-                                "STDERR redirected from %s.%s.%d.%d: %s",
-                                nodeName(), processName(), nid(), pid(), buffer );
-        if ( size > 0 && buf[size-1] != '\n') buf[size-1] = '\n';
+        // Copy up to MON_EVENT_BUF_SIZE
+        ssize_t size = snprintf( buf
+                               , (buf_size<MON_EVENT_BUF_SIZE)?buf_size:MON_EVENT_BUF_SIZE
+                               , "STDERR redirected from %s.%s.%d.%d: %s"
+                               ,  nodeName(), processName(), nid(), pid(), buffer );
+        if ( size > 0 )
+        {
+            if (size >= MON_EVENT_BUF_SIZE )
+            { // truncated
+                buf[MON_EVENT_BUF_SIZE-2] = '\n';
+                buf[MON_EVENT_BUF_SIZE-1] = 0;
+            }
+            else if ( buf[size-1] != '\n')
+            {
+                buf[size-1] = '\n';
+            }
+        }
         mon_log_write(MON_REDIR_STDERR, SQ_LOG_INFO, buf);
 
         delete [] buf;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/19555630/core/sqf/monitor/linux/reqqueue.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/reqqueue.cxx b/core/sqf/monitor/linux/reqqueue.cxx
index f095345..764966f 100644
--- a/core/sqf/monitor/linux/reqqueue.cxx
+++ b/core/sqf/monitor/linux/reqqueue.cxx
@@ -2464,17 +2464,19 @@ void CIntSnapshotReq::performRequest()
     }
 
     // estimate size of snapshot buffer
-    // about 100 bytes per process, 1.5 times total
-    int procSize = Nodes->ProcessCount() * 1.75 * 100;
-    int spareNodeSize = Nodes->GetSpareNodesList()->size() * sizeof(int); // pnids
+    // about 100 bytes per process, 2 times total
+    int procSize = Nodes->ProcessCount() * 2 * 100;
+    int idsSize = Nodes->GetSNodesCount() * sizeof(int); // spare pnids
+    idsSize += (Nodes->GetPNodesCount() + Nodes->GetLNodesCount()) * sizeof(int); // pnid/nid map
+    idsSize += Nodes->GetLNodesCount() * sizeof(int);    // nids
 
     if (trace_settings & (TRACE_REQUEST | TRACE_INIT | TRACE_RECOVERY))
-        trace_printf("%s@%d - Snapshot sizes, procSize = %d, spareNodeSize = %d\n",
-                      method_name, __LINE__, procSize, spareNodeSize);
+        trace_printf("%s@%d - Snapshot sizes, procSize = %d, idsSize = %d\n",
+                      method_name, __LINE__, procSize, idsSize);
 
-    mem_log_write(MON_REQQUEUE_SNAPSHOT_4, procSize, spareNodeSize);
+    mem_log_write(MON_REQQUEUE_SNAPSHOT_4, procSize, idsSize);
 
-    snapshotBuf = (char *) malloc (procSize + spareNodeSize); 
+    snapshotBuf = (char *) malloc (procSize + idsSize); 
 
     if (!snapshotBuf) 
     {


[2/2] incubator-trafodion git commit: Merge [TRAFODION-2746] PR-1234 Fixed various problems detected in large clusters (> 30)

Posted by su...@apache.org.
Merge [TRAFODION-2746] PR-1234 Fixed various problems detected in large clusters (> 30)


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/8e2ba64d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/8e2ba64d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/8e2ba64d

Branch: refs/heads/master
Commit: 8e2ba64ddc1eb43ff9a30ad412bcf9024088fc13
Parents: aeb9ef2 1955563
Author: Suresh Subbiah <su...@apache.org>
Authored: Tue Sep 19 02:56:07 2017 +0000
Committer: Suresh Subbiah <su...@apache.org>
Committed: Tue Sep 19 02:56:07 2017 +0000

----------------------------------------------------------------------
 core/sqf/monitor/linux/cluster.cxx    | 10 ++++------
 core/sqf/monitor/linux/redirector.cxx | 22 +++++++++++++++++-----
 core/sqf/monitor/linux/reqqueue.cxx   | 16 +++++++++-------
 3 files changed, 30 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8e2ba64d/core/sqf/monitor/linux/cluster.cxx
----------------------------------------------------------------------