You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by su...@apache.org on 2017/09/19 02:56:24 UTC
[1/2] incubator-trafodion git commit: [TRAFODION-2746] Fixed various
problems detected in large clusters (> 30)
Repository: incubator-trafodion
Updated Branches:
refs/heads/master aeb9ef223 -> 8e2ba64dd
[TRAFODION-2746] Fixed various problems detected in large clusters (> 30)
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/19555630
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/19555630
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/19555630
Branch: refs/heads/master
Commit: 19555630d5c0d63e8a8ea1e02f92545da983cb35
Parents: cd54195
Author: Zalo Correa <za...@esgyn.com>
Authored: Fri Sep 15 17:02:48 2017 -0700
Committer: Zalo Correa <za...@esgyn.com>
Committed: Fri Sep 15 17:02:48 2017 -0700
----------------------------------------------------------------------
core/sqf/monitor/linux/cluster.cxx | 10 ++++------
core/sqf/monitor/linux/redirector.cxx | 22 +++++++++++++++++-----
core/sqf/monitor/linux/reqqueue.cxx | 16 +++++++++-------
3 files changed, 30 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/19555630/core/sqf/monitor/linux/cluster.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/cluster.cxx b/core/sqf/monitor/linux/cluster.cxx
index 585ff0a..c18c969 100644
--- a/core/sqf/monitor/linux/cluster.cxx
+++ b/core/sqf/monitor/linux/cluster.cxx
@@ -346,18 +346,16 @@ void CCluster::NodeReady( CNode *spareNode )
lnode->Up();
}
- ResetIntegratingPNid();
spareNode->SetActivatingSpare( false );
-
if ( MyNode->IsCreator() )
{
MyNode->SetCreator( false, -1, -1 );
}
+ ResetIntegratingPNid();
TRACE_EXIT;
}
-
// Assigns a new TMLeader if given pnid is same as TmLeaderNid
// TmLeader is a logical node num.
// pnid has gone down, so if that node was previously the TM leader, a new one needs to be chosen.
@@ -804,11 +802,11 @@ void CCluster::HardNodeDown (int pnid, bool communicate_state)
{
if ( node->GetPNid() == integratingPNid_ )
{
- ResetIntegratingPNid();
if ( MyNode->IsCreator() )
{
MyNode->SetCreator( false, -1, -1 );
}
+ ResetIntegratingPNid();
}
node->KillAllDown();
node->SetState( State_Down );
@@ -1425,11 +1423,11 @@ int CCluster::HardNodeUp( int pnid, char *node_name )
}
}
- ResetIntegratingPNid();
if ( MyNode->IsCreator() )
{
MyNode->SetCreator( false, -1, -1 );
}
+ ResetIntegratingPNid();
if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
trace_printf( "%s@%d" " - New monitor %s, pnid=%d, state=%s, spare=%d\n"
@@ -7541,7 +7539,7 @@ int CCluster::ReceiveSock(char *buf, int size, int sockFd)
}
else
{
- sizeCount -= received;
+ sizeCount -= readCount;
readAgain = true;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/19555630/core/sqf/monitor/linux/redirector.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/redirector.cxx b/core/sqf/monitor/linux/redirector.cxx
index fc0bc7b..b3780cc 100644
--- a/core/sqf/monitor/linux/redirector.cxx
+++ b/core/sqf/monitor/linux/redirector.cxx
@@ -1271,11 +1271,23 @@ void CRedirectStderr::handleOutput(ssize_t count, char *buffer)
if ( buf )
{
memset(buf, 0, buf_size);
- ssize_t size = snprintf(buf,
- (buf_size<MON_EVENT_BUF_SIZE)?buf_size:MON_EVENT_BUF_SIZE,
- "STDERR redirected from %s.%s.%d.%d: %s",
- nodeName(), processName(), nid(), pid(), buffer );
- if ( size > 0 && buf[size-1] != '\n') buf[size-1] = '\n';
+ // Copy up to MON_EVENT_BUF_SIZE
+ ssize_t size = snprintf( buf
+ , (buf_size<MON_EVENT_BUF_SIZE)?buf_size:MON_EVENT_BUF_SIZE
+ , "STDERR redirected from %s.%s.%d.%d: %s"
+ , nodeName(), processName(), nid(), pid(), buffer );
+ if ( size > 0 )
+ {
+ if (size >= MON_EVENT_BUF_SIZE )
+ { // truncated
+ buf[MON_EVENT_BUF_SIZE-2] = '\n';
+ buf[MON_EVENT_BUF_SIZE-1] = 0;
+ }
+ else if ( buf[size-1] != '\n')
+ {
+ buf[size-1] = '\n';
+ }
+ }
mon_log_write(MON_REDIR_STDERR, SQ_LOG_INFO, buf);
delete [] buf;
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/19555630/core/sqf/monitor/linux/reqqueue.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/reqqueue.cxx b/core/sqf/monitor/linux/reqqueue.cxx
index f095345..764966f 100644
--- a/core/sqf/monitor/linux/reqqueue.cxx
+++ b/core/sqf/monitor/linux/reqqueue.cxx
@@ -2464,17 +2464,19 @@ void CIntSnapshotReq::performRequest()
}
// estimate size of snapshot buffer
- // about 100 bytes per process, 1.5 times total
- int procSize = Nodes->ProcessCount() * 1.75 * 100;
- int spareNodeSize = Nodes->GetSpareNodesList()->size() * sizeof(int); // pnids
+ // about 100 bytes per process, 2 times total
+ int procSize = Nodes->ProcessCount() * 2 * 100;
+ int idsSize = Nodes->GetSNodesCount() * sizeof(int); // spare pnids
+ idsSize += (Nodes->GetPNodesCount() + Nodes->GetLNodesCount()) * sizeof(int); // pnid/nid map
+ idsSize += Nodes->GetLNodesCount() * sizeof(int); // nids
if (trace_settings & (TRACE_REQUEST | TRACE_INIT | TRACE_RECOVERY))
- trace_printf("%s@%d - Snapshot sizes, procSize = %d, spareNodeSize = %d\n",
- method_name, __LINE__, procSize, spareNodeSize);
+ trace_printf("%s@%d - Snapshot sizes, procSize = %d, idsSize = %d\n",
+ method_name, __LINE__, procSize, idsSize);
- mem_log_write(MON_REQQUEUE_SNAPSHOT_4, procSize, spareNodeSize);
+ mem_log_write(MON_REQQUEUE_SNAPSHOT_4, procSize, idsSize);
- snapshotBuf = (char *) malloc (procSize + spareNodeSize);
+ snapshotBuf = (char *) malloc (procSize + idsSize);
if (!snapshotBuf)
{
[2/2] incubator-trafodion git commit: Merge [TRAFODION-2746] PR-1234
Fixed various problems detected in large clusters (> 30)
Posted by su...@apache.org.
Merge [TRAFODION-2746] PR-1234 Fixed various problems detected in large clusters (> 30)
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/8e2ba64d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/8e2ba64d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/8e2ba64d
Branch: refs/heads/master
Commit: 8e2ba64ddc1eb43ff9a30ad412bcf9024088fc13
Parents: aeb9ef2 1955563
Author: Suresh Subbiah <su...@apache.org>
Authored: Tue Sep 19 02:56:07 2017 +0000
Committer: Suresh Subbiah <su...@apache.org>
Committed: Tue Sep 19 02:56:07 2017 +0000
----------------------------------------------------------------------
core/sqf/monitor/linux/cluster.cxx | 10 ++++------
core/sqf/monitor/linux/redirector.cxx | 22 +++++++++++++++++-----
core/sqf/monitor/linux/reqqueue.cxx | 16 +++++++++-------
3 files changed, 30 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/8e2ba64d/core/sqf/monitor/linux/cluster.cxx
----------------------------------------------------------------------