You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by su...@apache.org on 2016/10/26 17:13:50 UTC
[1/2] incubator-trafodion git commit: [TRAFODION-2310] Changed soft
down node processing to propagate node state change to remote monitor prior
to killing processes.
Repository: incubator-trafodion
Updated Branches:
refs/heads/master 8d8adf141 -> db66dc292
[TRAFODION-2310] Changed soft down node processing to propagate node state change
to remote monitor prior to killing processes.
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/1ad3983f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/1ad3983f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/1ad3983f
Branch: refs/heads/master
Commit: 1ad3983f7de988554a55281072a5ae28baa58fa9
Parents: de82dfb
Author: Zalo Correa <za...@esgyn.com>
Authored: Tue Oct 25 15:04:40 2016 -0700
Committer: Zalo Correa <za...@esgyn.com>
Committed: Tue Oct 25 15:04:40 2016 -0700
----------------------------------------------------------------------
core/sqf/monitor/linux/cluster.cxx | 104 +++++++++++++++++++++++---------
core/sqf/monitor/linux/internal.h | 3 +-
core/sqf/monitor/linux/monitor.cxx | 1 +
core/sqf/monitor/linux/pnode.cxx | 2 -
core/sqf/monitor/linux/pnode.h | 7 +--
core/sqf/monitor/linux/zclient.cxx | 41 +++++++++++++
core/sqf/sqenvcom.sh | 6 +-
7 files changed, 126 insertions(+), 38 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/monitor/linux/cluster.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/cluster.cxx b/core/sqf/monitor/linux/cluster.cxx
index 104f7d8..a986b67 100644
--- a/core/sqf/monitor/linux/cluster.cxx
+++ b/core/sqf/monitor/linux/cluster.cxx
@@ -410,6 +410,18 @@ void CCluster::AssignTmLeader(int pnid)
if (TmLeaderPNid != pnid)
{
+ node = LNode[TmLeaderNid]->GetNode();
+
+ if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC))
+ {
+ trace_printf( "%s@%d - Node pnid=%d (%s), phase=%s, isSoftNodeDown=%d\n"
+ , method_name, __LINE__
+ , node->GetPNid()
+ , node->GetName()
+ , NodePhaseString(node->GetPhase())
+ , node->IsSoftNodeDown());
+ }
+
return;
}
@@ -436,6 +448,16 @@ void CCluster::AssignTmLeader(int pnid)
node = Node[TmLeaderPNid];
+ if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC))
+ {
+ trace_printf( "%s@%d - Node pnid=%d (%s), phase=%s, isSoftNodeDown=%d\n"
+ , method_name, __LINE__
+ , node->GetPNid()
+ , node->GetName()
+ , NodePhaseString(node->GetPhase())
+ , node->IsSoftNodeDown());
+ }
+
if ( node->IsSpareNode() ||
node->IsSoftNodeDown() ||
node->GetState() != State_Up ||
@@ -938,14 +960,21 @@ void CCluster::SoftNodeDown( int pnid )
node = Nodes->GetNode(pnid);
if (trace_settings & (TRACE_REQUEST | TRACE_INIT | TRACE_RECOVERY))
- trace_printf( "%s@%d - pnid=%d, state=%s, isInQuiesceState=%d,"
- " (local pnid=%d, state=%s, isInQuiesceState=%d, "
- "shutdown level=%d)\n"
- , method_name, __LINE__
- , pnid, StateString(node->GetState())
- , node->isInQuiesceState()
- , MyPNID, StateString(MyNode->GetState())
- , MyNode->isInQuiesceState(), MyNode->GetShutdownLevel() );
+ {
+ trace_printf( "%s@%d - pnid=%d, state=%s, phase=%s, isInQuiesceState=%d, isSoftNodeDown=%d"
+ " (local pnid=%d, state=%s, phase=%s, isInQuiesceState=%d, isSoftNodeDown=%d "
+ "shutdown level=%d)\n"
+ , method_name, __LINE__
+ , pnid, StateString(node->GetState())
+ , NodePhaseString(node->GetPhase())
+ , node->isInQuiesceState()
+ , node->IsSoftNodeDown()
+ , MyPNID, StateString(MyNode->GetState())
+ , NodePhaseString(MyNode->GetPhase())
+ , MyNode->isInQuiesceState()
+ , MyNode->IsSoftNodeDown()
+ , MyNode->GetShutdownLevel() );
+ }
if (( MyPNID == pnid ) &&
( MyNode->GetState() == State_Down ||
@@ -966,12 +995,6 @@ void CCluster::SoftNodeDown( int pnid )
{
node->SetSoftNodeDown(); // Set soft down flag
node->SetPhase( Phase_SoftDown ); // Suspend TMSync on node
- node->KillAllDownSoft(); // Kill all processes
-
- snprintf( buf, sizeof(buf)
- , "[%s], Node %s (%d) executed soft down.\n"
- , method_name, node->GetName(), node->GetPNid() );
- mon_log_write(MON_CLUSTER_SOFTNODEDOWN_2, SQ_LOG_ERR, buf);
if ( node->GetPNid() == MyPNID )
{
@@ -979,6 +1002,13 @@ void CCluster::SoftNodeDown( int pnid )
CReplSoftNodeDown *repl = new CReplSoftNodeDown( MyPNID );
Replicator.addItem(repl);
}
+
+ node->KillAllDownSoft(); // Kill all processes
+
+ snprintf( buf, sizeof(buf)
+ , "[%s], Node %s (%d) executed soft down.\n"
+ , method_name, node->GetName(), node->GetPNid() );
+ mon_log_write(MON_CLUSTER_SOFTNODEDOWN_2, SQ_LOG_ERR, buf);
}
else
{
@@ -1003,6 +1033,16 @@ void CCluster::SoftNodeDown( int pnid )
trace_printf("%s@%d - Node %s (pnid=%d) TmSyncState updated (%d)(%s)\n", method_name, __LINE__, MyNode->GetName(), MyPNID, MyNode->GetTmSyncState(), SyncStateString( MyNode->GetTmSyncState() ));
}
+ if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC))
+ {
+ trace_printf( "%s@%d - Node pnid=%d (%s), phase=%s, isSoftNodeDown=%d\n"
+ , method_name, __LINE__
+ , node->GetPNid()
+ , node->GetName()
+ , NodePhaseString(node->GetPhase())
+ , node->IsSoftNodeDown());
+ }
+
IAmIntegrated = false;
AssignTmLeader(pnid);
@@ -4321,25 +4361,32 @@ int CCluster::AllgatherSock( int nbytes, void *sbuf, char *rbuf, int tag, MPI_St
{
// convert to milliseconds
sv_epoll_wait_timeout = atoi( lv_epoll_wait_timeout_env ) * 1000;
+ char *lv_epoll_retry_count_env = getenv( "SQ_MON_EPOLL_RETRY_COUNT" );
+ if ( lv_epoll_retry_count_env )
+ {
+ sv_epoll_retry_count = atoi( lv_epoll_retry_count_env );
+ }
+ if ( sv_epoll_retry_count > 180 )
+ {
+ sv_epoll_retry_count = 180;
+ }
}
else
{
- sv_epoll_wait_timeout = -1;
+ // default to 60 seconds
+ sv_epoll_wait_timeout = 1000;
+ sv_epoll_retry_count = 60;
}
- char *lv_epoll_retry_count_env = getenv( "SQ_MON_EPOLL_RETRY_COUNT" );
- if ( lv_epoll_retry_count_env )
- {
- sv_epoll_retry_count = atoi( lv_epoll_retry_count_env );
- }
- if ( sv_epoll_retry_count < 0 )
- {
- sv_epoll_retry_count = 0;
- }
- if ( sv_epoll_retry_count > 100 )
- {
- sv_epoll_retry_count = 100;
- }
+ char buf[MON_STRING_BUF_SIZE];
+ snprintf( buf, sizeof(buf)
+ , "[%s@%d] EPOLL timeout wait_timeout=%d msecs, retry_count=%d\n"
+ , method_name
+ , __LINE__
+ , sv_epoll_wait_timeout
+ , sv_epoll_retry_count );
+
+ mon_log_write( MON_CLUSTER_ALLGATHERSOCK_1, SQ_LOG_INFO, buf );
}
// do the work
@@ -6383,6 +6430,7 @@ int CCluster::AcceptSock( int sock )
int csock; // connected socket
struct sockaddr_in sockinfo; // socket address info
+ size = sizeof(struct sockaddr *);
if ( getsockname( sock, (struct sockaddr *) &sockinfo, &size ) )
{
char buf[MON_STRING_BUF_SIZE];
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/monitor/linux/internal.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/internal.h b/core/sqf/monitor/linux/internal.h
index 5766bd4..7aeaafb 100644
--- a/core/sqf/monitor/linux/internal.h
+++ b/core/sqf/monitor/linux/internal.h
@@ -83,7 +83,8 @@ enum SyncState
typedef enum {
State_Default=0,
- State_Quiesce, // node is in quiesce state
+ State_Quiesce, // node quiesce state while going down
+ State_SoftDown, // node soft down on DTM abort -> restart
State_Ready_To_Exit
} IntNodeState;
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/monitor/linux/monitor.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/monitor.cxx b/core/sqf/monitor/linux/monitor.cxx
index a4b114c..8228098 100755
--- a/core/sqf/monitor/linux/monitor.cxx
+++ b/core/sqf/monitor/linux/monitor.cxx
@@ -1670,6 +1670,7 @@ int main (int argc, char *argv[])
delete [] nodename;
delete Devices;
delete Nodes;
+ delete ZClient;
delete Monitor;
Monitor = NULL; // TRACE uses this
delete Config;
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/monitor/linux/pnode.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnode.cxx b/core/sqf/monitor/linux/pnode.cxx
index dda44f2..d5c8555 100755
--- a/core/sqf/monitor/linux/pnode.cxx
+++ b/core/sqf/monitor/linux/pnode.cxx
@@ -126,7 +126,6 @@ CNode::CNode( char *name, int pnid, int rank )
,freeCache_(0)
,state_(rank == -1 ? State_Down : State_Up)
,phase_(Phase_Ready)
- ,softDown_(false)
,killingNode_(false)
,dtmAborted_(false)
,smsAborted_(false)
@@ -252,7 +251,6 @@ CNode::CNode( char *name
,freeCache_(0)
,state_(rank == -1 ? State_Down : State_Up)
,phase_(Phase_Ready)
- ,softDown_(false)
,killingNode_(false)
,dtmAborted_(false)
,smsAborted_(false)
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/monitor/linux/pnode.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnode.h b/core/sqf/monitor/linux/pnode.h
index 0cc6fd3..607678b 100755
--- a/core/sqf/monitor/linux/pnode.h
+++ b/core/sqf/monitor/linux/pnode.h
@@ -232,13 +232,13 @@ public:
inline bool IsKillingNode( void ) { return( killingNode_ ); }
inline bool IsRankFailure( void ) { return( rankFailure_ ); }
inline bool IsSpareNode( void ) { return( spareNode_ ); }
- inline bool IsSoftNodeDown( void ) { return( softDown_ ); }
+ inline bool IsSoftNodeDown( void ) { return( internalState_ == State_SoftDown ); }
CNode *Link( CNode *entry );
void MoveLNodes( CNode *targetNode );
inline void ResetSpareNode( void ) { spareNode_ = false; }
void ResetWatchdogTimer( void );
- inline void ResetSoftNodeDown( void ) { softDown_ = false; }
+ inline void ResetSoftNodeDown( void ) { internalState_ = State_Default; }
inline void SetActivatingSpare( int activatingSpare ) { activatingSpare_ = activatingSpare; }
void SetAffinity( int nid, pid_t pid, PROCESSTYPE type );
void SetAffinity( CProcess *process );
@@ -268,7 +268,7 @@ public:
inline void SetKillingNode( bool killingNode ) { killingNode_ = killingNode; }
inline void SetNumCores( int numCores ) { numCores_ = numCores; }
inline void SetPhase( NodePhase phase ) { phase_ = phase; }
- inline void SetSoftNodeDown( void ) { softDown_ = true; }
+ inline void SetSoftNodeDown( void ) { internalState_ = State_SoftDown; }
inline void SetSparePNids( PNidVector &sparePNids ) { sparePNids_ = sparePNids; }
inline void SetRank( int rank ) { rank_ = rank; }
inline void SetRankFailure( bool failed ) { rankFailure_ = failed;
@@ -332,7 +332,6 @@ private:
string hostname_; // physical node name without domain
STATE state_; // Physical node's current operating state
NodePhase phase_; // Physical node's current phase during spare node activation
- bool softDown_; // true when soft down node in process
bool killingNode_; // true when down node in process
bool dtmAborted_; // true when DTM process terminates abnormally
bool smsAborted_; // true when SMS process terminates abnormally
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/monitor/linux/zclient.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/zclient.cxx b/core/sqf/monitor/linux/zclient.cxx
index 2f766ad..dc91a28 100644
--- a/core/sqf/monitor/linux/zclient.cxx
+++ b/core/sqf/monitor/linux/zclient.cxx
@@ -391,6 +391,7 @@ CZClient::~CZClient( void )
if (ZHandle)
{
+ WatchNodeDelete( Node_name );
zookeeper_close(ZHandle);
ZHandle = 0;
}
@@ -1095,6 +1096,26 @@ int CZClient::SetZNodeWatch( string &monZnode )
, "[%s], zoo_exists() for %s failed with error %s\n"
, method_name, monZnode.c_str( ), ZooErrorStr(rc));
mon_log_write(MON_ZCLIENT_SETZNODEWATCH_1, SQ_LOG_ERR, buf);
+ switch ( rc )
+ {
+ case ZSYSTEMERROR:
+ case ZRUNTIMEINCONSISTENCY:
+ case ZDATAINCONSISTENCY:
+ case ZCONNECTIONLOSS:
+ case ZMARSHALLINGERROR:
+ case ZUNIMPLEMENTED:
+ case ZOPERATIONTIMEOUT:
+ case ZBADARGUMENTS:
+ case ZINVALIDSTATE:
+ case ZSESSIONEXPIRED:
+ case ZCLOSING:
+ // Treat these errors like a session expiration, since
+ // we can't communicate with quorum servers
+ HandleZSessionExpiration();
+ break;
+ default:
+ break;
+ }
}
TRACE_EXIT;
@@ -1438,6 +1459,26 @@ int CZClient::WatchNodeDelete( const char *nodeName )
, "[%s], zoo_delete(%s) failed with error %s\n"
, method_name, nodeName, ZooErrorStr(rc) );
mon_log_write(MON_ZCLIENT_WATCHNODEDELETE_3, SQ_LOG_INFO, buf);
+ switch ( rc )
+ {
+ case ZSYSTEMERROR:
+ case ZRUNTIMEINCONSISTENCY:
+ case ZDATAINCONSISTENCY:
+ case ZCONNECTIONLOSS:
+ case ZMARSHALLINGERROR:
+ case ZUNIMPLEMENTED:
+ case ZOPERATIONTIMEOUT:
+ case ZBADARGUMENTS:
+ case ZINVALIDSTATE:
+ case ZSESSIONEXPIRED:
+ case ZCLOSING:
+ // Treat these errors like a session expiration, since
+ // we can't communicate with quorum servers
+ HandleZSessionExpiration();
+ break;
+ default:
+ break;
+ }
}
TRACE_EXIT;
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/sqenvcom.sh
----------------------------------------------------------------------
diff --git a/core/sqf/sqenvcom.sh b/core/sqf/sqenvcom.sh
index 6409b35..278cb2e 100644
--- a/core/sqf/sqenvcom.sh
+++ b/core/sqf/sqenvcom.sh
@@ -683,9 +683,9 @@ export SQ_MON_KEEPINTVL=6
export SQ_MON_KEEPCNT=5
# Monitor sync thread epoll wait timeout is in seconds
-# Currently set to 45 seconds
-export SQ_MON_EPOLL_WAIT_TIMEOUT=15
-export SQ_MON_EPOLL_RETRY_COUNT=3
+# Currently set to 60 seconds internally (1 second timeout, 60 retries)
+#export SQ_MON_EPOLL_WAIT_TIMEOUT=5
+#export SQ_MON_EPOLL_RETRY_COUNT=12
# Monitor Zookeeper client
# - A zero value disables the zclient logic in the monitor process.
[2/2] incubator-trafodion git commit: Merge [TRAFODION-2310] PR-782
DTM Lead Logic on very busy system resulted in trafodion crash
Posted by su...@apache.org.
Merge [TRAFODION-2310] PR-782 DTM Lead Logic on very busy system resulted in trafodion crash
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/db66dc29
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/db66dc29
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/db66dc29
Branch: refs/heads/master
Commit: db66dc292dcbc73141cf42af75aeafa80bad7bf2
Parents: 8d8adf1 1ad3983
Author: Suresh Subbiah <su...@apache.org>
Authored: Wed Oct 26 17:13:32 2016 +0000
Committer: Suresh Subbiah <su...@apache.org>
Committed: Wed Oct 26 17:13:32 2016 +0000
----------------------------------------------------------------------
core/sqf/monitor/linux/cluster.cxx | 104 +++++++++++++++++++++++---------
core/sqf/monitor/linux/internal.h | 3 +-
core/sqf/monitor/linux/monitor.cxx | 1 +
core/sqf/monitor/linux/pnode.cxx | 2 -
core/sqf/monitor/linux/pnode.h | 7 +--
core/sqf/monitor/linux/zclient.cxx | 41 +++++++++++++
core/sqf/sqenvcom.sh | 6 +-
7 files changed, 126 insertions(+), 38 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/db66dc29/core/sqf/sqenvcom.sh
----------------------------------------------------------------------