You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by su...@apache.org on 2016/10/26 17:13:50 UTC

[1/2] incubator-trafodion git commit: [TRAFODION-2310] Changed soft down node processing to propagate node state change to remote monitor prior to killing processes.

Repository: incubator-trafodion
Updated Branches:
  refs/heads/master 8d8adf141 -> db66dc292


[TRAFODION-2310] Changed soft down node processing to propagate node state change
to remote monitor prior to killing processes.


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/1ad3983f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/1ad3983f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/1ad3983f

Branch: refs/heads/master
Commit: 1ad3983f7de988554a55281072a5ae28baa58fa9
Parents: de82dfb
Author: Zalo Correa <za...@esgyn.com>
Authored: Tue Oct 25 15:04:40 2016 -0700
Committer: Zalo Correa <za...@esgyn.com>
Committed: Tue Oct 25 15:04:40 2016 -0700

----------------------------------------------------------------------
 core/sqf/monitor/linux/cluster.cxx | 104 +++++++++++++++++++++++---------
 core/sqf/monitor/linux/internal.h  |   3 +-
 core/sqf/monitor/linux/monitor.cxx |   1 +
 core/sqf/monitor/linux/pnode.cxx   |   2 -
 core/sqf/monitor/linux/pnode.h     |   7 +--
 core/sqf/monitor/linux/zclient.cxx |  41 +++++++++++++
 core/sqf/sqenvcom.sh               |   6 +-
 7 files changed, 126 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/monitor/linux/cluster.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/cluster.cxx b/core/sqf/monitor/linux/cluster.cxx
index 104f7d8..a986b67 100644
--- a/core/sqf/monitor/linux/cluster.cxx
+++ b/core/sqf/monitor/linux/cluster.cxx
@@ -410,6 +410,18 @@ void CCluster::AssignTmLeader(int pnid)
 
     if (TmLeaderPNid != pnid) 
     {
+        node = LNode[TmLeaderNid]->GetNode();
+
+        if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC))
+        {
+            trace_printf( "%s@%d - Node pnid=%d (%s), phase=%s, isSoftNodeDown=%d\n"
+                        , method_name, __LINE__
+                        , node->GetPNid()
+                        , node->GetName()
+                        , NodePhaseString(node->GetPhase())
+                        , node->IsSoftNodeDown());
+        }
+    
         return;
     }
 
@@ -436,6 +448,16 @@ void CCluster::AssignTmLeader(int pnid)
 
         node = Node[TmLeaderPNid];
 
+        if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC))
+        {
+            trace_printf( "%s@%d - Node pnid=%d (%s), phase=%s, isSoftNodeDown=%d\n"
+                        , method_name, __LINE__
+                        , node->GetPNid()
+                        , node->GetName()
+                        , NodePhaseString(node->GetPhase())
+                        , node->IsSoftNodeDown());
+        }
+
         if ( node->IsSpareNode() ||
              node->IsSoftNodeDown() ||
              node->GetState() != State_Up ||
@@ -938,14 +960,21 @@ void CCluster::SoftNodeDown( int pnid )
     node = Nodes->GetNode(pnid);
 
     if (trace_settings & (TRACE_REQUEST | TRACE_INIT | TRACE_RECOVERY))
-       trace_printf( "%s@%d - pnid=%d, state=%s, isInQuiesceState=%d,"
-                     " (local pnid=%d, state=%s, isInQuiesceState=%d, "
-                     "shutdown level=%d)\n"
-                   , method_name, __LINE__
-                   , pnid, StateString(node->GetState())
-                   , node->isInQuiesceState()
-                   , MyPNID, StateString(MyNode->GetState())
-                   , MyNode->isInQuiesceState(), MyNode->GetShutdownLevel() );
+    {
+        trace_printf( "%s@%d - pnid=%d, state=%s, phase=%s, isInQuiesceState=%d, isSoftNodeDown=%d"
+                      " (local pnid=%d, state=%s, phase=%s, isInQuiesceState=%d, isSoftNodeDown=%d "
+                      "shutdown level=%d)\n"
+                    , method_name, __LINE__
+                    , pnid, StateString(node->GetState())
+                    , NodePhaseString(node->GetPhase())
+                    , node->isInQuiesceState()
+                    , node->IsSoftNodeDown()
+                    , MyPNID, StateString(MyNode->GetState())
+                    , NodePhaseString(MyNode->GetPhase())
+                    , MyNode->isInQuiesceState()
+                    , MyNode->IsSoftNodeDown()
+                    , MyNode->GetShutdownLevel() );
+    }
 
     if (( MyPNID == pnid              ) &&
         ( MyNode->GetState() == State_Down ||
@@ -966,12 +995,6 @@ void CCluster::SoftNodeDown( int pnid )
     {
         node->SetSoftNodeDown();            // Set soft down flag
         node->SetPhase( Phase_SoftDown );   // Suspend TMSync on node
-        node->KillAllDownSoft();            // Kill all processes
-
-        snprintf( buf, sizeof(buf)
-                , "[%s], Node %s (%d) executed soft down.\n"
-                , method_name, node->GetName(), node->GetPNid() );
-        mon_log_write(MON_CLUSTER_SOFTNODEDOWN_2, SQ_LOG_ERR, buf);
 
         if ( node->GetPNid() == MyPNID )
         {
@@ -979,6 +1002,13 @@ void CCluster::SoftNodeDown( int pnid )
             CReplSoftNodeDown *repl = new CReplSoftNodeDown( MyPNID );
             Replicator.addItem(repl);
         }
+
+        node->KillAllDownSoft();            // Kill all processes
+
+        snprintf( buf, sizeof(buf)
+                , "[%s], Node %s (%d) executed soft down.\n"
+                , method_name, node->GetName(), node->GetPNid() );
+        mon_log_write(MON_CLUSTER_SOFTNODEDOWN_2, SQ_LOG_ERR, buf);
     }
     else
     {
@@ -1003,6 +1033,16 @@ void CCluster::SoftNodeDown( int pnid )
            trace_printf("%s@%d - Node %s (pnid=%d) TmSyncState updated (%d)(%s)\n", method_name, __LINE__, MyNode->GetName(), MyPNID, MyNode->GetTmSyncState(), SyncStateString( MyNode->GetTmSyncState() ));
     }
 
+    if (trace_settings & (TRACE_INIT | TRACE_RECOVERY | TRACE_REQUEST | TRACE_SYNC | TRACE_TMSYNC))
+    {
+        trace_printf( "%s@%d - Node pnid=%d (%s), phase=%s, isSoftNodeDown=%d\n"
+                    , method_name, __LINE__
+                    , node->GetPNid()
+                    , node->GetName()
+                    , NodePhaseString(node->GetPhase())
+                    , node->IsSoftNodeDown());
+    }
+
     IAmIntegrated = false;
     AssignTmLeader(pnid);
 
@@ -4321,25 +4361,32 @@ int CCluster::AllgatherSock( int nbytes, void *sbuf, char *rbuf, int tag, MPI_St
         {
             // convert to milliseconds
             sv_epoll_wait_timeout = atoi( lv_epoll_wait_timeout_env ) * 1000;
+            char *lv_epoll_retry_count_env = getenv( "SQ_MON_EPOLL_RETRY_COUNT" );
+            if ( lv_epoll_retry_count_env )
+            {
+                sv_epoll_retry_count = atoi( lv_epoll_retry_count_env );
+            }
+            if ( sv_epoll_retry_count > 180 )
+            {
+                sv_epoll_retry_count = 180;
+            }
         }
         else
         {
-            sv_epoll_wait_timeout = -1;
+            // default to 60 seconds total (1 second wait timeout x 60 retries)
+            sv_epoll_wait_timeout = 1000;
+            sv_epoll_retry_count = 60;
         }
 
-        char *lv_epoll_retry_count_env = getenv( "SQ_MON_EPOLL_RETRY_COUNT" );
-        if ( lv_epoll_retry_count_env )
-        {
-            sv_epoll_retry_count = atoi( lv_epoll_retry_count_env );
-        }
-        if ( sv_epoll_retry_count < 0 )
-        {
-            sv_epoll_retry_count = 0;
-        }
-        if ( sv_epoll_retry_count > 100 )
-        {
-            sv_epoll_retry_count = 100;
-        }
+        char buf[MON_STRING_BUF_SIZE];
+        snprintf( buf, sizeof(buf)
+                , "[%s@%d] EPOLL timeout wait_timeout=%d msecs, retry_count=%d\n"
+                , method_name
+                ,  __LINE__
+                , sv_epoll_wait_timeout
+                , sv_epoll_retry_count );
+
+        mon_log_write( MON_CLUSTER_ALLGATHERSOCK_1, SQ_LOG_INFO, buf );
     }
 
     // do the work
@@ -6383,6 +6430,7 @@ int CCluster::AcceptSock( int sock )
     int csock; // connected socket
     struct sockaddr_in  sockinfo;   // socket address info
 
+    size = sizeof(struct sockaddr *);
     if ( getsockname( sock, (struct sockaddr *) &sockinfo, &size ) )
     {
         char buf[MON_STRING_BUF_SIZE];

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/monitor/linux/internal.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/internal.h b/core/sqf/monitor/linux/internal.h
index 5766bd4..7aeaafb 100644
--- a/core/sqf/monitor/linux/internal.h
+++ b/core/sqf/monitor/linux/internal.h
@@ -83,7 +83,8 @@ enum SyncState
 
 typedef enum {
     State_Default=0,
-    State_Quiesce,                  // node is in quiesce state
+    State_Quiesce,                  // node quiesce state while going down
+    State_SoftDown,                 // node soft down on DTM abort -> restart
     State_Ready_To_Exit
 } IntNodeState; 
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/monitor/linux/monitor.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/monitor.cxx b/core/sqf/monitor/linux/monitor.cxx
index a4b114c..8228098 100755
--- a/core/sqf/monitor/linux/monitor.cxx
+++ b/core/sqf/monitor/linux/monitor.cxx
@@ -1670,6 +1670,7 @@ int main (int argc, char *argv[])
     delete [] nodename;
     delete Devices;
     delete Nodes;
+    delete ZClient;
     delete Monitor;
     Monitor = NULL; // TRACE uses this
     delete Config;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/monitor/linux/pnode.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnode.cxx b/core/sqf/monitor/linux/pnode.cxx
index dda44f2..d5c8555 100755
--- a/core/sqf/monitor/linux/pnode.cxx
+++ b/core/sqf/monitor/linux/pnode.cxx
@@ -126,7 +126,6 @@ CNode::CNode( char *name, int pnid, int rank )
       ,freeCache_(0)
       ,state_(rank == -1 ? State_Down : State_Up)
       ,phase_(Phase_Ready)
-      ,softDown_(false)
       ,killingNode_(false)
       ,dtmAborted_(false)
       ,smsAborted_(false)
@@ -252,7 +251,6 @@ CNode::CNode( char *name
       ,freeCache_(0)
       ,state_(rank == -1 ? State_Down : State_Up)
       ,phase_(Phase_Ready)
-      ,softDown_(false)
       ,killingNode_(false)
       ,dtmAborted_(false)
       ,smsAborted_(false)

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/monitor/linux/pnode.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/pnode.h b/core/sqf/monitor/linux/pnode.h
index 0cc6fd3..607678b 100755
--- a/core/sqf/monitor/linux/pnode.h
+++ b/core/sqf/monitor/linux/pnode.h
@@ -232,13 +232,13 @@ public:
     inline bool  IsKillingNode( void ) { return( killingNode_ ); }
     inline bool  IsRankFailure( void ) { return( rankFailure_ ); }
     inline bool  IsSpareNode( void ) { return( spareNode_ ); }
-    inline bool  IsSoftNodeDown( void ) { return( softDown_ ); }
+    inline bool  IsSoftNodeDown( void ) { return( internalState_ == State_SoftDown ); }
 
     CNode  *Link( CNode *entry );
     void    MoveLNodes( CNode *targetNode );
     inline void ResetSpareNode( void ) { spareNode_ = false; }
     void    ResetWatchdogTimer( void );
-    inline void ResetSoftNodeDown( void ) { softDown_ = false; }
+    inline void ResetSoftNodeDown( void ) { internalState_ = State_Default; }
     inline void SetActivatingSpare( int activatingSpare ) { activatingSpare_ = activatingSpare; }
     void    SetAffinity( int nid, pid_t pid, PROCESSTYPE type );
     void    SetAffinity( CProcess *process );
@@ -268,7 +268,7 @@ public:
     inline void SetKillingNode( bool killingNode ) { killingNode_ = killingNode; }
     inline void SetNumCores( int numCores ) { numCores_ = numCores; }
     inline void SetPhase( NodePhase phase ) { phase_ = phase; }
-    inline void SetSoftNodeDown( void ) { softDown_ = true; }
+    inline void SetSoftNodeDown( void ) { internalState_ = State_SoftDown; }
     inline void SetSparePNids( PNidVector &sparePNids ) { sparePNids_ = sparePNids; }
     inline void SetRank( int rank ) { rank_ = rank; }
     inline void SetRankFailure( bool failed ) { rankFailure_ = failed; 
@@ -332,7 +332,6 @@ private:
     string        hostname_;     // physical node name without domain
     STATE         state_;        // Physical node's current operating state
     NodePhase     phase_;        // Physical node's current phase during spare node activation
-    bool          softDown_;     // true when soft down node in process
     bool          killingNode_;  // true when down node in process
     bool          dtmAborted_;   // true when DTM process terminates abnormally
     bool          smsAborted_;   // true when SMS process terminates abnormally

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/monitor/linux/zclient.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/zclient.cxx b/core/sqf/monitor/linux/zclient.cxx
index 2f766ad..dc91a28 100644
--- a/core/sqf/monitor/linux/zclient.cxx
+++ b/core/sqf/monitor/linux/zclient.cxx
@@ -391,6 +391,7 @@ CZClient::~CZClient( void )
 
     if (ZHandle)
     {
+        WatchNodeDelete( Node_name );
         zookeeper_close(ZHandle);
         ZHandle = 0;
     }
@@ -1095,6 +1096,26 @@ int CZClient::SetZNodeWatch( string &monZnode )
                 , "[%s], zoo_exists() for %s failed with error %s\n"
                 ,  method_name, monZnode.c_str( ), ZooErrorStr(rc));
         mon_log_write(MON_ZCLIENT_SETZNODEWATCH_1, SQ_LOG_ERR, buf);
+        switch ( rc )
+        {
+        case ZSYSTEMERROR:
+        case ZRUNTIMEINCONSISTENCY:
+        case ZDATAINCONSISTENCY:
+        case ZCONNECTIONLOSS:
+        case ZMARSHALLINGERROR:
+        case ZUNIMPLEMENTED:
+        case ZOPERATIONTIMEOUT:
+        case ZBADARGUMENTS:
+        case ZINVALIDSTATE:
+        case ZSESSIONEXPIRED:
+        case ZCLOSING:
+            // Treat these errors like a session expiration, since
+            // we can't communicate with quorum servers
+            HandleZSessionExpiration();
+            break;
+        default:
+            break;
+        }
     }
 
     TRACE_EXIT;
@@ -1438,6 +1459,26 @@ int CZClient::WatchNodeDelete( const char *nodeName )
                 , "[%s], zoo_delete(%s) failed with error %s\n"
                 , method_name, nodeName, ZooErrorStr(rc) );
         mon_log_write(MON_ZCLIENT_WATCHNODEDELETE_3, SQ_LOG_INFO, buf);
+        switch ( rc )
+        {
+        case ZSYSTEMERROR:
+        case ZRUNTIMEINCONSISTENCY:
+        case ZDATAINCONSISTENCY:
+        case ZCONNECTIONLOSS:
+        case ZMARSHALLINGERROR:
+        case ZUNIMPLEMENTED:
+        case ZOPERATIONTIMEOUT:
+        case ZBADARGUMENTS:
+        case ZINVALIDSTATE:
+        case ZSESSIONEXPIRED:
+        case ZCLOSING:
+            // Treat these errors like a session expiration, since
+            // we can't communicate with quorum servers
+            HandleZSessionExpiration();
+            break;
+        default:
+            break;
+        }
     }
 
     TRACE_EXIT;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1ad3983f/core/sqf/sqenvcom.sh
----------------------------------------------------------------------
diff --git a/core/sqf/sqenvcom.sh b/core/sqf/sqenvcom.sh
index 6409b35..278cb2e 100644
--- a/core/sqf/sqenvcom.sh
+++ b/core/sqf/sqenvcom.sh
@@ -683,9 +683,9 @@ export SQ_MON_KEEPINTVL=6
 export SQ_MON_KEEPCNT=5
 
 # Monitor sync thread epoll wait timeout is in seconds
-# Currently set to 45 seconds
-export SQ_MON_EPOLL_WAIT_TIMEOUT=15
-export SQ_MON_EPOLL_RETRY_COUNT=3
+# Currently set to 60 seconds internally (1 second timeout, 60 retries)
+#export SQ_MON_EPOLL_WAIT_TIMEOUT=5
+#export SQ_MON_EPOLL_RETRY_COUNT=12
 
 # Monitor Zookeeper client
 #  - A zero value disables the zclient logic in the monitor process.


[2/2] incubator-trafodion git commit: Merge [TRAFODION-2310] PR-782 DTM Lead Logic on very busy system resulted in trafodion crash

Posted by su...@apache.org.
Merge [TRAFODION-2310] PR-782 DTM Lead Logic on very busy system resulted in trafodion crash


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/db66dc29
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/db66dc29
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/db66dc29

Branch: refs/heads/master
Commit: db66dc292dcbc73141cf42af75aeafa80bad7bf2
Parents: 8d8adf1 1ad3983
Author: Suresh Subbiah <su...@apache.org>
Authored: Wed Oct 26 17:13:32 2016 +0000
Committer: Suresh Subbiah <su...@apache.org>
Committed: Wed Oct 26 17:13:32 2016 +0000

----------------------------------------------------------------------
 core/sqf/monitor/linux/cluster.cxx | 104 +++++++++++++++++++++++---------
 core/sqf/monitor/linux/internal.h  |   3 +-
 core/sqf/monitor/linux/monitor.cxx |   1 +
 core/sqf/monitor/linux/pnode.cxx   |   2 -
 core/sqf/monitor/linux/pnode.h     |   7 +--
 core/sqf/monitor/linux/zclient.cxx |  41 +++++++++++++
 core/sqf/sqenvcom.sh               |   6 +-
 7 files changed, 126 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/db66dc29/core/sqf/sqenvcom.sh
----------------------------------------------------------------------