You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by su...@apache.org on 2017/02/23 16:49:00 UTC

[2/8] incubator-trafodion git commit: [TRAFODION-2235] Added monitoring of local monitor's znode to detect errors with Zookeeper quorum.

[TRAFODION-2235] Added monitoring of local monitor's znode to detect errors
with Zookeeper quorum.


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/5b84281c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/5b84281c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/5b84281c

Branch: refs/heads/release2.1
Commit: 5b84281ca5e2ae4f916eb3cbd25ba2e430826718
Parents: 5881cf1
Author: Zalo Correa <za...@esgyn.com>
Authored: Tue Feb 14 10:55:23 2017 -0800
Committer: Zalo Correa <za...@esgyn.com>
Committed: Tue Feb 14 10:55:23 2017 -0800

----------------------------------------------------------------------
 .../export/include/common/evl_sqlog_eventnum.h  |   2 +
 core/sqf/monitor/linux/makefile                 |   5 +-
 core/sqf/monitor/linux/monitor.cxx              |   4 +-
 core/sqf/monitor/linux/process.cxx              |  20 +++-
 core/sqf/monitor/linux/zclient.cxx              | 110 +++++++++++++++----
 core/sqf/monitor/linux/zclient.h                |  83 +++++++++++++-
 core/sqf/monitor/linux/zootest.cxx              |   5 +-
 core/sqf/sqenvcom.sh                            |   4 +
 8 files changed, 198 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/5b84281c/core/sqf/export/include/common/evl_sqlog_eventnum.h
----------------------------------------------------------------------
diff --git a/core/sqf/export/include/common/evl_sqlog_eventnum.h b/core/sqf/export/include/common/evl_sqlog_eventnum.h
index 618d5ce..527ab7f 100644
--- a/core/sqf/export/include/common/evl_sqlog_eventnum.h
+++ b/core/sqf/export/include/common/evl_sqlog_eventnum.h
@@ -761,6 +761,8 @@
 #define MON_ZCLIENT_WATCHNODEDELETE_3       101371703
 #define MON_ZCLIENT_ISZNODEEXPIRED_1        101371801
 #define MON_ZCLIENT_ISZNODEEXPIRED_2        101371802
+#define MON_ZCLIENT_CHECKMYZNODE_1          101371901
+#define MON_ZCLIENT_CHECKMYZNODE_2          101371902
 
 /**********************************************/
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/5b84281c/core/sqf/monitor/linux/makefile
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/makefile b/core/sqf/monitor/linux/makefile
index 656e9e6..614b945 100755
--- a/core/sqf/monitor/linux/makefile
+++ b/core/sqf/monitor/linux/makefile
@@ -86,6 +86,7 @@ INCLUDES+= -I$(ZOOKEEPER_DIR)/include
 LIBS+=  -lsqlite3
 LIBS+=  -llog4cxx
 LIBS+=  -ldl
+LIBS+=  -lssl
 
 ifeq ($(USE_TESTPOINTS),1)
    FLAGS+= -DUSE_TESTPOINTS
@@ -468,8 +469,8 @@ $(OUTDIR)/montim : montim.cxx
 $(BINEXPDIR)/monitor: $(MONITOROBJS)
 	@echo 'Building target: $@'
 	@echo 'Invoking: Linker'
-	@echo $(CXX) $(LNK_FLGS) -o$@ $(MONITOROBJS) $(LIBS) -lz
-	@$(CXX) $(LNK_FLGS) -o$@ $(MONITOROBJS) $(LIBS) -lz
+	@echo $(CXX) $(LNK_FLGS) -o$@ $(MONITOROBJS) $(LIBS) -lz -lcrypto
+	@$(CXX) $(LNK_FLGS) -o$@ $(MONITOROBJS) $(LIBS) -lz -lcrypto
 	@echo 'Finished building target: $@'
 	@echo ' '
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/5b84281c/core/sqf/monitor/linux/monitor.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/monitor.cxx b/core/sqf/monitor/linux/monitor.cxx
index 08a4708..8d055d8 100755
--- a/core/sqf/monitor/linux/monitor.cxx
+++ b/core/sqf/monitor/linux/monitor.cxx
@@ -750,9 +750,9 @@ void CMonitor::StartPrimitiveProcesses( void )
     TRACE_EXIT;
 }
 
-void HandleZSessionExpiration( void )
+void HandleMyNodeExpiration( void )
 {
-    const char method_name[] = "HandleZSessionExpiration";
+    const char method_name[] = "HandleMyNodeExpiration";
     TRACE_ENTRY;
     ReqQueue.enqueueDownReq(MyPNID);
     TRACE_EXIT;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/5b84281c/core/sqf/monitor/linux/process.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/process.cxx b/core/sqf/monitor/linux/process.cxx
index 67c1980..0ebe54c 100755
--- a/core/sqf/monitor/linux/process.cxx
+++ b/core/sqf/monitor/linux/process.cxx
@@ -3373,11 +3373,19 @@ CProcessContainer::CProcessContainer( bool nodeContainer )
     if(Mutex == SEM_FAILED)
     {
         char buf[MON_STRING_BUF_SIZE];
-        snprintf(buf, sizeof(buf), "[%s], Can't create semaphore %s!\n",
-                 method_name, sem_name);
+        int err = errno;
+        snprintf(buf, sizeof(buf), "[%s], Can't create semaphore %s! (%s)\n",
+                 method_name, sem_name, strerror(err));
         mon_log_write(MON_PROCESSCONT_PROCESSCONT_3, SQ_LOG_ERR, buf);
 
-        sem_unlink(sem_name);
+        err = sem_unlink(sem_name);
+        if (err == -1)
+        {
+            int err = errno;
+            snprintf(buf, sizeof(buf), "[%s], Can't unlink semaphore %s! (%s)\n",
+                     method_name, sem_name, strerror(err));
+            mon_log_write(MON_PROCESSCONT_PROCESSCONT_4, SQ_LOG_ERR, buf);
+        }
         abort();
     }
     
@@ -4350,7 +4358,7 @@ CProcess *CProcessContainer::CreateProcess (CProcess * parent,
 
             result = MPI_ERR_NAME;
 
-            return false;
+            return NULL;
         }
         if (parent->GetNid() == nid)
         {
@@ -4361,7 +4369,7 @@ CProcess *CProcessContainer::CreateProcess (CProcess * parent,
 
             result = MPI_ERR_RANK;
 
-            return false;
+            return NULL;
         }
     }
     else
@@ -4375,7 +4383,7 @@ CProcess *CProcessContainer::CreateProcess (CProcess * parent,
             mon_log_write(MON_PROCESSCONT_CREATEPROCESS_3, SQ_LOG_ERR, la_buf);
 
             result = MPI_ERR_NAME;
-            return false;
+            return NULL;
         }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/5b84281c/core/sqf/monitor/linux/zclient.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/zclient.cxx b/core/sqf/monitor/linux/zclient.cxx
index 074afb7..2b1ee7d 100644
--- a/core/sqf/monitor/linux/zclient.cxx
+++ b/core/sqf/monitor/linux/zclient.cxx
@@ -42,16 +42,12 @@
 #include "pnode.h"
 #include "zclient.h"
 
+//
 // The following specify the default values for the timers if the
-// zclient cluster monitoring timer related variables are not defined.
+// zclient cluster timer related environment variables are not defined.
 //
-// NOTE: It is recommended to set the checkrate to -1 which essentially
-//       make the zclient event driven. Meaning the watcher is called
-//       only when watched a znode is changed or is deleted (expires)
-//       Also, the session timeout must be kept at or below 60 seconds
-//       as this is enforced by Zookeeper. Any, value above 60 seconds
-//       is renegotiated by Zookeeper to 60 seconds.
-#define ZCLIENT_MONITORING_CHECKRATE         -1 // seconds (disabled)
+// - ZCLIENT_MY_ZNODE_CHECKRATE is the rate at which the local monitor's znode is checked
+#define ZCLIENT_MY_ZNODE_CHECKRATE            5 // seconds
 #define ZCLIENT_SESSION_TIMEOUT              60 // seconds (1 minute)
 
 // The monitors register their znodes under the cluster znode
@@ -241,7 +237,7 @@ void ZSessionWatcher( zhandle_t *zzh
                     ,  method_name );
             mon_log_write(MON_ZCLIENT_ZSESSIONWATCHER_1, SQ_LOG_CRIT, buf);
 
-            HandleZSessionExpiration();
+            HandleMyNodeExpiration();
 
             zookeeper_close( zzh );
             ZHandle=0;
@@ -254,7 +250,7 @@ void ZSessionWatcher( zhandle_t *zzh
                     ,  method_name );
             mon_log_write(MON_ZCLIENT_ZSESSIONWATCHER_2, SQ_LOG_CRIT, buf);
 
-            HandleZSessionExpiration();
+            HandleMyNodeExpiration();
 
             zookeeper_close( zzh );
             ZHandle=0;
@@ -291,7 +287,8 @@ CZClient::CZClient( const char *quorumHosts
          ,state_(ZC_DISABLED)
          ,enabled_(false)
          ,checkCluster_(false)
-         ,zcMonitoringRate_(ZCLIENT_MONITORING_CHECKRATE) // seconds
+         ,resetMyZNodeFailedTime_(true)
+         ,zcMonitoringRate_(ZCLIENT_MY_ZNODE_CHECKRATE) // seconds
          ,zkQuorumHosts_(quorumHosts)
          ,zkRootNode_(rootNode)
          ,zkRootNodeInstance_(instanceNode)
@@ -305,7 +302,7 @@ CZClient::CZClient( const char *quorumHosts
     
     char *zcMonitoringRateValueC;
     int zcMonitoringRateValue;
-    if ( (zcMonitoringRateValueC = getenv( "SQ_MON_ZCLIENT_MONITORING_CHECKRATE" )) )
+    if ( (zcMonitoringRateValueC = getenv( "SQ_MON_ZCLIENT_MY_ZNODE_CHECKRATE" )) )
     {
         // in seconds
         zcMonitoringRateValue = atoi( zcMonitoringRateValueC );
@@ -472,6 +469,65 @@ void CZClient::CheckCluster( void )
     TRACE_EXIT;
 }
 
+void CZClient::CheckMyZNode( void )
+{
+    const char method_name[] = "CZClient::CheckMyZNode";
+    TRACE_ENTRY;
+
+    int zerr;
+    struct timespec currentTime;
+
+    if ( IsCheckCluster() )
+    {
+        if (resetMyZNodeFailedTime_)
+        {
+            resetMyZNodeFailedTime_ = false;
+            clock_gettime(CLOCK_REALTIME, &myZNodeFailedTime_);
+            myZNodeFailedTime_.tv_sec += (GetSessionTimeout() * 2);
+            if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+            {
+                trace_printf( "%s@%d" " - Resetting MyZnode Fail Time %ld(secs)\n"
+                            , method_name, __LINE__
+                            , myZNodeFailedTime_.tv_sec );
+            }
+        }
+        if ( ! IsZNodeExpired( Node_name, zerr ) )
+        {
+            if ( zerr == ZCONNECTIONLOSS || zerr == ZOPERATIONTIMEOUT )
+            {
+                // Ignore transient errors with the quorum.
+                // However, if they persist past the fail time (set to twice
+                // the session timeout), handle it as a hard error.
+                clock_gettime(CLOCK_REALTIME, &currentTime);
+                if (currentTime.tv_sec > myZNodeFailedTime_.tv_sec)
+                {
+                    char buf[MON_STRING_BUF_SIZE];
+                    snprintf( buf, sizeof(buf)
+                            , "[%s], Zookeeper quorum comm error: %s - Handling my znode (%s) as expired! Node is going down.\n"
+                            , method_name, ZooErrorStr(zerr), Node_name );
+                    mon_log_write(MON_ZCLIENT_CHECKMYZNODE_1, SQ_LOG_ERR, buf);
+                    HandleMyNodeExpiration();
+                }
+            }
+            else
+            {
+                resetMyZNodeFailedTime_ = true;
+            }
+        }
+        else
+        {
+            char buf[MON_STRING_BUF_SIZE];
+            snprintf( buf, sizeof(buf)
+                    , "[%s], My znode (%s) expired! Node is going down.\n"
+                    , method_name, Node_name );
+            mon_log_write(MON_ZCLIENT_CHECKMYZNODE_2, SQ_LOG_ERR, buf);
+            HandleMyNodeExpiration();
+        }
+    }
+    
+    TRACE_EXIT;
+}
+
 int CZClient::GetClusterZNodes( String_vector *nodes )
 {
     const char method_name[] = "CZClient::GetClusterZNodes";
@@ -777,7 +833,7 @@ bool CZClient::IsZNodeExpired( const char *nodeName, int &zerr )
         case ZCLOSING:
             // Treat these error like a session expiration, since
             // we can't communicate with quorum servers
-            HandleZSessionExpiration();
+            HandleMyNodeExpiration();
             break;
         default:
             break;
@@ -926,12 +982,12 @@ void CZClient::MonitorZCluster()
         lock();
         if ( !IsEnabled() )
         {
-            // Wait until timer started
+            // Wait until enabled
             CLock::wait();
         }
         else
         {
-            if (zcMonitoringRate_ < 0)
+            if (zcMonitoringRate_ < 0 || GetState() == ZC_DISABLED)
             {
                 // Wait until signaled
                 CLock::wait();
@@ -974,18 +1030,33 @@ void CZClient::MonitorZCluster()
                 if ( IsCheckCluster() )
                 {
                     CheckCluster();
+                    if (GetState() != ZC_STOP)
+                    {
+                        SetState( ZC_MYZNODE );
+                    }
                 }
                 break;
             case ZC_WATCH:
                 if ( !IsCheckCluster() )
                 {
                     WatchCluster();
+                    if (GetState() != ZC_STOP)
+                    {
+                        SetState( ZC_MYZNODE );
+                    }
+                }
+                break;
+            case ZC_MYZNODE:
+                if ( IsCheckCluster() )
+                {
+                    CheckMyZNode();
                 }
                 break;
             case ZC_ZNODE:
                 if ( IsCheckCluster() )
                 {
                     HandleExpiredZNode();
+                    SetState( ZC_MYZNODE );
                 }
                 break;
             case ZC_STOP:
@@ -994,7 +1065,7 @@ void CZClient::MonitorZCluster()
             default:
                 break;
         }
-        if (zcMonitoringRate_ >= 0 )
+        if (zcMonitoringRate_ >= 0)
         {
             SetTimeToWakeUp( timeout );
         }
@@ -1197,7 +1268,7 @@ int CZClient::SetZNodeWatch( string &monZnode )
         case ZCLOSING:
             // Treat these error like a session expiration, since
             // we can't communicate with quorum servers
-            HandleZSessionExpiration();
+            HandleMyNodeExpiration();
             break;
         default:
             break;
@@ -1307,7 +1378,7 @@ static void *ZClientThread(void *arg)
 
 
 // Create the ZClientThread
-int CZClient::StartWork()
+int CZClient::StartWork( void )
 {
     const char method_name[] = "CZClient::StartWork";
     TRACE_ENTRY;
@@ -1442,7 +1513,6 @@ void CZClient::WatchCluster( void )
                 }
             }
             SetCheckCluster( true );
-            SetState( ZC_CLUSTER );
             FreeStringVector( &nodes );
         }
     }
@@ -1560,7 +1630,7 @@ int CZClient::WatchNodeDelete( const char *nodeName )
         case ZCLOSING:
             // Treat these error like a session expiration, since
             // we can't communicate with quorum servers
-            HandleZSessionExpiration();
+            HandleMyNodeExpiration();
             break;
         default:
             break;

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/5b84281c/core/sqf/monitor/linux/zclient.h
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/zclient.h b/core/sqf/monitor/linux/zclient.h
index 897f9c6..d6dc884 100644
--- a/core/sqf/monitor/linux/zclient.h
+++ b/core/sqf/monitor/linux/zclient.h
@@ -20,6 +20,78 @@
 //
 // @@@ END COPYRIGHT @@@
 ********************************************************************/
+//
+// Zookeeper Client (CZClient class)
+//
+//  Implements the Zookeeper client functionality in the monitor process
+//  as the ZClient object which manages znode monitoring events through
+//  the ZClientThread.
+//
+//  CZClient::StartWork() and CZClient::ShutdownWork() manage ZClientThread
+//  creation and deletion.
+//
+//      CZClient::StartWork()
+//              |
+//          pthread_create(ZClientThread)
+//              |
+//          ZC_DISABLED
+//              |
+//          CZClient::MonitorZCluster()
+//      
+//  CZClient::MonitorZCluster() is the thread main, a state machine:
+//      
+//                       CZClient::StartMonitoring()
+//                               |
+//                           ZC_START
+//                               |
+//                       CZClient::StartClusterMonitoring()
+//                               |
+//                           ZC_WATCH
+//                               |
+//                       CZClient::WatchCluster()
+//                               |
+//                           ZC_MYZNODE <------------------|
+//                               |                         |
+//                       CZClient::CheckMyZNode()          |
+//                               |                         |
+//                               |-------------------------|
+//                                                         |
+//  ZOO_CHILD_EVENT                                        |
+//  ZOO_NOTWATCHING_EVENT                                  |
+//            |                                            |
+//    CZClient::TriggerCheck()---|                         |
+//                               |                         |
+//                           ZC_CLUSTER                    |
+//                               |                         |
+//                       CZClient::CheckCluster()          |
+//                               |                         |
+//                               |-------------------------|
+//  ZOO_CREATED_EVENT                                      |
+//  ZOO_DELETED_EVENT                                      |
+//  ZOO_CHANGED_EVENT                                      |
+//            |                                            |
+//    CZClient::TriggerCheck()---|                         |
+//                               |                         |
+//                           ZC_ZNODE                      |
+//                               |                         |
+//                       CZClient::HandleExpiredZNode()    |
+//                               |                         |
+//                               |-------------------------|
+//                 
+//                       CZClient::StopMonitoring()
+//                               |
+//                           ZC_STOP
+//                               |
+//                       CZClient::StopClusterMonitoring()
+//                               |
+//                           ZC_DISABLED
+//
+//      CZClient::ShutdownWork()
+//              |
+//          ZC_SHUTDOWN
+//              |
+//          pthread_join()
+//
 #ifndef ZCLIENT_H_
 #define ZCLIENT_H_
 
@@ -36,10 +108,11 @@ using namespace std;
 typedef list<string>    ZNodeList_t;
 
 // The following two functions must be implemented in the calling program.
-// - HandleZSessionExpiration() is invoked when the program's session expires.
+// - HandleMyNodeExpiration() is invoked when the monitor's session expires,
+//   the monitor's znode expires, or quorum communication fails
 // - HandleNodeExpiration(nodeName) is invoked when the znode associated with
 //   the nodeName passed in expires.
-extern void HandleZSessionExpiration( void );
+extern void HandleMyNodeExpiration( void );
 extern void HandleNodeExpiration( const char *nodeName );
 
 class CZClient : public CLock
@@ -54,6 +127,7 @@ public:
         ZC_WATCH,         // set cluster watchers
         ZC_CLUSTER,       // check cluster
         ZC_ZNODE,         // check znode
+        ZC_MYZNODE,       // check this monitor's znode
         ZC_STOP,          // stop monitoring
         ZC_SHUTDOWN       // thread exit 
     } ZClientState_t;
@@ -79,6 +153,7 @@ public:
 
 private:
     void    CheckCluster( void );
+    void    CheckMyZNode( void );
     int     GetClusterZNodes( String_vector *children );
     int     GetZNodeData( string &monZnode, string &nodeName, int &pnid );
     ZClientState_t GetState( void ) { CAutoLock lock(getLocker()); return( state_ ); }
@@ -103,13 +178,15 @@ private:
     ZClientState_t  state_;        // Physical node's current operating state
     bool            enabled_;      // true when cluster monitoring enabled
     bool            checkCluster_; // true when cluster monitoring enabled
-    long            zcMonitoringRate_; // in nano seconds
+    bool            resetMyZNodeFailedTime_; // set to trigger fail time reset
+    long            zcMonitoringRate_; // in seconds
 
     string          zkQuorumHosts_;
     string          zkRootNode_;
     string          zkRootNodeInstance_;
     stringstream    zkQuorumPort_;
     int             zkSessionTimeout_;
+    struct timespec myZNodeFailedTime_;
     
     ZNodeList_t     znodeQueue_;
 };

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/5b84281c/core/sqf/monitor/linux/zootest.cxx
----------------------------------------------------------------------
diff --git a/core/sqf/monitor/linux/zootest.cxx b/core/sqf/monitor/linux/zootest.cxx
index 8a90299..c55d03b 100644
--- a/core/sqf/monitor/linux/zootest.cxx
+++ b/core/sqf/monitor/linux/zootest.cxx
@@ -27,6 +27,7 @@
 #include <string.h>
 #include <ifaddrs.h>
 #include <netdb.h>
+#include <unistd.h>
 #include <new>
 #include <stdio.h>
 #include <list>
@@ -54,9 +55,9 @@ CZClient    *ZClient = NULL;
 CMonLog     *MonLog =  NULL;
 CMonLog     *SnmpLog =  NULL;
 
-void HandleZSessionExpiration( void )
+void HandleMyNodeExpiration( void )
 {
-    const char method_name[] = "HandleZSessionExpiration";
+    const char method_name[] = "HandleMyNodeExpiration";
     TRACE_ENTRY;
     printf( "%s@%d ZSession expired!\n", method_name, __LINE__ );
     ZClient->StopMonitoring();

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/5b84281c/core/sqf/sqenvcom.sh
----------------------------------------------------------------------
diff --git a/core/sqf/sqenvcom.sh b/core/sqf/sqenvcom.sh
index e6cea33..e95e95b 100644
--- a/core/sqf/sqenvcom.sh
+++ b/core/sqf/sqenvcom.sh
@@ -700,6 +700,10 @@ export SQ_MON_EPOLL_RETRY_COUNT=16
 #    non-responsive monitor zclient which results in a Trafodion node down. 
 #    Default is 60 seconds (1 minute) which is the maximum Zookeeper allows.
 #export SQ_MON_ZCLIENT_SESSION_TIMEOUT=60
+#  - My znode check rate in seconds defines how often the local
+#    monitor's znode is checked. Uncomment to override default value.
+#    Default is 5 seconds.
+#export SQ_MON_ZCLIENT_MY_ZNODE_CHECKRATE=5
 
 # set to 0 to disable phandle verifier
 export SQ_PHANDLE_VERIFIER=1