You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by su...@apache.org on 2017/05/31 18:47:20 UTC

[19/50] [abbrv] incubator-trafodion git commit: Merge remote branch 'origin/master' into TRAFODION-2001

Merge remote branch 'origin/master' into TRAFODION-2001

Conflicts:
	core/sqf/export/include/common/evl_sqlog_eventnum.h
	core/sqf/monitor/linux/makefile
	core/sqf/sql/scripts/sqcheck


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/1e294233
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/1e294233
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/1e294233

Branch: refs/heads/master
Commit: 1e2942337ed5888e7685406d786fe1fce6507d54
Parents: 52d45b7 1e94882
Author: Zalo Correa <za...@esgyn.com>
Authored: Wed Sep 28 15:37:47 2016 -0700
Committer: Zalo Correa <za...@esgyn.com>
Committed: Wed Sep 28 15:37:47 2016 -0700

----------------------------------------------------------------------
 .../export/include/common/evl_sqlog_eventnum.h  |   41 +
 core/sqf/monitor/linux/cluster.cxx              |  249 ++-
 core/sqf/monitor/linux/cluster.h                |    1 +
 core/sqf/monitor/linux/makefile                 |   26 +-
 core/sqf/monitor/linux/monitor.cxx              |  187 +++
 core/sqf/monitor/linux/monitor.h                |    2 +
 core/sqf/monitor/linux/zclient.cxx              | 1432 ++++++++++++++++++
 core/sqf/monitor/linux/zclient.h                |  116 ++
 core/sqf/monitor/linux/zootest.cxx              |  283 ++++
 core/sqf/monitor/linux/zootest.h                |   34 +
 core/sqf/sqenvcom.sh                            |   19 +-
 core/sqf/sql/scripts/sqcheck                    |   27 +-
 core/sql/bin/SqlciErrors.txt                    |    4 +-
 core/sql/regress/compGeneral/EXPECTED023        |    2 +-
 core/sql/regress/hive/EXPECTED018               |  183 ++-
 core/sql/regress/privs1/EXPECTED132             |  100 +-
 core/sql/regress/privs1/EXPECTED136             |    2 +
 core/sql/regress/privs2/EXPECTED129             |   23 +-
 core/sql/regress/privs2/EXPECTED135             |   17 +-
 core/sql/regress/privs2/EXPECTED138             |   22 +
 core/sql/regress/privs2/EXPECTED139             |   10 +
 core/sql/regress/privs2/EXPECTED140             |    4 +-
 core/sql/regress/privs2/EXPECTED142             |    6 +
 core/sql/sqlcomp/PrivMgrDesc.cpp                |   36 +-
 core/sql/sqlcomp/PrivMgrDesc.h                  |   22 +-
 core/sql/sqlcomp/PrivMgrPrivileges.cpp          | 1349 +++++++----------
 core/sql/sqlcomp/PrivMgrPrivileges.h            |   78 +-
 core/sql/sqlcomp/nadefaults.cpp                 |    2 +-
 .../org/trafodion/dcs/server/ServerManager.java |  115 +-
 .../phoenix/end2end/MultiCfQueryExecTest.java   |    2 +-
 30 files changed, 3345 insertions(+), 1049 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1e294233/core/sqf/export/include/common/evl_sqlog_eventnum.h
----------------------------------------------------------------------
diff --cc core/sqf/export/include/common/evl_sqlog_eventnum.h
index b15382f,e723d33..ec85b13
--- a/core/sqf/export/include/common/evl_sqlog_eventnum.h
+++ b/core/sqf/export/include/common/evl_sqlog_eventnum.h
@@@ -768,9 -727,38 +777,41 @@@
  /* Module: pstartd.cxx = 35 */
  #define MON_PSTARTD_MAIN_1                  101350101
  
 +/* Module: persistconfig.cxx = 36 */
 +#define MON_PERSISTCONFIG_ADDCONFIG_1       101360101
 +
+ /* Module: zclient.cxx = 37 */
+ #define MON_ZCLIENT_ZCLIENT_1               101370101
+ #define MON_ZCLIENT_ZCLIENT_2               101370102
+ #define MON_ZCLIENT_ZCLIENT_3               101370103
+ #define MON_ZCLIENT_SYNC_STRING_COMP_1      101370201
+ #define MON_ZCLIENT_CHECKCLUSTER_1          101370301
+ #define MON_ZCLIENT_CHECKCLUSTER_2          101370302
+ #define MON_ZCLIENT_CHECKCLUSTERZNODES_1    101370401
+ #define MON_ZCLIENT_CHECKCLUSTERZNODES_2    101370402
+ #define MON_ZCLIENT_CHECKCLUSTERZNODES_3    101370403
+ #define MON_ZCLIENT_GETCLUSTERZNODES_1      101370501
+ #define MON_ZCLIENT_GETCLUSTERZNODES_2      101370502
+ #define MON_ZCLIENT_REGISTERZNODE_1         101370601
+ #define MON_ZCLIENT_SHUTDOWNWORK_1          101370701
+ #define MON_ZCLIENT_ZCLIENTTHREAD_1         101370801
+ #define MON_ZCLIENT_STARTWORK_1             101370901
+ #define MON_ZCLIENT_MONITORZCLUSTER_1       101371001
+ #define MON_ZCLIENT_GETZNODEDATA_1          101371101
+ #define MON_ZCLIENT_GETZNODEDATA_2          101371102
+ #define MON_ZCLIENT_GETZNODEDATA_3          101371103
+ #define MON_ZCLIENT_WATCHCLUSTER_1          101371201
+ #define MON_ZCLIENT_WATCHCLUSTER_2          101371202
+ #define MON_ZCLIENT_SETZNODEWATCH_1         101371301
+ #define MON_ZCLIENT_SETZNODEWATCH_2         101371302
+ #define MON_ZCLIENT_WATCHNODE_1             101371401
+ #define MON_ZCLIENT_ZSESSIONWATCHER_1       101371501
+ #define MON_ZCLIENT_ZSESSIONWATCHER_2       101371502
+ #define MON_ZCLIENT_CHECKZNODE_1            101371601
+ #define MON_ZCLIENT_WATCHNODEDELETE_1       101371701
+ #define MON_ZCLIENT_WATCHNODEDELETE_2       101371702
+ #define MON_ZCLIENT_WATCHNODEDELETE_3       101371703
+ 
  /**********************************************/
  
  /*********** Seabed ***********/

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1e294233/core/sqf/monitor/linux/cluster.cxx
----------------------------------------------------------------------
diff --cc core/sqf/monitor/linux/cluster.cxx
index 5a46cf8,104f7d8..84768c9
--- a/core/sqf/monitor/linux/cluster.cxx
+++ b/core/sqf/monitor/linux/cluster.cxx
@@@ -4447,9 -4276,10 +4474,10 @@@ int CCluster::AllgatherSock( int nbytes
          int p_n2recv;
          bool p_sending;
          bool p_receiving;
+         int p_timeout_count;
          char *p_buff;
      } peer_t;
 -    peer_t p[cfgPNodes_];
 +    peer_t p[GetConfigPNodesMax()];
      memset( p, 0, sizeof(p) );
      tag = 0; // make compiler happy
  
@@@ -4481,18 -4312,116 +4510,116 @@@
      inBarrier_ = true;
      MonStats->BarrierWaitIncr( );
  
+     static int sv_epoll_wait_timeout = -2;
+     static int sv_epoll_retry_count = 1;
+     if ( sv_epoll_wait_timeout == -2 )
+     {
+         char *lv_epoll_wait_timeout_env = getenv( "SQ_MON_EPOLL_WAIT_TIMEOUT" );
+         if ( lv_epoll_wait_timeout_env )
+         {
+             // convert to milliseconds
+             sv_epoll_wait_timeout = atoi( lv_epoll_wait_timeout_env ) * 1000;
+         }
+         else
+         {
+             sv_epoll_wait_timeout = -1;
+         }
+ 
+         char *lv_epoll_retry_count_env = getenv( "SQ_MON_EPOLL_RETRY_COUNT" );
+         if ( lv_epoll_retry_count_env )
+         {
+             sv_epoll_retry_count = atoi( lv_epoll_retry_count_env );
+         }
+         if ( sv_epoll_retry_count < 0 )
+         {
+             sv_epoll_retry_count = 0;
+         }
+         if ( sv_epoll_retry_count > 100 )
+         {
+             sv_epoll_retry_count = 100;
+         }
+     }
+ 
      // do the work
 -    struct epoll_event events[2*cfgPNodes_ + 1];
 +    struct epoll_event events[2*GetConfigPNodesMax() + 1];
      while ( 1 )
      {
 -        int maxEvents = 2*cfgPNodes_ - nsent - nrecv;
 +        int maxEvents = 2*GetConfigPNodesMax() - nsent - nrecv;
          if ( maxEvents == 0 ) break;
          int nw;
          while ( 1 )
          {
-             nw = epoll_wait( epollFD_, events, maxEvents, -1 );
+             nw = epoll_wait( epollFD_, events, maxEvents, sv_epoll_wait_timeout );
              if ( nw >= 0 || errno != EINTR ) break;
          }
+         if ( nw == 0 )
+         {
 -            for ( int iPeer = 0; iPeer < cfgPNodes_; iPeer++ )
++            for ( int iPeer = 0; iPeer < GetConfigPNodesMax(); iPeer++ )
+             {
+                 peer_t *peer = &p[iPeer];
+                 if ( (iPeer != MyPNID) &&
+                     (socks_[iPeer] != -1) )
+                 {
+                     if ( (peer->p_receiving) ||
+                         (peer->p_sending) )
+                     {
+ 
+                         peer->p_timeout_count++;
+ 
+                         if ( peer->p_timeout_count <= sv_epoll_retry_count )
+                         {
+                             continue;
+                         }
+ 
+                         char buf[MON_STRING_BUF_SIZE];
+                         snprintf( buf, sizeof(buf)
+                                 , "[%s@%d] Not heard from peer=%d\n"
+                                 , method_name
+                                 ,  __LINE__
+                                 , iPeer );
+ 
+                         mon_log_write( MON_CLUSTER_ALLGATHERSOCK_1, SQ_LOG_CRIT, buf );
+                         stats[iPeer].MPI_ERROR = MPI_ERR_EXITED;
+                         err = MPI_ERR_IN_STATUS;
+                         if ( peer->p_sending )
+                         {
+                             peer->p_sending = false;
+                             nsent++;
+                         }
+                         if ( peer->p_receiving )
+                         {
+                             peer->p_receiving = false;
+                             nrecv++;
+                         }
+ 
+                         // setup the epoll structures 
+                         struct epoll_event event;
+                         event.data.fd = socks_[iPeer];
+                         int op = 0;
+                         if ( !peer->p_sending && !peer->p_receiving )
+                         {
+                             op = EPOLL_CTL_DEL;
+                             event.events = 0;
+                         }
+                         else if ( peer->p_sending )
+                         {
+                             op = EPOLL_CTL_MOD;
+                             event.events = EPOLLOUT | EPOLLET | EPOLLRDHUP;
+                         }
+                         else if ( peer->p_receiving )
+                         {
+                             op = EPOLL_CTL_MOD;
+                             event.events = EPOLLIN | EPOLLET | EPOLLRDHUP;
+                         }
+                         if ( op == EPOLL_CTL_DEL || op == EPOLL_CTL_MOD )
+                         {
+                             EpollCtl( epollFD_, op, socks_[iPeer], &event );
+                         }
+                     }
+                 }
+             }
+         }
+  
          if ( nw < 0 )
          {
              char ebuff[256];

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1e294233/core/sqf/monitor/linux/cluster.h
----------------------------------------------------------------------
diff --cc core/sqf/monitor/linux/cluster.h
index cfaf495,eba2d5c..4d429fd
mode 100644,100755..100644
--- a/core/sqf/monitor/linux/cluster.h
+++ b/core/sqf/monitor/linux/cluster.h

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1e294233/core/sqf/monitor/linux/makefile
----------------------------------------------------------------------
diff --cc core/sqf/monitor/linux/makefile
index 451e459,656e9e6..0660414
mode 100644,100755..100644
--- a/core/sqf/monitor/linux/makefile
+++ b/core/sqf/monitor/linux/makefile
@@@ -288,10 -288,13 +296,15 @@@ ALLOBJS += $(TRACE_LOG_OBJS
  ALLOBJS += $(CONFOBJS)
  ALLOBJS += $(MEMLOGOBJS)
  ALLOBJS += $(RTSIGBLOCK_OBJS)
 +ALLOBJS += $(TCONFOBJS)
+ ALLOBJS += $(ZOOMONOBJS)
  
- PGMS = $(BINEXPDIR)/monitor $(BINEXPDIR)/shell $(BINEXPDIR)/sqwatchdog $(BINEXPDIR)/monmemlog $(BINEXPDIR)/trafconf
+ PGMS = $(BINEXPDIR)/monitor
+ PGMS +=  $(BINEXPDIR)/shell
+ PGMS +=  $(BINEXPDIR)/sqwatchdog
+ PGMS +=  $(BINEXPDIR)/monmemlog
  PGMS +=  $(BINEXPDIR)/pstartd
++PGMS +=  $(BINEXPDIR)/trafconf
  PGMS +=  $(LIBEXPDIR)/libseabasesig.so
  
  TEST_PGMS = $(OUTDIR)/client

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1e294233/core/sqf/monitor/linux/monitor.cxx
----------------------------------------------------------------------
diff --cc core/sqf/monitor/linux/monitor.cxx
index 34995f4,a4b114c..669f3ad
--- a/core/sqf/monitor/linux/monitor.cxx
+++ b/core/sqf/monitor/linux/monitor.cxx
@@@ -755,6 -750,150 +758,150 @@@ void CMonitor::StartPrimitiveProcesses
      TRACE_EXIT;
  }
  
+ // Enqueues a node-down request for the local physical node (MyPNID) on
+ // ReqQueue.  ZSessionWatcher() calls this when the Zookeeper session has
+ // expired or authentication has failed.
+ void HandleZSessionExpiration( void )
+ {
+     const char method_name[] = "HandleZSessionExpiration";
+     TRACE_ENTRY;
+     ReqQueue.enqueueDownReq(MyPNID);
+     TRACE_EXIT;
+ }
+ 
+ // Enqueues a node-down request for the node called nodeName.
+ // Nothing is enqueued when Nodes->GetNode() does not find that name.
+ void HandleNodeExpiration( const char *nodeName )
+ {
+     const char method_name[] = "HandleNodeExpiration";
+     TRACE_ENTRY;
+ 
+     // GetNode() is called with a non-const pointer, hence the cast
+     CNode *expiredNode = Nodes->GetNode( const_cast<char *>(nodeName) );
+     if ( expiredNode != NULL )
+     {
+         int expiredPNid = expiredNode->GetPNid();
+         ReqQueue.enqueueDownReq( expiredPNid );
+     }
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Builds the Zookeeper quorum connect string ("host1:port,host2:port,...")
+ // from the ZOOKEEPER_NODES (comma separated host names) and ZOOKEEPER_PORT
+ // environment variables and creates the global ZClient object from it.
+ //
+ // Does nothing when ZClientEnabled is false.  When the port or the host
+ // list is missing, empty, or too long for the local parsing buffer, a
+ // critical event is logged and ZClientEnabled is set to false so that no
+ // client is created.
+ void CMonitor::CreateZookeeperClient( void )
+ {
+     const char method_name[] = "CMonitor::CreateZookeeperClient";
+     TRACE_ENTRY;
+ 
+     if ( ZClientEnabled )
+     {
+         string       zkQuorumHosts;
+         stringstream zkQuorumPort;
+         char *env;
 -        char  hostsStr[MAX_PROCESSOR_NAME*3] = { 0 };
++        char  hostsStr[MPI_MAX_PROCESSOR_NAME * 3] = { 0 };
+         char *tkn = NULL;
+ 
+         int zport;
+         env = getenv("ZOOKEEPER_PORT");
+         // isdigit() requires a value representable as unsigned char
+         if ( env && isdigit( static_cast<unsigned char>(*env) ) )
+         {
+             zport = atoi(env);
+         }
+         else
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf(buf, sizeof(buf),
+                      "[%s], Zookeeper quorum port is not defined!\n"
+                     , method_name);
+             mon_log_write(MON_MONITOR_CREATEZCLIENT_1, SQ_LOG_CRIT, buf);
+ 
+             ZClientEnabled = false;
+             TRACE_EXIT;
+             return;
+         }
+ 
+         env = getenv("ZOOKEEPER_NODES");
+         if ( env )
+         {
+             zkQuorumHosts = env;
+         }
+ 
+         // An unset variable is handled exactly like an empty one; otherwise
+         // an empty connect string would be handed to the CZClient
+         // constructor, which aborts the monitor.
+         if ( zkQuorumHosts.length() == 0 )
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf(buf, sizeof(buf),
+                      "[%s], Zookeeper quorum hosts are not defined!\n"
+                     , method_name);
+             mon_log_write(MON_MONITOR_CREATEZCLIENT_2, SQ_LOG_CRIT, buf);
+ 
+             ZClientEnabled = false;
+             TRACE_EXIT;
+             return;
+         }
+ 
+         // hostsStr is a fixed-size buffer: refuse a host list that does not
+         // fit (including the terminating NUL) instead of overflowing it.
+         if ( zkQuorumHosts.length() >= sizeof(hostsStr) )
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf(buf, sizeof(buf),
+                      "[%s], Zookeeper quorum hosts string is too long (%lu bytes, limit %lu)!\n"
+                     , method_name
+                     , static_cast<unsigned long>(zkQuorumHosts.length())
+                     , static_cast<unsigned long>(sizeof(hostsStr) - 1));
+             mon_log_write(MON_MONITOR_CREATEZCLIENT_2, SQ_LOG_CRIT, buf);
+ 
+             ZClientEnabled = false;
+             TRACE_EXIT;
+             return;
+         }
+ 
+         strcpy( hostsStr, zkQuorumHosts.c_str() );
+         zkQuorumPort.str( "" );
+ 
+         // Append "<host>:<port>" for every host, separated by commas.
+         // strtok() skips empty tokens, so stray commas are ignored.
+         bool firstHost = true;
+         for ( tkn = strtok( hostsStr, "," )
+             ; tkn != NULL
+             ; tkn = strtok( NULL, "," ) )
+         {
+             if ( !firstHost )
+             {
+                 zkQuorumPort << ",";
+             }
+             zkQuorumPort << tkn
+                          << ":"
+                          << zport;
+             firstHost = false;
+         }
+         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+         {
+             trace_printf( "%s@%d zkQuorumPort=%s\n"
+                         , method_name, __LINE__
+                         , zkQuorumPort.str().c_str() );
+         }
+ 
+         ZClient = new CZClient( zkQuorumPort.str().c_str()
+                               , ZCLIENT_TRAFODION_ZNODE
+                               , ZCLIENT_INSTANCE_ZNODE );
+         if ( ZClient == NULL )
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf(buf, sizeof(buf),
+                      "[%s], Failed to allocate ZClient object!\n"
+                     , method_name);
+             mon_log_write(MON_MONITOR_CREATEZCLIENT_3, SQ_LOG_CRIT, buf);
+             abort();
+         }
+     }
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Starts the ZClient work and, when StartWork() returns 0, enables
+ // monitoring and logs an informational event.  Does nothing when
+ // ZClientEnabled is false or no ZClient object exists.
+ void CMonitor::StartZookeeperClient( void )
+ {
+     const char method_name[] = "CMonitor::StartZookeeperClient";
+     TRACE_ENTRY;
+ 
+     if ( ZClientEnabled && ZClient )
+     {
+         const int startRc = ZClient->StartWork();
+         if ( startRc == 0 )
+         {
+             ZClient->StartMonitoring();
+ 
+             char logMsg[MON_STRING_BUF_SIZE];
+             snprintf( logMsg, sizeof(logMsg)
+                     , "[%s], ZClient node monitoring started\n"
+                     , method_name );
+             mon_log_write( MON_MONITOR_STARTZCLIENT_1, SQ_LOG_INFO, logMsg );
+         }
+     }
+ 
+     TRACE_EXIT;
+ }
+ 
  #ifdef USE_SEQUENCE_NUM
  long long CMonitor::GetTimeSeqNum()
  {

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1e294233/core/sqf/monitor/linux/monitor.h
----------------------------------------------------------------------
diff --cc core/sqf/monitor/linux/monitor.h
index 49308b9,1b44c57..1b44c57
mode 100644,100755..100644
--- a/core/sqf/monitor/linux/monitor.h
+++ b/core/sqf/monitor/linux/monitor.h

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1e294233/core/sqf/monitor/linux/zclient.cxx
----------------------------------------------------------------------
diff --cc core/sqf/monitor/linux/zclient.cxx
index 0000000,01ab4e5..c767639
mode 000000,100644..100644
--- a/core/sqf/monitor/linux/zclient.cxx
+++ b/core/sqf/monitor/linux/zclient.cxx
@@@ -1,0 -1,1432 +1,1432 @@@
+ /**********************************************************************
+ // @@@ START COPYRIGHT @@@
+ //
+ // Licensed to the Apache Software Foundation (ASF) under one
+ // or more contributor license agreements.  See the NOTICE file
+ // distributed with this work for additional information
+ // regarding copyright ownership.  The ASF licenses this file
+ // to you under the Apache License, Version 2.0 (the
+ // "License"); you may not use this file except in compliance
+ // with the License.  You may obtain a copy of the License at
+ //
+ //   http://www.apache.org/licenses/LICENSE-2.0
+ //
+ // Unless required by applicable law or agreed to in writing,
+ // software distributed under the License is distributed on an
+ // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ // KIND, either express or implied.  See the License for the
+ // specific language governing permissions and limitations
+ // under the License.
+ //
+ // @@@ END COPYRIGHT @@@
+ ********************************************************************/
+ #include <stdlib.h>
+ #include <errno.h>
+ #include <sys/ioctl.h>
+ #include <sys/time.h>
+ #include <signal.h>
+ #include <ctype.h>
+ #include <string.h>
+ #include <ifaddrs.h>
+ #include <netdb.h>
+ #include <new>
+ #include <stdio.h>
+ #include <list>
+ #include <string>
+ 
+ #include "lock.h"
+ #include "msgdef.h"
+ #include "montrace.h"
+ #include "monlogging.h"
+ #include "reqqueue.h"
+ #include "pnode.h"
+ #include "zclient.h"
+ 
+ // The following specify the default values for the timers if the
+ // zclient cluster monitoring timer related variables are not defined.
+ //
+ // NOTE: It is recommended to set the checkrate to -1, which essentially
+ //       makes the zclient event driven, meaning the watcher is called
+ //       only when a watched znode is changed or is deleted (expires).
+ //       Also, the session timeout must be kept at or below 60 seconds
+ //       as this is enforced by Zookeeper. Any value above 60 seconds
+ //       is renegotiated by Zookeeper to 60 seconds.
+ #define ZCLIENT_MONITORING_CHECKRATE         -1 // seconds (disabled)
+ #define ZCLIENT_SESSION_TIMEOUT              60 // seconds (1 minute)
+ 
+ // The monitors register their znodes under the cluster znode
+ #define ZCLIENT_CLUSTER_ZNODE               "/cluster"
+ 
+ using namespace std;
+ 
+ extern char Node_name[MPI_MAX_PROCESSOR_NAME];
+ extern int MyPNID;
+ extern int MyNid;
+ extern int MyPid;
+ 
+ extern CNodeContainer *Nodes;
+ extern CReqQueue ReqQueue;
+ extern CZClient    *ZClient;
+ extern CMonLog     *MonLog;
+ extern CMonLog     *SnmpLog;
+ 
+ extern bool debugFlag;
+ 
+ static zhandle_t *ZHandle;
+ static clientid_t MyZooId;
+ 
+ void ZSessionWatcher( zhandle_t *zzh
+                    , int type
+                    , int state
+                    , const char *path
+                    , void *watcherCtx);
+ 
+ // Releases every string held by *v and the data array itself, then resets
+ // the vector to empty (data = NULL, count = 0).  A vector whose data
+ // pointer is NULL is left untouched.
+ void FreeStringVector( struct String_vector *v )
+ {
+     if ( v->data == NULL )
+     {
+         return;
+     }
+ 
+     for ( int32_t idx = 0; idx < v->count; ++idx )
+     {
+         free( v->data[idx] );
+     }
+     free( v->data );
+ 
+     v->data = NULL;
+     v->count = 0;
+ }
+ 
+ // Returns the printable name of a CZClient state, or
+ // "ZClient State Invalid" for a value that is not a known state.
+ static const char *ZClientStateStr( CZClient::ZClientState_t state )
+ {
+     const char *stateName = "ZClient State Invalid";
+ 
+     switch (state)
+     {
+         case CZClient::ZC_DISABLED: stateName = "ZC_DISABLED"; break;
+         case CZClient::ZC_START:    stateName = "ZC_START";    break;
+         case CZClient::ZC_CLUSTER:  stateName = "ZC_CLUSTER";  break;
+         case CZClient::ZC_ZNODE:    stateName = "ZC_ZNODE";    break;
+         case CZClient::ZC_WATCH:    stateName = "ZC_WATCH";    break;
+         case CZClient::ZC_STOP:     stateName = "ZC_STOP";     break;
+         case CZClient::ZC_SHUTDOWN: stateName = "ZC_SHUTDOWN"; break;
+         default:
+             break;
+     }
+ 
+     return stateName;
+ }
+ 
+ // Returns the printable name of a Zookeeper watcher event type, or
+ // "INVALID_TYPE" when the value matches none of the known event types.
+ static const char *ZooConnectionTypeStr( int type )
+ {
+     // Entries are checked in order; the first match wins.
+     const struct
+     {
+         int         value;
+         const char *name;
+     } knownTypes[] =
+     {
+         { ZOO_CREATED_EVENT,     "ZOO_CREATED_EVENT" },
+         { ZOO_DELETED_EVENT,     "ZOO_DELETED_EVENT" },
+         { ZOO_CHANGED_EVENT,     "ZOO_CHANGED_EVENT" },
+         { ZOO_CHILD_EVENT,       "ZOO_CHILD_EVENT" },
+         { ZOO_SESSION_EVENT,     "ZOO_SESSION_EVENT" },
+         { ZOO_NOTWATCHING_EVENT, "ZOO_NOTWATCHING_EVENT" }
+     };
+ 
+     for ( size_t i = 0; i < sizeof(knownTypes) / sizeof(knownTypes[0]); i++ )
+     {
+         if ( knownTypes[i].value == type )
+         {
+             return knownTypes[i].name;
+         }
+     }
+ 
+     return "INVALID_TYPE";
+ }
+ 
+ // Returns the printable name of a Zookeeper connection state, or
+ // "INVALID_STATE" when the value matches none of the known states.
+ // State 0 is reported as "CLOSED_STATE".
+ static const char *ZooConnectionStateStr( int state )
+ {
+     // Entries are checked in order; the first match wins.
+     const struct
+     {
+         int         value;
+         const char *name;
+     } knownStates[] =
+     {
+         { 0,                         "CLOSED_STATE" },
+         { ZOO_EXPIRED_SESSION_STATE, "EXPIRED_SESSION_STATE" },
+         { ZOO_AUTH_FAILED_STATE,     "AUTH_FAILED_STATE" },
+         { ZOO_CONNECTING_STATE,      "CONNECTING_STATE" },
+         { ZOO_ASSOCIATING_STATE,     "ASSOCIATING_STATE" },
+         { ZOO_CONNECTED_STATE,       "CONNECTED_STATE" }
+     };
+ 
+     for ( size_t i = 0; i < sizeof(knownStates) / sizeof(knownStates[0]); i++ )
+     {
+         if ( knownStates[i].value == state )
+         {
+             return knownStates[i].name;
+         }
+     }
+ 
+     return "INVALID_STATE";
+ }
+ 
+ const char *ZooErrorStr( int error )
+ {
+     if ( error == 0 )
+         return "ZOK";
+     if ( error == ZNONODE )
+         return "ZNONODE";
+     if ( error == ZNODEEXISTS )
+         return "ZNODEEXISTS";
+     if ( error == ZNOAUTH )
+         return "ZNOAUTH";
+     if ( error == ZNOCHILDRENFOREPHEMERALS )
+         return "ZNOCHILDRENFOREPHEMERALS";
+     if ( error == ZBADARGUMENTS )
+         return "ZBADARGUMENTS";
+     if ( error == ZINVALIDSTATE )
+         return "ZINVALIDSTATE";
+     if ( error == ZMARSHALLINGERROR )
+         return "ZMARSHALLINGERROR";
+     if ( error == ZCONNECTIONLOSS )
+         return "ZCONNECTIONLOSS";
+     if ( error == ZOPERATIONTIMEOUT )
+         return "ZOPERATIONTIMEOUT";
+ 
+     static char errorStr[20];
+     sprintf( errorStr, "%d", error );
+     return errorStr;
+ }
+ 
+ // Watcher callback registered with zookeeper_init().
+ //
+ //   zzh        - Zookeeper handle the event belongs to
+ //   type       - event type (one of the ZOO_*_EVENT values)
+ //   state      - connection state (one of the ZOO_*_STATE values)
+ //   path       - znode path the event refers to, may be NULL or empty
+ //   watcherCtx - unused
+ //
+ // Session events: a connected state records a new session id in MyZooId;
+ // an authentication failure or an expired session logs a critical event,
+ // enqueues a down request for the local node and closes the handle.
+ // Znode events (created, deleted, changed, child, not-watching) are passed
+ // on to ZClient->TriggerCheck().
+ void ZSessionWatcher( zhandle_t *zzh
+                     , int type
+                     , int state
+                     , const char *path
+                     , void *watcherCtx)
+ {
+     const char method_name[] = "ZSessionWatcher";
+     TRACE_ENTRY;
+ 
+     watcherCtx = watcherCtx; // Make compiler happy!
+ 
+     /*
+      * Use zzh here rather than the global ZHandle: this may run on a
+      * client library thread, and the library may call the watcher before
+      * zookeeper_init() has returned and ZHandle has been assigned.
+      */
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         if ( path && strlen( path ) > 0 )
+         {
+             trace_printf( "%s@%d" " - Watcher %s state = %s for path %s\n"
+                         , method_name, __LINE__
+                         , ZooConnectionTypeStr( type )
+                         , ZooConnectionStateStr( state )
+                         , path );
+         }
+         else
+         {
+             trace_printf( "%s@%d" " - Watcher %s state = %s\n"
+                         , method_name, __LINE__
+                         , ZooConnectionTypeStr( type )
+                         , ZooConnectionStateStr( state ) );
+         }
+     }
+ 
+     if ( type == ZOO_SESSION_EVENT )
+     {
+         if ( state == ZOO_CONNECTED_STATE )
+         {
+             const clientid_t *id = zoo_client_id( zzh );
+             if ( MyZooId.client_id == 0 || MyZooId.client_id != id->client_id )
+             {
+                 MyZooId = *id;
+                 if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+                 {
+                     trace_printf( "%s@%d" " - Got a new session id: 0x%llx\n"
+                                 , method_name, __LINE__
+                                 , static_cast<long long unsigned int>(MyZooId.client_id) );
+                 }
+             }
+         }
+         else if ( state == ZOO_AUTH_FAILED_STATE )
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf( buf, sizeof(buf)
+                     , "[%s], Error Zookeeper authentication failure. Node going down...\n"
+                     ,  method_name );
+             mon_log_write(MON_ZCLIENT_ZSESSIONWATCHER_1, SQ_LOG_CRIT, buf);
+ 
+             HandleZSessionExpiration();
+ 
+             zookeeper_close( zzh );
+             ZHandle=0;
+         }
+         else if ( state == ZOO_EXPIRED_SESSION_STATE )
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf( buf, sizeof(buf)
+                     , "[%s], Error Zookeeper session expired. Node going down...\n"
+                     ,  method_name );
+             mon_log_write(MON_ZCLIENT_ZSESSIONWATCHER_2, SQ_LOG_CRIT, buf);
+ 
+             HandleZSessionExpiration();
+ 
+             zookeeper_close( zzh );
+             ZHandle=0;
+         }
+     }
+     else if ( type == ZOO_CREATED_EVENT
+            || type == ZOO_DELETED_EVENT
+            || type == ZOO_CHANGED_EVENT
+            || type == ZOO_CHILD_EVENT
+            || type == ZOO_NOTWATCHING_EVENT )
+     {
+         // The watcher is installed from inside the CZClient constructor,
+         // so it can run before the global ZClient pointer has been set.
+         if ( ZClient != NULL )
+         {
+             ZClient->TriggerCheck( type, path );
+         }
+         else if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+         {
+             trace_printf( "%s@%d" " - ZClient not yet available, %s ignored\n"
+                         , method_name, __LINE__
+                         , ZooConnectionTypeStr( type ) );
+         }
+     }
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Creates the Zookeeper client.
+ //
+ //   quorumHosts  - Zookeeper connect string handed to zookeeper_init()
+ //   rootNode     - root znode, stored in zkRootNode_
+ //   instanceNode - instance znode, stored in zkRootNodeInstance_
+ //
+ // The monitoring check rate and the session timeout (both in seconds) start
+ // from the ZCLIENT_* defaults and can be overridden through the
+ // SQ_MON_ZCLIENT_MONITORING_CHECKRATE and SQ_MON_ZCLIENT_SESSION_TIMEOUT
+ // environment variables.  The process is aborted when the connect string is
+ // empty, when zookeeper_init() fails, or when InitializeZClient() returns a
+ // non-zero code.
+ CZClient::CZClient( const char *quorumHosts
+                   , const char *rootNode
+                   , const char *instanceNode )
+          :threadId_(0)
+          ,state_(ZC_DISABLED)
+          ,enabled_(false)
+          ,checkCluster_(false)
+          ,zcMonitoringRate_(ZCLIENT_MONITORING_CHECKRATE) // seconds
+          ,zkQuorumHosts_(quorumHosts)
+          ,zkRootNode_(rootNode)
+          ,zkRootNodeInstance_(instanceNode)
+          ,zkQuorumPort_("")
+          ,zkSessionTimeout_(ZCLIENT_SESSION_TIMEOUT) // seconds
+ {
+     const char method_name[] = "CZClient::CZClient";
+     TRACE_ENTRY;
+ 
+     memcpy(&eyecatcher_, "ZCLT", 4);
+ 
+     // Optional overrides from the environment, both expressed in seconds
+     const char *checkRateEnv = getenv( "SQ_MON_ZCLIENT_MONITORING_CHECKRATE" );
+     if ( checkRateEnv != NULL )
+     {
+         zcMonitoringRate_ = atoi( checkRateEnv );
+     }
+ 
+     const char *sessionTimeoutEnv = getenv( "SQ_MON_ZCLIENT_SESSION_TIMEOUT" );
+     if ( sessionTimeoutEnv != NULL )
+     {
+         zkSessionTimeout_ = atoi( sessionTimeoutEnv );
+     }
+ 
+     const bool tracing = (trace_settings & (TRACE_INIT | TRACE_RECOVERY)) != 0;
+     if ( tracing )
+     {
+         trace_printf( "%s@%d" " - ZClient monitoring rate in seconds=%ld\n"
+                     , method_name, __LINE__, zcMonitoringRate_ );
+         trace_printf( "%s@%d" " - ZClient session timeout in seconds =%d\n"
+                     , method_name, __LINE__, zkSessionTimeout_ );
+     }
+ 
+     // Without a connect string there is nothing to connect to
+     if ( zkQuorumHosts_.length() == 0 )
+     {
+         char logMsg[MON_STRING_BUF_SIZE];
+         snprintf( logMsg, sizeof(logMsg)
+                 , "[%s], Zookeeper quorum port address not initialized\n"
+                 ,  method_name);
+         mon_log_write(MON_ZCLIENT_ZCLIENT_1, SQ_LOG_ERR, logMsg);
+         abort();
+     }
+ 
+     zkQuorumPort_ << zkQuorumHosts_.c_str();
+     if ( tracing )
+     {
+         trace_printf( "%s@%d zkQuorumPort is: %s\n"
+                     , method_name, __LINE__
+                     , zkQuorumPort_.str( ).c_str( ));
+     }
+ 
+     // Initialize zookeeper; the session timeout is passed in milliseconds
+     zoo_deterministic_conn_order( 0 ); // non-deterministic order for client connections
+     ZHandle = zookeeper_init( zkQuorumPort_.str( ).c_str( )
+                             , ZSessionWatcher
+                             , zkSessionTimeout_ * 1000
+                             , &MyZooId
+                             , 0
+                             , 0 );
+     if ( ZHandle == 0 )
+     {
+         char logMsg[MON_STRING_BUF_SIZE];
+         snprintf( logMsg, sizeof(logMsg)
+                 , "[%s], zookeeper_init() failed for host:port %s\n"
+                 , method_name, zkQuorumPort_.str( ).c_str( ));
+         mon_log_write(MON_ZCLIENT_ZCLIENT_2, SQ_LOG_ERR, logMsg);
+         abort();
+     }
+ 
+     const int initRc = InitializeZClient();
+     if ( initRc != 0 )
+     {
+         char logMsg[MON_STRING_BUF_SIZE];
+         snprintf( logMsg, sizeof(logMsg)
+                 , "[%s], Failed ZClient initialization (%s)\n"
+                 , method_name, ZooErrorStr(initRc) );
+         mon_log_write(MON_ZCLIENT_ZCLIENT_3, SQ_LOG_ERR, logMsg);
+         abort();
+     }
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Closes the Zookeeper handle, if one is still open, and clears ZHandle.
+ CZClient::~CZClient( void )
+ {
+     const char method_name[] = "CZClient::~CZClient";
+     TRACE_ENTRY;
+ 
+     // Lower-case eyecatcher; presumably marks the object as destroyed
+     memcpy(&eyecatcher_, "zclt", 4);
+ 
+     if ( ZHandle != 0 )
+     {
+         zookeeper_close( ZHandle );
+         ZHandle = 0;
+     }
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Walks the monitor znodes registered under the cluster znode and reads
+ // the data of each one, logging an error for every znode whose data cannot
+ // be read.  Only runs when IsCheckCluster() is true.
+ //
+ // When the list of cluster znodes cannot be obtained, the client state is
+ // set to ZC_STOP and CLock::wakeOne() is called.
+ void CZClient::CheckCluster( void )
+ {
+     const char method_name[] = "CZClient::CheckCluster";
+     TRACE_ENTRY;
+ 
+     if ( !IsCheckCluster() )
+     {
+         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+         {
+             trace_printf( "%s@%d CheckCluster is NOT set!\n"
+                         , method_name, __LINE__ );
+         }
+         TRACE_EXIT;
+         return;
+     }
+ 
+     struct String_vector nodes;
+     int rc = GetClusterZNodes( &nodes );
+     if ( rc != ZOK )
+     {
+         char buf[MON_STRING_BUF_SIZE];
+         snprintf( buf, sizeof(buf)
+                 , "[%s], GetClusterZNodes() failed!\n"
+                 , method_name );
+         mon_log_write(MON_ZCLIENT_CHECKCLUSTER_1, SQ_LOG_ERR, buf);
+         SetState( CZClient::ZC_STOP );
+         CLock::wakeOne();
+         TRACE_EXIT;
+         return;
+     }
+ 
+     stringstream newpath;
+     string nodeName;
+     int    pnid = -1;
+ 
+     for (int i = 0; i < nodes.count ; i++ )
+     {
+         // Full path of the monitor znode: <root><instance>/cluster/<child>
+         newpath.str( "" );
+         newpath << zkRootNode_.c_str()
+                 << zkRootNodeInstance_.c_str()
+                 << ZCLIENT_CLUSTER_ZNODE << "/"
+                 << nodes.data[i];
+         string monZnode = newpath.str( );
+ 
+         rc = GetZNodeData( monZnode, nodeName, pnid );
+         if ( rc != ZOK )
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf( buf, sizeof(buf)
+                     , "[%s], GetZNodeData() failed!\n"
+                     , method_name );
+             mon_log_write(MON_ZCLIENT_CHECKCLUSTER_2, SQ_LOG_ERR, buf);
+         }
+         else
+         {
+             if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+             {
+                 trace_printf( "%s@%d monZnode=%s, nodeName=%s, pnid=%d)\n"
+                             , method_name, __LINE__
+                             , monZnode.c_str(), nodeName.c_str(), pnid );
+             }
+         }
+     }
+ 
+     // Release the child list even when it is empty; FreeStringVector()
+     // ignores a vector whose data pointer is NULL.
+     FreeStringVector( &nodes );
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Fetches the children of the cluster znode into 'nodes'.
+ //
+ // The cluster znode is probed with zoo_exists() and, when present, its
+ // children are read with zoo_get_children(). A missing znode or an empty
+ // child list is retried; the loop gives up once the retry count exceeds 10.
+ // Any other zoo_exists() result is logged and ends the loop immediately.
+ //
+ // Returns the result code of the last ZooKeeper call made. 'nodes' is reset
+ // to an empty vector before the first attempt.
+ int CZClient::GetClusterZNodes( String_vector *nodes )
+ {
+     const char method_name[] = "CZClient::GetClusterZNodes";
+     TRACE_ENTRY;
+ 
+     Stat stat;
+     int  rc = -1;
+ 
+     stringstream pathBuilder;
+     pathBuilder << zkRootNode_.c_str()
+                 << zkRootNodeInstance_.c_str()
+                 << ZCLIENT_CLUSTER_ZNODE;
+     string trafCluster( pathBuilder.str( ) );
+ 
+     nodes->count = 0;
+     nodes->data = NULL;
+ 
+     // 'attempt' counts the retries already made
+     for ( int attempt = 0; ; attempt++ )
+     {
+         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+         {
+             trace_printf( "%s@%d trafCluster=%s\n"
+                         , method_name, __LINE__, trafCluster.c_str() );
+         }
+ 
+         // Verify the existence of the parent ZCLIENT_CLUSTER_ZNODE
+         rc = zoo_exists( ZHandle, trafCluster.c_str( ), 0, &stat );
+         if ( rc == ZOK )
+         {
+             // Get the list of child znodes currently under the cluster znode
+             rc = zoo_get_children( ZHandle, trafCluster.c_str( ), 0, nodes );
+             if ( nodes->count > 0 )
+             {
+                 if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+                 {
+                     trace_printf( "%s@%d nodes.count=%d\n"
+                                 , method_name, __LINE__
+                                 , nodes->count );
+                 }
+                 break;  // got the children
+             }
+         }
+         else if ( rc != ZNONODE )
+         {
+             // Unexpected error, log it and give up
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf( buf, sizeof(buf)
+                     , "[%s], zoo_exists() for %s failed with error %s\n"
+                     ,  method_name, trafCluster.c_str( ), ZooErrorStr(rc));
+             mon_log_write(MON_ZCLIENT_GETCLUSTERZNODES_2, SQ_LOG_ERR, buf);
+             break;
+         }
+ 
+         // Parent missing or no children yet: retry unless out of attempts
+         if ( attempt > 10 )
+         {
+             break;
+         }
+     }
+ 
+     TRACE_EXIT;
+     return( rc );
+ }
+ 
+ // Reads the data stored in 'monZnode' and parses it as "<nodeName>:<pnid>".
+ //
+ //   monZnode  full path of the znode to read
+ //   nodeName  set to the first ':' separated token, when present
+ //   pnid      set to the integer value of the second token, when present
+ //
+ // Returns ZOK on success, ZNONODE when the znode does not exist, otherwise
+ // the error returned by zoo_exists() or zoo_get() (which is also logged).
+ // 'nodeName' and 'pnid' are left untouched when their token is missing.
+ int CZClient::GetZNodeData( string &monZnode, string &nodeName, int &pnid )
+ {
+     const char method_name[] = "CZClient::GetZNodeData";
+     TRACE_ENTRY;
+ 
+     char *tkn = NULL;
+     char *tknSave = NULL;
 -    char  zkData[MAX_PROCESSOR_NAME];
++    char  zkData[MPI_MAX_PROCESSOR_NAME];
+     int   rc = -1;
+     // zoo_get() returns raw bytes, so keep one byte back for the terminator
+     const int zkDataMax = sizeof(zkData) - 1;
+     int   zkDataLen = zkDataMax;
+     Stat  stat;
+ 
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         trace_printf( "%s@%d monZnode=%s\n"
+                     , method_name, __LINE__, monZnode.c_str() );
+     }
+     rc = zoo_exists( ZHandle, monZnode.c_str( ), 0, &stat );
+     if ( rc == ZNONODE )
+     {
+         // return the error
+         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+         {
+             trace_printf( "%s@%d monZnode=%s does not exist (ZNONODE)\n"
+                         , method_name, __LINE__, monZnode.c_str() );
+         }
+     }
+     else if ( rc == ZOK )
+     {
+         // Get the node name and pnid from the data part of znode
+         rc = zoo_get( ZHandle, monZnode.c_str( ), false, zkData, &zkDataLen, &stat );
+         if ( rc == ZOK )
+         {
+             // zkDataLen now holds the number of bytes returned, or -1 when
+             // the znode has no data. Terminate the buffer before parsing.
+             if ( zkDataLen < 0 )
+             {
+                 zkDataLen = 0;
+             }
+             else if ( zkDataLen > zkDataMax )
+             {
+                 zkDataLen = zkDataMax;
+             }
+             zkData[zkDataLen] = '\0';
+ 
+             // The first token is the node name
+             // (strtok_r: this code runs in a multi-threaded process)
+             tkn = strtok_r( zkData, ":", &tknSave );
+             if ( tkn != NULL )
+             {
+                 nodeName = tkn;
+             }
+             // The second token is the pnid
+             tkn = strtok_r( NULL, ":", &tknSave );
+             if ( tkn != NULL )
+             {
+                 pnid = atoi( tkn );
+             }
+             // TODO: Save monZnode path in corresponding physical node object
+             //       to match with when ZC_NODE is triggered
+         }
+         else
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf( buf, sizeof(buf)
+                     , "[%s], zoo_get() for %s failed with error %s\n"
+                     ,  method_name, monZnode.c_str( ), ZooErrorStr(rc));
+             mon_log_write(MON_ZCLIENT_GETZNODEDATA_2, SQ_LOG_ERR, buf);
+         }
+     }
+     else
+     {
+         char buf[MON_STRING_BUF_SIZE];
+         snprintf( buf, sizeof(buf)
+                 , "[%s], zoo_exists() for %s failed with error %s\n"
+                 ,  method_name, monZnode.c_str( ), ZooErrorStr(rc));
+         mon_log_write(MON_ZCLIENT_GETZNODEDATA_3, SQ_LOG_ERR, buf);
+     }
+ 
+     TRACE_EXIT;
+     return( rc );
+ }
+ 
+ // Handles the oldest znode path queued by SetState().
+ //
+ // The path is removed from znodeQueue_, its last '/' separated component is
+ // taken as the node name, the event is logged and the name is passed to
+ // HandleNodeExpiration(). Only traces and returns unless IsCheckCluster()
+ // is true. An empty queue, or a path without a usable last component, is
+ // traced and ignored.
+ void CZClient::HandleExpiredZNode( void )
+ {
+     const char method_name[] = "CZClient::HandleExpiredZNode";
+     TRACE_ENTRY;
+ 
+     if ( IsCheckCluster() )
+     {
 -        char  nodeName[MAX_PROCESSOR_NAME] = { 0 };
++        char  nodeName[MPI_MAX_PROCESSOR_NAME] = { 0 };
+         string monZnode;
+ 
+         // front() on an empty queue is undefined, so check first
+         if ( znodeQueue_.empty() )
+         {
+             if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+             {
+                 trace_printf( "%s@%d" " - No znode path queued\n"
+                             , method_name, __LINE__ );
+             }
+             TRACE_EXIT;
+             return;
+         }
+ 
+         monZnode.assign( znodeQueue_.front() );
+ 
+         if (trace_settings)
+         {
+             trace_printf("%s@%d" " - znodePath=%s, znodeQueue_.size=%ld\n"
+                         , method_name, __LINE__
+                         , monZnode.c_str()
+                         , static_cast<long>(znodeQueue_.size()) );
+         }
+ 
+         znodeQueue_.pop_front();
+ 
+         if (trace_settings)
+         {
+             trace_printf( "%s@%d" " - Checking znode=%s\n"
+                         , method_name, __LINE__
+                         , monZnode.c_str() );
+         }
+ 
+         // The node name is the last non-empty path component. Trailing '/'
+         // characters are ignored and the copy is bounded by sizeof(nodeName).
+         string::size_type nameEnd = monZnode.find_last_not_of( '/' );
+         if ( nameEnd != string::npos )
+         {
+             string::size_type slash = monZnode.rfind( '/', nameEnd );
+             string::size_type nameStart = (slash == string::npos) ? 0 : slash + 1;
+             snprintf( nodeName, sizeof(nodeName), "%s"
+                     , monZnode.substr( nameStart, nameEnd - nameStart + 1 ).c_str() );
+         }
+ 
+         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+         {
+             trace_printf( "%s@%d nodeName=%s\n"
+                         , method_name, __LINE__
+                         , strlen(nodeName) ? nodeName : "" );
+         }
+ 
+         if ( nodeName[0] == '\0' )
+         {
+             // No node name in the path, nothing to bring down
+             TRACE_EXIT;
+             return;
+         }
+ 
+         char buf[MON_STRING_BUF_SIZE];
+         snprintf( buf, sizeof(buf)
+                 , "[%s], %s was deleted, handling node (%s) as a down node!\n"
+                 ,  method_name, monZnode.c_str(), nodeName );
+         mon_log_write(MON_ZCLIENT_CHECKZNODE_1, SQ_LOG_ERR, buf);
+ 
+         HandleNodeExpiration( nodeName );
+     }
+     else
+     {
+         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+         {
+             trace_printf( "%s@%d CheckCluster is NOT set!\n"
+                         , method_name, __LINE__ );
+         }
+     }
+     
+     TRACE_EXIT;
+ }
+ 
+ // Creates the cluster znode hierarchy (MakeClusterZNodes) and, when that
+ // succeeds, registers this node's own znode (RegisterMyNodeZNode).
+ //
+ // Returns ZOK on success, otherwise the error of the step that failed.
+ int CZClient::InitializeZClient( void )
+ {
+     // Name used by the trace macros; must be this method's own name
+     const char method_name[] = "CZClient::InitializeZClient";
+     TRACE_ENTRY;
+ 
+     int rc;
+ 
+     rc = MakeClusterZNodes();
+     if ( rc == ZOK )
+     {
+         rc = RegisterMyNodeZNode();
+     }
+ 
+     TRACE_EXIT;
+     return( rc );
+ }
+ 
+ // Makes sure the three persistent znodes that form the cluster hierarchy
+ // exist, creating each missing one in parent-first order:
+ //
+ //   <zkRootNode_>
+ //   <zkRootNode_><zkRootNodeInstance_>
+ //   <zkRootNode_><zkRootNodeInstance_><ZCLIENT_CLUSTER_ZNODE>
+ //
+ // Processing stops at the first failure so that an error on a parent znode
+ // is not hidden by the result of a later check.
+ //
+ // Returns ZOK when all three znodes exist on return, otherwise the error
+ // returned by zoo_exists() or RegisterZNode().
+ int CZClient::MakeClusterZNodes( void )
+ {
+     const char method_name[] = "CZClient::MakeClusterZNodes";
+     TRACE_ENTRY;
+ 
+     const int levels = 3;
+     int  rc = ZOK;
+     Stat stat;
+ 
+     // Each level's path is the previous level's path plus one more component
+     string znodePath[levels];
+     stringstream ss;
+     ss << zkRootNode_.c_str();
+     znodePath[0] = ss.str( );
+     ss << zkRootNodeInstance_.c_str();
+     znodePath[1] = ss.str( );
+     ss << ZCLIENT_CLUSTER_ZNODE;
+     znodePath[2] = ss.str( );
+ 
+     // Event logged when zoo_exists() fails at the corresponding level
+     const int existsEvent[levels] = { MON_ZCLIENT_CHECKCLUSTERZNODES_1
+                                     , MON_ZCLIENT_CHECKCLUSTERZNODES_2
+                                     , MON_ZCLIENT_CHECKCLUSTERZNODES_3 };
+ 
+     for ( int i = 0; i < levels && rc == ZOK; i++ )
+     {
+         rc = zoo_exists( ZHandle, znodePath[i].c_str( ), 0, &stat );
+         if ( rc == ZNONODE )
+         {
+             if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+             {
+                 trace_printf( "%s@%d RegisterZNode(%s)\n"
+                             , method_name, __LINE__
+                             , znodePath[i].c_str() );
+             }
+             rc = RegisterZNode( znodePath[i].c_str(), NULL, 0 );
+             if ( rc == ZNODEEXISTS )
+             {
+                 // Another process created the znode between the existence
+                 // check and the create; the znode exists, which is the goal
+                 rc = ZOK;
+             }
+         }
+         else if ( rc != ZOK )
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf( buf, sizeof(buf)
+                     , "[%s], zoo_exists() failed with error %s\n"
+                     , method_name, ZooErrorStr(rc) );
+             mon_log_write(existsEvent[i], SQ_LOG_ERR, buf);
+         }
+     }
+ 
+     TRACE_EXIT;
+     return(rc);
+ }
+ 
+ // ZClient main processing loop
+ // Body of the ZClient thread (invoked from ZClientThread()).
+ //
+ // Loops until GetState() returns ZC_SHUTDOWN. Each iteration takes the
+ // object's lock, waits (see below), dispatches on the current state and
+ // releases the lock again:
+ //
+ //   - not enabled:                       CLock::wait()
+ //   - enabled, zcMonitoringRate_ < 0:    CLock::wait()
+ //   - enabled, zcMonitoringRate_ >= 0:   CLock::timedWait() until 'timeout'
+ //
+ // NOTE(review): CLock::wait()/timedWait() presumably release the lock while
+ // blocked, as a condition variable would -- confirm in CLock.
+ void CZClient::MonitorZCluster()
+ {
+     const char method_name[] = "CZClient::MonitorZCluster";
+     TRACE_ENTRY;
+ 
+     int rc;
+     // Absolute wake-up time; only set and used when zcMonitoringRate_ >= 0
+     struct timespec   timeout;
+ 
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         trace_printf( "%s@%d thread %lx starting\n"
+                     , method_name, __LINE__, threadId_);
+     }
+ 
+     if (zcMonitoringRate_ >= 0)
+     {
+         SetTimeToWakeUp( timeout );
+     }
+ 
+     while ( GetState() != ZC_SHUTDOWN )
+     {
+         lock();
+         if ( !IsEnabled() )
+         {
+             // Wait until timer started
+             CLock::wait();
+         }
+         else
+         {
+             if (zcMonitoringRate_ < 0)
+             {
+                 // Wait until signaled
+                 CLock::wait();
+                 if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+                 {
+                     trace_printf( "%s@%d" " - ZCluster signaled, state_=%s\n"
+                                 , method_name, __LINE__
+                                 , ZClientStateStr(GetState()) );
+                 }
+             }
+             else
+             {
+                 // Wait until signaled or timer expires
+                 rc = CLock::timedWait( &timeout );
+                 if ( rc != ETIMEDOUT  )
+                 {
+                     if ( rc != 0 )
+                     {
+                         // The wait itself failed (neither signaled nor
+                         // timed out): stop monitoring
+                         StopClusterMonitoring();
+                     }
+                     else
+                     {
+                         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+                         {
+                             trace_printf( "%s@%d" " - ZCluster signaled, state_=%s\n"
+                                         , method_name, __LINE__
+                                         , ZClientStateStr(GetState()) );
+                         }
+                     }
+                 }
+             }
+         }
+ 
+         // Act on the current state. ZC_CLUSTER and ZC_ZNODE only act once
+         // IsCheckCluster() is true, ZC_WATCH only while it is still false.
+         switch ( GetState() )
+         {
+             case ZC_START:
+                 StartClusterMonitoring();
+                 break;
+             case ZC_CLUSTER:
+                 if ( IsCheckCluster() )
+                 {
+                     CheckCluster();
+                 }
+                 break;
+             case ZC_WATCH:
+                 if ( !IsCheckCluster() )
+                 {
+                     WatchCluster();
+                 }
+                 break;
+             case ZC_ZNODE:
+                 if ( IsCheckCluster() )
+                 {
+                     HandleExpiredZNode();
+                 }
+                 break;
+             case ZC_STOP:
+                 StopClusterMonitoring();
+                 break;
+             default:
+                 break;
+         }
+         // Re-arm the wake-up time for the next timed wait
+         if (zcMonitoringRate_ >= 0 )
+         {
+             SetTimeToWakeUp( timeout );
+         }
+         unlock();
+     }
+ 
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         trace_printf("%s@%d thread %lx exiting\n"
+                     , method_name,__LINE__, pthread_self());
+     }
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Registers this node's znode under the cluster znode.
+ //
+ // The znode path is <root><instance><ZCLIENT_CLUSTER_ZNODE>/<Node_name>, its
+ // data is "<Node_name>:<MyPNID>" and it is created with ZOO_EPHEMERAL.
+ //
+ // Returns the result of RegisterZNode().
+ int CZClient::RegisterMyNodeZNode( void )
+ {
+     const char method_name[] = "CZClient::RegisterMyNodeZNode";
+     TRACE_ENTRY;
+ 
+     int rc;
+     // Large enough for any int value including sign and terminator
+     char pnidStr[16];
+ 
+     snprintf( pnidStr, sizeof(pnidStr), "%d", MyPNID );
+ 
+     stringstream newpath;
+     newpath.str( "" );
+     newpath << zkRootNode_.c_str() 
+             << zkRootNodeInstance_.c_str() 
+             << ZCLIENT_CLUSTER_ZNODE << "/"
+             << Node_name;
+     string monZnode = newpath.str( );
+ 
+     stringstream ss;
+     ss.str( "" );
+     ss << Node_name << ":" << pnidStr;
+     string monData = ss.str( );
+ 
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         trace_printf( "%s@%d RegisterZNode(%s:%s)\n"
+                     , method_name, __LINE__
+                     , monZnode.c_str()
+                     , monData.c_str() );
+     }
+ 
+     rc = RegisterZNode( monZnode.c_str(), monData.c_str(), ZOO_EPHEMERAL );
+ 
+     TRACE_EXIT;
+ 
+     return(rc);
+ }
+ 
+ // Creates a znode with zoo_create() using the ZOO_OPEN_ACL_UNSAFE ACL.
+ //
+ //   znodePath  path of the znode to create
+ //   znodeData  data to store in the znode; NULL or "" creates it without data
+ //   flags      passed through to zoo_create() (callers in this file pass 0
+ //              or ZOO_EPHEMERAL)
+ //
+ // Returns ZOK on success, otherwise the zoo_create() error, which is also
+ // logged.
+ int CZClient::RegisterZNode( const char *znodePath
+                            , const char *znodeData
+                            , int flags )
+ {
+     const char method_name[] = "CZClient::RegisterZNode";
+     TRACE_ENTRY;
+ 
+     int rc = -1;
+     // Receives the path of the created znode from zoo_create()
+     char createdPath[1024] = { 0 };
+ 
+     const string zpath( znodePath ? znodePath : "" );
+     const string zdata( znodeData ? znodeData : "" );
+ 
+     // A value length of -1 together with a NULL value means "no data"
+     const char *value    = zdata.empty() ? NULL : zdata.c_str();
+     const int   valueLen = zdata.empty() ? -1 : static_cast<int>( zdata.length() );
+ 
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         trace_printf( "%s@%d zoo_create (%s : %s)\n"
+                     , method_name, __LINE__
+                     , zpath.c_str()
+                     , zdata.c_str());
+     }
+     rc = zoo_create( ZHandle
+                    , zpath.c_str( )
+                    , value
+                    , valueLen
+                    , &ZOO_OPEN_ACL_UNSAFE
+                    , flags
+                    , createdPath
+                    , sizeof(createdPath)-1 );
+     if ( rc != ZOK )
+     {
+         char buf[MON_STRING_BUF_SIZE];
+         snprintf( buf, sizeof(buf)
+                 , "[%s], zoo_create(%s) failed with error %s\n"
+                 , method_name
+                 , zpath.c_str()
+                 , ZooErrorStr(rc) );
+         mon_log_write(MON_ZCLIENT_REGISTERZNODE_1, SQ_LOG_ERR, buf);
+     }
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         trace_printf("%s@%d realpath=%s\n", method_name, __LINE__, createdPath);
+     }
+ 
+     TRACE_EXIT;
+     return( rc );
+ }
+ 
+ // Sets the client state and queues the znode path that goes with it.
+ //
+ //   state      new value of state_
+ //   znodePath  path appended to znodeQueue_; a NULL path is not queued
+ //
+ // Both updates are made while holding the object's lock.
+ void CZClient::SetState( ZClientState_t state, const char *znodePath ) 
+ {
+     CAutoLock lock(getLocker());
+     state_ = state; 
+     // Never queue a NULL path: it cannot be converted to a valid entry
+     if ( znodePath != NULL )
+     {
+         znodeQueue_.push_back( znodePath );
+     }
+ }
+ 
+ // Sets 'ts' to the current CLOCK_REALTIME time plus zcMonitoringRate_
+ // seconds. MonitorZCluster() passes the result to CLock::timedWait().
+ void CZClient::SetTimeToWakeUp( struct timespec &ts )
+ {
+     const char method_name[] = "CZClient::SetTimeToWakeUp";
+     TRACE_ENTRY;
+ 
+     // Current time ...
+     clock_gettime(CLOCK_REALTIME, &ts);
+ #if 0
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+             trace_printf("%s@%d" " - Clock   time %ld(secs):%ld(nsecs)(zcMonitoringRate_=%ld)\n"
+                         , method_name, __LINE__
+                         , ts.tv_sec, ts.tv_nsec, zcMonitoringRate_);
+     }
+ #endif
+ 
+     // ... plus the monitoring interval, in seconds
+     ts.tv_sec = ts.tv_sec + zcMonitoringRate_;
+ 
+ #if 0
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+             trace_printf("%s@%d" " - Timeout time %ld(secs):%ld(nsecs)(zcMonitoringRate_=%ld)\n"
+                         , method_name, __LINE__
+                         , ts.tv_sec, ts.tv_nsec, zcMonitoringRate_);
+     }
+ #endif
+     TRACE_EXIT;
+ }
+ 
+ // Sets a watch on 'monZnode' by reading it with zoo_get() and the watch
+ // argument set. The data read is not used.
+ //
+ // Returns ZOK when the watch was set, ZNONODE when the znode does not exist,
+ // otherwise the error from zoo_exists() or zoo_get(), which is also logged.
+ int CZClient::SetZNodeWatch( string &monZnode )
+ {
+     const char method_name[] = "CZClient::SetZNodeWatch";
+     TRACE_ENTRY;
+ 
 -    char  zkData[MAX_PROCESSOR_NAME];
++    char  zkData[MPI_MAX_PROCESSOR_NAME];
+     int   zkDataLen = sizeof(zkData);
+     Stat  stat;
+     int   rc = -1;
+ 
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         trace_printf( "%s@%d monZnode=%s\n"
+                     , method_name, __LINE__, monZnode.c_str() );
+     }
+ 
+     rc = zoo_exists( ZHandle, monZnode.c_str( ), 0, &stat );
+     switch ( rc )
+     {
+     case ZNONODE:
+         // Nothing to watch, the error is returned to the caller
+         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+         {
+             trace_printf( "%s@%d monZnode=%s does not exist (ZNONODE)\n"
+                         , method_name, __LINE__, monZnode.c_str() );
+         }
+         break;
+ 
+     case ZOK:
+         // Read the znode with the watch argument set
+         rc = zoo_get( ZHandle, monZnode.c_str( ), true, zkData, &zkDataLen, &stat );
+         if ( rc != ZOK )
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf( buf, sizeof(buf)
+                     , "[%s], zoo_get() for %s failed with error %s\n"
+                     ,  method_name, monZnode.c_str( ), ZooErrorStr(rc));
+             mon_log_write(MON_ZCLIENT_SETZNODEWATCH_1, SQ_LOG_ERR, buf);
+         }
+         break;
+ 
+     default:
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf( buf, sizeof(buf)
+                     , "[%s], zoo_exists() for %s failed with error %s\n"
+                     ,  method_name, monZnode.c_str( ), ZooErrorStr(rc));
+             mon_log_write(MON_ZCLIENT_SETZNODEWATCH_1, SQ_LOG_ERR, buf);
+         }
+         break;
+     }
+ 
+     TRACE_EXIT;
+     return( rc );
+ }
+ 
+ // Enables cluster monitoring: sets the enabled flag, moves the state to
+ // ZC_WATCH and wakes one waiter. Does nothing when already enabled.
+ void CZClient::StartClusterMonitoring( void )
+ {
+     const char method_name[] = "CZClient::StartClusterMonitoring";
+     TRACE_ENTRY;
+ 
+     if ( IsEnabled() )
+     {
+         // Already monitoring
+         TRACE_EXIT;
+         return;
+     }
+ 
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         trace_printf( "%s@%d Cluster monitoring started!\n\n", method_name, __LINE__ );
+     }
+     SetEnabled( true );
+     SetState( ZC_WATCH );
+     CLock::wakeOne();
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Disables cluster monitoring: clears the check-cluster and enabled flags,
+ // moves the state to ZC_DISABLED and wakes one waiter. Does nothing when
+ // monitoring is not enabled.
+ void CZClient::StopClusterMonitoring( void )
+ {
+     const char method_name[] = "CZClient::StopClusterMonitoring";
+     TRACE_ENTRY;
+ 
+     if ( !IsEnabled() )
+     {
+         // Not monitoring
+         TRACE_EXIT;
+         return;
+     }
+ 
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         trace_printf( "\n%s@%d Cluster monitoring stopped!\n", method_name, __LINE__ );
+     }
+     SetCheckCluster( false );
+     SetEnabled( false );
+     SetState( ZC_DISABLED );
+     CLock::wakeOne();
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Asks the ZClient thread to exit and waits for it to do so.
+ //
+ // Sets the state to ZC_SHUTDOWN, wakes the thread and joins it.
+ //
+ // Returns 0 on success, otherwise the pthread_join() error number, which is
+ // also logged.
+ int CZClient::ShutdownWork(void)
+ {
+     const char method_name[] = "CZClient::ShutdownWork";
+     TRACE_ENTRY;
+ 
+     // Set the state that tells the ZClient thread to exit
+     SetState( ZC_SHUTDOWN );
+     CLock::wakeOne();
+ 
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         trace_printf( "%s@%d waiting for ZClient thread %lx to exit.\n"
+                     ,  method_name, __LINE__, threadId_);
+     }
+ 
+     // Wait for the ZClient thread to exit
+     int rc = pthread_join( threadId_, NULL );
+     if (rc != 0)
+     {
+         // pthread_join() returns the error number directly
+         char buf[MON_STRING_BUF_SIZE];
+         snprintf( buf, sizeof(buf)
+                 , "[%s], Error= Can't join thread! - errno=%d (%s)\n"
+                 , method_name, rc, strerror(rc) );
+         mon_log_write(MON_ZCLIENT_SHUTDOWNWORK_1, SQ_LOG_ERR, buf);
+     }
+ 
+     TRACE_EXIT;
+     return(rc);
+ }
+ 
+ // ZClientThread main
+ // Thread start routine passed to pthread_create() by CZClient::StartWork().
+ //
+ //   arg  the CZClient instance whose MonitorZCluster() loop is to be run
+ //
+ // Blocks every signal except SIGPROF in this thread, then runs the loop
+ // until it returns. Always returns NULL.
+ static void *ZClientThread(void *arg)
+ {
+     const char method_name[] = "ZClientThread";
+     TRACE_ENTRY;
+ 
+     // Parameter passed to the thread is the CZClient object
+     CZClient *zooClient = static_cast<CZClient *>(arg);
+ 
+     // Block all signals, leaving SIGPROF deliverable so that profilers
+     // (such as google profiler) keep working
+     sigset_t  blocked;
+     sigfillset(&blocked);
+     sigdelset(&blocked, SIGPROF);
+     const int rc = pthread_sigmask(SIG_SETMASK, &blocked, NULL);
+     if (rc != 0)
+     {
+         char buf[MON_STRING_BUF_SIZE];
+         snprintf(buf, sizeof(buf), "[%s], pthread_sigmask error=%d\n",
+                  method_name, rc);
+         mon_log_write(MON_ZCLIENT_ZCLIENTTHREAD_1, SQ_LOG_ERR, buf);
+     }
+ 
+     // Enter thread processing loop
+     zooClient->MonitorZCluster();
+ 
+     TRACE_EXIT;
+     return NULL;
+ }
+ 
+ 
+ // Create the ZClientThread
+ // Starts the ZClient thread with this object as its argument.
+ //
+ // Returns 0 on success, otherwise the pthread_create() error number, which
+ // is also logged.
+ int CZClient::StartWork()
+ {
+     const char method_name[] = "CZClient::StartWork";
+     TRACE_ENTRY;
+ 
+     const int rc = pthread_create(&threadId_, NULL, ZClientThread, this);
+     if (rc != 0)
+     {
+         char buf[MON_STRING_BUF_SIZE];
+         snprintf(buf, sizeof(buf), "[%s], ZClientThread create error=%d\n",
+                  method_name, rc);
+         mon_log_write(MON_ZCLIENT_STARTWORK_1, SQ_LOG_ERR, buf);
+     }
+ 
+     TRACE_EXIT;
+     return(rc);
+ }
+ 
+ // Requests the start of cluster monitoring: sets the state of the global
+ // ZClient object to ZC_START and wakes one waiter. Does nothing when there
+ // is no ZooKeeper handle.
+ void CZClient::StartMonitoring( void )
+ {
+     const char method_name[] = "CZClient::StartMonitoring";
+     TRACE_ENTRY;
+ 
+     if (ZHandle != 0)
+     {
+         ZClient->SetState( CZClient::ZC_START );
+         ZClient->CLock::wakeOne();
+     }
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Requests the stop of cluster monitoring: sets the state of the global
+ // ZClient object to ZC_STOP and wakes one waiter.
+ void CZClient::StopMonitoring( void )
+ {
+     const char method_name[] = "CZClient::StopMonitoring";
+     TRACE_ENTRY;
+ 
+     ZClient->SetState( CZClient::ZC_STOP );
+     ZClient->CLock::wakeOne();
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Maps a ZooKeeper watch event onto a client state and wakes one waiter.
+ //
+ //   type       ZooKeeper event type (one of the ZOO_*_EVENT values)
+ //   znodePath  path of the znode the event refers to
+ //
+ //   created / deleted / changed  ->  ZC_ZNODE   (path is queued)
+ //   child                        ->  ZC_CLUSTER (path is queued)
+ //   not watching                 ->  ZC_CLUSTER
+ //
+ // Any other event type leaves the state unchanged; the waiter is woken in
+ // every case.
+ void CZClient::TriggerCheck( int type, const char *znodePath )
+ {
+     const char method_name[] = "CZClient::TriggerCheck";
+     TRACE_ENTRY;
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         trace_printf( "%s@%d" " - state = %s\n"
+                     , method_name, __LINE__
+                     , ZooConnectionTypeStr( type ) );
+     }
+ 
+     const bool znodeEvent = ( type == ZOO_CREATED_EVENT )
+                          || ( type == ZOO_DELETED_EVENT )
+                          || ( type == ZOO_CHANGED_EVENT );
+ 
+     if ( znodeEvent )
+     {
+         SetState( ZC_ZNODE, znodePath );
+     }
+     else if ( type == ZOO_CHILD_EVENT )
+     {
+         SetState( ZC_CLUSTER, znodePath );
+     }
+     else if ( type == ZOO_NOTWATCHING_EVENT )
+     {
+         SetState( ZC_CLUSTER );
+     }
+ 
+     CLock::wakeOne();
+     TRACE_EXIT;
+ }
+ 
+ // Sets a watch on every znode registered under the cluster znode.
+ //
+ // Only traces and returns when IsCheckCluster() is already true. If the
+ // list of child znodes cannot be obtained, the failure is logged and the
+ // state is set to ZC_STOP. If a watch cannot be set on any one child, the
+ // failure is logged and the method returns without changing the state.
+ // Once all watches are set, the check-cluster flag is turned on and the
+ // state moves to ZC_CLUSTER.
+ void CZClient::WatchCluster( void )
+ {
+     const char method_name[] = "CZClient::WatchCluster";
+     TRACE_ENTRY;
+ 
+     if ( IsCheckCluster() )
+     {
+         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+         {
+             trace_printf( "%s@%d CheckCluster is already set!\n"
+                         , method_name, __LINE__ );
+         }
+         TRACE_EXIT;
+         return;
+     }
+ 
+     struct String_vector nodes;
+     int rc = GetClusterZNodes( &nodes );
+     if ( rc != ZOK )
+     {
+         char buf[MON_STRING_BUF_SIZE];
+         snprintf( buf, sizeof(buf)
+                 , "[%s], GetClusterZNodes() failed!\n"
+                 , method_name );
+         mon_log_write(MON_ZCLIENT_WATCHCLUSTER_1, SQ_LOG_ERR, buf);
+         SetState( CZClient::ZC_STOP );
+         CLock::wakeOne();
+         TRACE_EXIT;   // keep entry/exit tracing balanced on the error path
+         return;
+     }
+ 
+     if ( nodes.count > 0 )
+     {
+         // The parent part of every child path is the same, build it once
+         stringstream parentPath;
+         parentPath << zkRootNode_.c_str()
+                    << zkRootNodeInstance_.c_str()
+                    << ZCLIENT_CLUSTER_ZNODE << "/";
+         const string clusterPrefix( parentPath.str( ) );
+ 
+         for (int i = 0; i < nodes.count ; i++ )
+         {
+             string monZnode( clusterPrefix );
+             if ( nodes.data[i] != NULL )
+             {
+                 monZnode += nodes.data[i];
+             }
+ 
+             rc = SetZNodeWatch( monZnode );
+             if ( rc != ZOK )
+             {
+                 char buf[MON_STRING_BUF_SIZE];
+                 snprintf( buf, sizeof(buf)
+                         , "[%s], SetZNodeWatch() failed!\n"
+                         , method_name );
+                 mon_log_write(MON_ZCLIENT_WATCHCLUSTER_2, SQ_LOG_ERR, buf);
+ 
+                 FreeStringVector( &nodes );
+                 TRACE_EXIT;
+                 return;
+             }
+ 
+             if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+             {
+                 trace_printf( "%s@%d Watch set on monZnode=%s\n"
+                             , method_name, __LINE__
+                             , monZnode.c_str() );
+             }
+         }
+         SetCheckCluster( true );
+         SetState( ZC_CLUSTER );
+         FreeStringVector( &nodes );
+     }
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Sets a watch on the znode of the node called 'nodeName'.
+ //
+ // The znode path is <root><instance><ZCLIENT_CLUSTER_ZNODE>/<nodeName>. The
+ // object's lock is held while the watch is being set.
+ //
+ // Returns the result of SetZNodeWatch(); a failure is also logged.
+ int CZClient::WatchNode( const char *nodeName )
+ {
+     const char method_name[] = "CZClient::WatchNode";
+     TRACE_ENTRY;
+ 
+     stringstream pathBuilder;
+     pathBuilder << zkRootNode_.c_str()
+                 << zkRootNodeInstance_.c_str()
+                 << ZCLIENT_CLUSTER_ZNODE << "/"
+                 << nodeName;
+     string monZnode( pathBuilder.str( ) );
+ 
+     lock();
+     const int rc = SetZNodeWatch( monZnode );
+     unlock();
+ 
+     if ( rc == ZOK )
+     {
+         if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+         {
+             trace_printf( "%s@%d Watch set on monZnode=%s\n"
+                         , method_name, __LINE__
+                         , monZnode.c_str() );
+         }
+     }
+     else
+     {
+         char buf[MON_STRING_BUF_SIZE];
+         snprintf( buf, sizeof(buf)
+                 , "[%s], SetZNodeWatch() failed!\n"
+                 , method_name );
+         mon_log_write(MON_ZCLIENT_WATCHNODE_1, SQ_LOG_ERR, buf);
+     }
+ 
+     TRACE_EXIT;
+     return(rc);
+ }
+ 
+ // Deletes the znode of the node called 'nodeName', whatever its version.
+ //
+ // The znode path is <root><instance><ZCLIENT_CLUSTER_ZNODE>/<nodeName>.
+ //
+ // Returns ZOK when the znode was deleted or did not exist, otherwise the
+ // zoo_delete() error. Every outcome is logged; only a real failure is
+ // logged as an error.
+ int CZClient::WatchNodeDelete( const char *nodeName )
+ {
+     const char method_name[] = "CZClient::WatchNodeDelete";
+     TRACE_ENTRY;
+ 
+     int rc = -1;
+ 
+     stringstream newpath;
+     newpath.str( "" );
+     newpath << zkRootNode_.c_str() 
+             << zkRootNodeInstance_.c_str() 
+             << ZCLIENT_CLUSTER_ZNODE << "/"
+             << nodeName;
+     string monZnode = newpath.str( );
+ 
+     if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+     {
+         trace_printf( "%s@%d zoo_delete(%s)\n"
+                     , method_name, __LINE__
+                     , monZnode.c_str() );
+     }
+     // Version -1: delete regardless of the znode's current version
+     rc = zoo_delete( ZHandle
+                    , monZnode.c_str( )
+                    , -1 );
+     if ( rc == ZOK )
+     {
+         char buf[MON_STRING_BUF_SIZE];
+         snprintf( buf, sizeof(buf)
+                 , "[%s], znode (%s) deleted!\n"
+                 , method_name, nodeName );
+         mon_log_write(MON_ZCLIENT_WATCHNODEDELETE_1, SQ_LOG_INFO, buf);
+     }
+     else if ( rc == ZNONODE )
+     {
+         // Already gone, which is what the caller wanted
+         rc = ZOK;
+         char buf[MON_STRING_BUF_SIZE];
+         snprintf( buf, sizeof(buf)
+                 , "[%s], znode (%s) already deleted!\n"
+                 , method_name, nodeName );
+         mon_log_write(MON_ZCLIENT_WATCHNODEDELETE_2, SQ_LOG_INFO, buf);
+     }
+     else
+     {
+         // A real failure: log it as an error like every other failure here
+         char buf[MON_STRING_BUF_SIZE];
+         snprintf( buf, sizeof(buf)
+                 , "[%s], zoo_delete(%s) failed with error %s\n"
+                 , method_name, nodeName, ZooErrorStr(rc) );
+         mon_log_write(MON_ZCLIENT_WATCHNODEDELETE_3, SQ_LOG_ERR, buf);
+     }
+ 
+     TRACE_EXIT;
+     return( rc );
+ }
+ 

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1e294233/core/sqf/monitor/linux/zootest.cxx
----------------------------------------------------------------------
diff --cc core/sqf/monitor/linux/zootest.cxx
index 0000000,8a90299..1536e98
mode 000000,100644..100644
--- a/core/sqf/monitor/linux/zootest.cxx
+++ b/core/sqf/monitor/linux/zootest.cxx
@@@ -1,0 -1,283 +1,283 @@@
+ /**********************************************************************
+ // @@@ START COPYRIGHT @@@
+ //
+ // Licensed to the Apache Software Foundation (ASF) under one
+ // or more contributor license agreements.  See the NOTICE file
+ // distributed with this work for additional information
+ // regarding copyright ownership.  The ASF licenses this file
+ // to you under the Apache License, Version 2.0 (the
+ // "License"); you may not use this file except in compliance
+ // with the License.  You may obtain a copy of the License at
+ //
+ //   http://www.apache.org/licenses/LICENSE-2.0
+ //
+ // Unless required by applicable law or agreed to in writing,
+ // software distributed under the License is distributed on an
+ // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ // KIND, either express or implied.  See the License for the
+ // specific language governing permissions and limitations
+ // under the License.
+ //
+ // @@@ END COPYRIGHT @@@
+ ********************************************************************/
+ #include <errno.h>
+ #include <sys/socket.h>
+ #include <signal.h>
+ #include <ctype.h>
+ #include <string.h>
+ #include <ifaddrs.h>
+ #include <netdb.h>
+ #include <new>
+ #include <stdio.h>
+ #include <list>
+ #include <string>
+ 
+ #include "msgdef.h"
+ #include "montrace.h"
+ #include "monlogging.h"
+ #include "zookeeper/zookeeper.h"
+ #include "zclient.h"
+ #include "zootest.h"
+ 
+ using namespace std;
+ 
+ bool debugFlag = true;                            // NOTE(review): not referenced in this file; presumably read by linked monitor code -- confirm
+ 
+ bool ZClientEnabled = true;                       // cleared by main() when SQ_MON_ZCLIENT_ENABLED is "0" and by CreateZookeeperClient() on missing port/hosts
+ char Node_name[MPI_MAX_PROCESSOR_NAME] = {'\0'};  // filled by gethostname() in main()
+ char MyPNidStr[8];                                // NOTE(review): not referenced in this file -- confirm it is needed
+ int MyPNID = -1;                                  // set from the "-pnid <n>" command line option in main()
+ int MyNid = -1;                                   // never assigned in this file; passed as-is to the CMonLog constructor
+ int MyPid = -1;                                   // set to getpid() in main()
+ 
+ CZClient    *ZClient = NULL;                      // created by CreateZookeeperClient(); stays NULL when creation is skipped
+ CMonLog     *MonLog =  NULL;                      // created in main()
+ CMonLog     *SnmpLog =  NULL;                     // never assigned in this file
+ 
+ // Invoked when the ZooKeeper session has expired: reports the event on
+ // stdout, calls ZClient->StopMonitoring() and ZClient->ShutdownWork(), and
+ // terminates the process with exit status 1. This function does not return.
+ void HandleZSessionExpiration( void )
+ {
+     const char method_name[] = "HandleZSessionExpiration";
+     TRACE_ENTRY;
+ 
+     fprintf( stdout, "%s@%d ZSession expired!\n", method_name, __LINE__ );
+ 
+     // Wind the client down before leaving.
+     ZClient->StopMonitoring();
+     ZClient->ShutdownWork();
+ 
+     fprintf( stdout, "%s@%d zootest exiting!\n", method_name, __LINE__ );
+ 
+     TRACE_EXIT;
+     exit( 1 );
+ }
+ 
+ // Reports on stdout that the znode belonging to nodeName was deleted.
+ // The test program takes no further action for the node.
+ void HandleNodeExpiration( const char *nodeName )
+ {
+     const char method_name[] = "HandleNodeExpiration";
+     TRACE_ENTRY;
+ 
+     fprintf( stdout, "%s@%d Node %s znode deleted!\n", method_name, __LINE__, nodeName );
+ 
+     TRACE_EXIT;
+ }
+ 
+ // Creates the global ZClient object from the environment.
+ //
+ // ZOOKEEPER_PORT must start with a digit and ZOOKEEPER_NODES must be a
+ // comma separated list of host names. The two are combined into a connect
+ // string of the form "host1:port,host2:port,..." which is handed to the
+ // CZClient constructor together with ZCLIENT_TRAFODION_ZNODE and
+ // ZCLIENT_INSTANCE_ZNODE.
+ //
+ // Does nothing when ZClientEnabled is false. When the port is missing, or
+ // the host list is set but empty, the problem is logged, ZClientEnabled is
+ // cleared and ZClient is left NULL; callers must check before using ZClient.
+ // Aborts the process when the CZClient object cannot be allocated.
+ void CreateZookeeperClient( void )
+ {
+     const char method_name[] = "CreateZookeeperClient";
+     TRACE_ENTRY;
+ 
+     if ( ZClientEnabled )
+     {
+         string       zkQuorumHosts;
+         stringstream zkQuorumPort;
+         char *env;
+ 
+         int zport;
+         env = getenv("ZOOKEEPER_PORT");
+         // isdigit() is only defined for unsigned char values (and EOF)
+         if ( env && isdigit( static_cast<unsigned char>(*env) ) )
+         {
+             zport = atoi(env);
+         }
+         else
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf(buf, sizeof(buf),
+                      "[%s], Zookeeper quorum port is not defined!\n"
+                     , method_name);
+             mon_log_write(MON_ZOOCLIENT_MAIN_3, SQ_LOG_CRIT, buf);
+ 
+             ZClientEnabled = false;
+             TRACE_EXIT;
+             return;
+         }
+         
+         // NOTE(review): when ZOOKEEPER_NODES is not set at all, the client is
+         // still created with an empty connect string -- confirm this is intended.
+         env = getenv("ZOOKEEPER_NODES");
+         if ( env )
+         {
+             zkQuorumHosts = env;
+             if ( zkQuorumHosts.length() == 0 )
+             {
+                 char buf[MON_STRING_BUF_SIZE];
+                 snprintf(buf, sizeof(buf),
+                          "[%s], Zookeeper quorum hosts are not defined!\n"
+                         , method_name);
+                 mon_log_write(MON_ZOOCLIENT_MAIN_4, SQ_LOG_CRIT, buf);
+ 
+                 ZClientEnabled = false;
+                 TRACE_EXIT;
+                 return;
+             }
+             
+             zkQuorumPort.str( "" );
+             
+             // Split the host list on ',' directly in the std::string so that
+             // no fixed-size scratch buffer can be overrun by a long list.
+             // Empty tokens (leading, trailing or repeated commas) are skipped.
+             bool firstHost = true;
+             string::size_type start = 0;
+             while ( start < zkQuorumHosts.length() )
+             {
+                 string::size_type end = zkQuorumHosts.find( ',', start );
+                 if ( end == string::npos )
+                 {
+                     end = zkQuorumHosts.length();
+                 }
+                 if ( end > start )
+                 {
+                     if ( !firstHost )
+                     {
+                         zkQuorumPort << ",";
+                     }
+                     zkQuorumPort << zkQuorumHosts.substr( start, end - start )
+                                  << ":" 
+                                  << zport;
+                     firstHost = false;
+                 }
+                 start = end + 1;
+             }
+             if (trace_settings & (TRACE_INIT | TRACE_RECOVERY))
+             {
+                 trace_printf( "%s@%d zkQuorumPort=%s\n"
+                             , method_name, __LINE__
+                             , zkQuorumPort.str().c_str() );
+             }
+         }
+     
+         // nothrow form so that an allocation failure reaches the logging
+         // below instead of escaping as an uncaught std::bad_alloc.
+         ZClient = new (std::nothrow) CZClient( zkQuorumPort.str().c_str()
+                                              , ZCLIENT_TRAFODION_ZNODE
+                                              , ZCLIENT_INSTANCE_ZNODE );
+         if ( ZClient == NULL )
+         {
+             char buf[MON_STRING_BUF_SIZE];
+             snprintf(buf, sizeof(buf),
+                      "[%s], Failed to allocate ZClient object!\n"
+                     , method_name);
+             mon_log_write(MON_ZOOCLIENT_MAIN_5, SQ_LOG_CRIT, buf);
+             abort();
+         }
+     }
+ 
+     TRACE_EXIT;
+ }
+ 
+ /*
+  *
+  * The znode hierarchy is as follows:
+  *    /trafodion/<instance-name>/cluster
+  *    /trafodion/<instance-name>/cluster/<node-name-1>
+  *    /trafodion/<instance-name>/cluster/<node-name-2>
+  * Each monitor will create an ephemeral node using its node name (hostname)
+  * followed by its <pnid>.
+  * The monitor processes will watch the cluster parent znode changes.
+  * When a change in the cluster znode occurs they will check the state of
+  * each child. A missing child znode is assumed to be a down node.
+  *
+  */
+ // zootest entry point.
+ //
+ // Command line:  -pnid <n>   sets MyPNID; every other argument is ignored.
+ // Environment:   SQ_MON_ZCLIENT_ENABLED  "0" disables the ZooKeeper client
+ //                MON_INIT_SLEEP          seconds to keep monitoring (default 10)
+ //
+ // Creates the ZClient, starts its work and monitoring, sleeps for the
+ // configured time, then stops monitoring and shuts the client down.
+ // Exits with 0 on success (or when the client is disabled by the
+ // environment) and with 1 when the client cannot be created, started or
+ // shut down.
+ int main( int argc, char *argv[], char *envp[] )
+ {
+     const char method_name[] = "main";
+     TRACE_ENTRY;
+ 
+     char *env;
+     char  MyName[MPI_MAX_PROCESSOR_NAME];
+ 
+     trace_settings |= TRACE_INIT;
+ 
+     int   count = 1;
+     while ( count < argc )
+     {
+         if ( strcmp( argv[count], "-pnid" ) == 0 )
+         {
+             if ( ++count < argc )
+             {
+                 MyPNID=atoi( argv[count] );
+             }
+         }
+         count++;
+     }
+ 
+ 
+     sigset_t newset, oldset;
+     sigemptyset( &newset );
+     sigaddset( &newset,SIGQUIT );
+     sigaddset( &newset,SIGTERM );
+     sigprocmask( SIG_BLOCK,&newset,&oldset );
+ 
+     gethostname(Node_name, MPI_MAX_PROCESSOR_NAME);
+     // gethostname() may leave the buffer unterminated when the name is truncated
+     Node_name[MPI_MAX_PROCESSOR_NAME - 1] = '\0';
+ 
+     snprintf( MyName, sizeof(MyName), "%s", "zooclient" );
+     MyPid = getpid();
+ 
+     MonLog = new CMonLog( "log4cxx.monitor.wdg.config", "ZOO", "alt.wdg", MyPNID, MyNid, MyPid, MyName  );
+ 
+     int rc;
+     env = getenv("SQ_MON_ZCLIENT_ENABLED");
+     if ( env && strcmp(env,"0")==0 )
+     {
+         ZClientEnabled = false;
+     }
+ 
+     if ( ZClientEnabled )
+     {
+         CreateZookeeperClient();
+ 
+         // CreateZookeeperClient() clears ZClientEnabled and leaves ZClient
+         // NULL when the quorum port or hosts are not configured; do not
+         // dereference ZClient in that case.
+         if ( !ZClientEnabled || ZClient == NULL )
+         {
+             printf( "%s@%d ZClient could not be created, exiting!\n"
+                   , method_name, __LINE__ );
+             TRACE_EXIT;
+             exit( 1 );
+         }
+ 
+         sleep( 3 );  // Wait for the other zclients to register
+ 
+         rc = ZClient->StartWork();
+         if (rc != 0)
+         {
+             TRACE_EXIT;
+             exit( 1 );
+         }
+         
+         ZClient->StartMonitoring();
+     
+         unsigned int sleepTime = 10; // 10 seconds
+         env = getenv("MON_INIT_SLEEP");
+         // isdigit() is only defined for unsigned char values (and EOF)
+         if ( env && isdigit( static_cast<unsigned char>(*env) ) )
+         {
+             sleepTime = atoi(env);
+         }
+         sleep( sleepTime );  // Til' quitting time!
+     
+         ZClient->StopMonitoring();
+         
+         sleep( 1 );
+     
+         // Stop the Process Monitor thread
+         rc = ZClient->ShutdownWork();
+         if (rc != 0)
+         {
+             TRACE_EXIT;
+             exit( 1 );
+         }
+     }
+     else
+     {
+         printf( "%s@%d ZClient is disabled, exiting!\n"
+               , method_name, __LINE__ );
+     }
+ 
+     printf( "%s@%d zootest exiting!\n"
+           , method_name, __LINE__ );
+ 
+     TRACE_EXIT;
+     exit( 0 );
+ }

http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1e294233/core/sqf/sql/scripts/sqcheck
----------------------------------------------------------------------
diff --cc core/sqf/sql/scripts/sqcheck
index f5806c9,b5d79ae..2c10044
--- a/core/sqf/sql/scripts/sqcheck
+++ b/core/sqf/sql/scripts/sqcheck
@@@ -226,7 -251,14 +247,11 @@@ if [[ -z $SQSCRIPTS_DIR ]]; the
      SQSCRIPTS_DIR=$MY_SQROOT/sql/scripts
  fi
  
 -SQSCRIPT_FILE="$SQSCRIPTS_DIR/gomon.cold"
 -STARTSSMP_FILE="$SQSCRIPTS_DIR/ssmpstart"
 -STARTSSCP_FILE="$SQSCRIPTS_DIR/sscpstart"
  
+ if [ $check_node '>' -1 ]; then
+    getNodeStatus
+ fi
+ 
  ### CONFIGURED NODEs
  sq_tmp_node_info=`mktemp -t`
  sqshell -c node info > $sq_tmp_node_info