You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafodion.apache.org by li...@apache.org on 2018/08/05 12:39:09 UTC

[1/2] trafodion git commit: [TRAFODION-3164] Restart mxosrvrs on-demand

Repository: trafodion
Updated Branches:
  refs/heads/master 4ec0da84b -> 9c59d7803


[TRAFODION-3164] Restart mxosrvrs on-demand

This code adds a zookeeper value that signals to mxosrvr processes
when it is time to exit. This allows processes to be phased out when
things in the environment change. A process will wait until it
is idle before exiting.


Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/99da63e9
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/99da63e9
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/99da63e9

Branch: refs/heads/master
Commit: 99da63e9cf156c7848a8613cb37fe768e368331f
Parents: 7184f8f
Author: Hans Zeller <hz...@apache.org>
Authored: Tue Jul 31 20:42:00 2018 +0000
Committer: Hans Zeller <hz...@apache.org>
Committed: Tue Jul 31 20:42:00 2018 +0000

----------------------------------------------------------------------
 core/conn/odbc/src/odbc/nsksrvr/SrvrMain.cpp    | 40 +++++++++++++++++++-
 .../org/trafodion/dcs/master/DcsMaster.java     |  6 ++-
 2 files changed, 44 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafodion/blob/99da63e9/core/conn/odbc/src/odbc/nsksrvr/SrvrMain.cpp
----------------------------------------------------------------------
diff --git a/core/conn/odbc/src/odbc/nsksrvr/SrvrMain.cpp b/core/conn/odbc/src/odbc/nsksrvr/SrvrMain.cpp
index 72350ec..c23406f 100644
--- a/core/conn/odbc/src/odbc/nsksrvr/SrvrMain.cpp
+++ b/core/conn/odbc/src/odbc/nsksrvr/SrvrMain.cpp
@@ -100,9 +100,11 @@ bool keepaliveStatus = false;
 int keepaliveIdletime;
 int keepaliveIntervaltime;
 int keepaliveRetrycount;
+long epoch = -1;
 void watcher(zhandle_t *zzh, int type, int state, const char *path, void *watcherCtx);
 bool verifyPortAvailable(const char * idForPort, int portNumber);
 BOOL getInitParamSrvr(int argc, char *argv[], SRVR_INIT_PARAM_Def &initParam, char* strName, char* strValue);
+long  getEpoch(zhandle_t *zh);
 
 //only support positive number
 BOOL getNumberTemp( char* strValue, int& nValue )
@@ -651,6 +653,10 @@ catch(SB_Fatal_Excep sbfe)
 		exit(1);
 	}
 
+        // get the current epoch from zookeeper and also put a watch on it
+        // (to be even safer, take epoch as a command line arg)
+        epoch = getEpoch(zh);
+
 //LCOV_EXCL_START
 // when a server dies, the MXOAS sends message to CFG. CFG creates the MXOSRVR process
 // and passess only one command line atribute: -SQL CLEANUP OBSOLETE VOLATILE TABLES
@@ -988,6 +994,18 @@ void watcher(zhandle_t *zzh, int type, int state, const char *path, void *watche
             zh=0;
         }
     }
+
+    if (type == ZOO_CHANGED_EVENT) {
+      string masterNode(zkRootNode);
+
+      masterNode.append("/dcs/master");
+
+      if (masterNode.compare(path) == 0) {
+        if (getEpoch(zzh) != epoch) {
+          shutdownThisThing=1;
+        }
+      }
+    }
 }
 
 bool verifyPortAvailable(const char * idForPort,
@@ -1536,4 +1554,24 @@ BOOL getInitParamSrvr(int argc, char *argv[], SRVR_INIT_PARAM_Def &initParam, ch
 
 }
 
-
+// The "epoch" is a time period between configuration changes in the
+// system. When such a configuration change happens (e.g. the
+// executable of the mxosrvr is replaced, or a system default is being
+// changed), we want to stop all existing mxosrvrs once they become
+// idle and replace them with new ones. Therefore, keep a watch on
+// this value and exit when it changes and when our state is or
+// becomes idle. Returns the epoch read from <zkRootNode>/dcs/master
+// (setting a watch on that node), or -1 if the value cannot be read.
+long  getEpoch(zhandle_t *zh) {
+  char path[2000];
+  char zkData[1000];
+  int zkDataLen = sizeof(zkData) - 1; // keep one byte for the terminator
+  long result = -1;                   // long, so atol() is not narrowed
+  snprintf(path, sizeof(path), "%s/dcs/master", zkRootNode);
+  int rc = zoo_get(zh, path, 1, zkData, &zkDataLen, NULL);
+
+  if (rc == ZOK && zkDataLen > 0) {
+    zkData[zkDataLen] = '\0'; // zoo_get() returns raw bytes, not a C string
+    result = atol(zkData);
+  }
+  return result;
+}

http://git-wip-us.apache.org/repos/asf/trafodion/blob/99da63e9/dcs/src/main/java/org/trafodion/dcs/master/DcsMaster.java
----------------------------------------------------------------------
diff --git a/dcs/src/main/java/org/trafodion/dcs/master/DcsMaster.java b/dcs/src/main/java/org/trafodion/dcs/master/DcsMaster.java
index 719c3d3..4852fab 100644
--- a/dcs/src/main/java/org/trafodion/dcs/master/DcsMaster.java
+++ b/dcs/src/main/java/org/trafodion/dcs/master/DcsMaster.java
@@ -54,6 +54,7 @@ import org.apache.zookeeper.ZooKeeper.States;
 import org.apache.hadoop.util.StringUtils;
 
 import org.trafodion.dcs.Constants;
+import org.trafodion.dcs.util.Bytes;
 import org.trafodion.dcs.util.DcsConfiguration;
 import org.trafodion.dcs.util.DcsNetworkConfiguration;
 import org.trafodion.dcs.util.InfoServer;
@@ -85,6 +86,7 @@ public class DcsMaster implements Runnable {
     private JVMShutdownHook jvmShutdownHook;
     private static String trafodionHome;
     private CountDownLatch isLeader = new CountDownLatch(1);
+    private int epoch = 1;
 
     private MasterLeaderElection mle = null;
 
@@ -162,9 +164,11 @@ public class DcsMaster implements Runnable {
             stat = zkc.exists(parentZnode
                     + Constants.DEFAULT_ZOOKEEPER_ZNODE_MASTER, false);
             if (stat == null) {
+                byte[] data = Bytes.toBytes(Long.toString(epoch));
+
                 zkc.create(parentZnode
                         + Constants.DEFAULT_ZOOKEEPER_ZNODE_MASTER,
-                        new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE,
+                        data, ZooDefs.Ids.OPEN_ACL_UNSAFE,
                         CreateMode.PERSISTENT);
             }
             stat = zkc.exists(parentZnode


[2/2] trafodion git commit: merge [TRAFODION-3164]

Posted by li...@apache.org.
merge [TRAFODION-3164] 


Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/9c59d780
Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/9c59d780
Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/9c59d780

Branch: refs/heads/master
Commit: 9c59d78036f7135dbafe4c27d5ce6902ba5f5486
Parents: 4ec0da8 99da63e
Author: Liu Ming <ov...@sina.com>
Authored: Sun Aug 5 07:25:17 2018 +0000
Committer: Liu Ming <ov...@sina.com>
Committed: Sun Aug 5 07:25:17 2018 +0000

----------------------------------------------------------------------
 core/conn/odbc/src/odbc/nsksrvr/SrvrMain.cpp    | 40 +++++++++++++++++++-
 .../org/trafodion/dcs/master/DcsMaster.java     |  6 ++-
 2 files changed, 44 insertions(+), 2 deletions(-)
----------------------------------------------------------------------