You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@knox.apache.org by sm...@apache.org on 2020/01/07 19:57:26 UTC

[knox] branch master updated: KNOX-2157 - Verifying the server's state in addition to PID check at gateway start and registering shutdown hook in order to stop the server gracefully. (#230)

This is an automated email from the ASF dual-hosted git repository.

smolnar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/knox.git


The following commit(s) were added to refs/heads/master by this push:
     new cdcb186  KNOX-2157 - Verifying the server's state in addition to PID check at gateway start and registering shutdown hook in order to stop the server gracefully. (#230)
cdcb186 is described below

commit cdcb1860b1e7b3dde83e469fe400a5a3aecd16df
Author: Sandor Molnar <sm...@apache.org>
AuthorDate: Tue Jan 7 20:57:18 2020 +0100

    KNOX-2157 - Verifying the server's state in addition to PID check at gateway start and registering shutdown hook in order to stop the server gracefully. (#230)
---
 gateway-release-common/home/bin/knox-functions.sh  | 57 +++++++++++++-
 gateway-release/home/bin/gateway.sh                | 29 +++++++-
 .../org/apache/knox/gateway/GatewayMessages.java   |  6 ++
 .../org/apache/knox/gateway/GatewayServer.java     | 35 +++++++--
 .../gateway/GatewayServerLifecycleListener.java    | 86 ++++++++++++++++++++++
 .../gateway/SimpleDescriptorHandlerFuncTest.java   |  5 ++
 6 files changed, 209 insertions(+), 9 deletions(-)

diff --git a/gateway-release-common/home/bin/knox-functions.sh b/gateway-release-common/home/bin/knox-functions.sh
index b48e967..aee3706 100644
--- a/gateway-release-common/home/bin/knox-functions.sh
+++ b/gateway-release-common/home/bin/knox-functions.sh
@@ -41,6 +41,9 @@ APP_JAVA_LIB_PATH=${KNOX_GATEWAY_JAVA_LIB_PATH:-$DEFAULT_JAVA_LIB_PATH}
 # JAVA options used by the JVM
 declare -a APP_JAVA_OPTS
 
+#status-based test related variables
+DEFAULT_APP_STATUS_TEST_RETRY_ATTEMPTS=5
+DEFAULT_APP_STATUS_TEST_RETRY_SLEEP=2s
 
 ############################
 ##### common functions #####
@@ -163,7 +166,7 @@ function buildAppJavaOpts {
     # echo "APP_JAVA_OPTS =" "${APP_JAVA_OPTS[@]}"
 }
 
-function appIsRunning {
+function appIsRunningByPID {
    if [ "$1" -eq 0 ]; then return 0; fi
 
    ps -p "$1" > /dev/null
@@ -175,6 +178,52 @@ function appIsRunning {
    fi
 }
 
+function appIsRunningByStatus {
+   retryAttempts=${APP_STATUS_TEST_RETRY_ATTEMPTS:-$DEFAULT_APP_STATUS_TEST_RETRY_ATTEMPTS}
+   retrySleep=${APP_STATUS_TEST_RETRY_SLEEP:-$DEFAULT_APP_STATUS_TEST_RETRY_SLEEP}
+
+   #echo "Retry attempts = $retryAttempts"
+   #echo "Retry sleep = $retrySleep"
+
+   statusCheck=0
+   for ((i=1; i<=retryAttempts; i++))
+   do
+     #echo "$i. try"
+
+     if grep -Fxqs "STARTED" "$APP_DATA_DIR"/gatewayServer.status; then
+       statusCheck=1
+       break
+     fi
+
+     sleep "$retrySleep"
+   done
+
+   return $statusCheck
+}
+
+#returns 0 if not running and 1 if running
+function appIsRunning {
+   appIsRunningByPID "$1"
+   if [ $? -eq 1 ]; then
+     #echo "PID check succeeded"
+     if [[ "$TEST_APP_STATUS" = "true" ]]; then
+       #echo "Checking status..."
+       appIsRunningByStatus
+       if [ $? -eq 1 ]; then
+         #echo "Status check passed"
+         return 1;
+       else
+         #echo "Status check NOT passed"
+         return 0;
+       fi
+     else
+       return 1;
+     fi
+   fi;
+
+   return 0
+}
+
 # Returns 0 if the app is running and sets the $PID variable
 # TODO: this may be a false indication: it may happen the process started but it'll return with a <>0 exit code due to validation errors; this should be fixed ASAP
 function getPID {
@@ -221,7 +270,10 @@ function appStart {
 
       getPID
       for ((i=0; i<APP_START_WAIT_TIME*10; i++)); do
-         if appIsRunning "$APP_PID"; then break; fi
+         appIsRunning "$APP_PID"
+         if [ $? -eq 1 ]; then
+            break
+         fi
          sleep 0.1
       done
       appIsRunning "$APP_PID"
@@ -288,6 +340,7 @@ function appClean {
 function appKill {
    local localPID=$1
    kill "$localPID" || return 1
+
    for ((i=0; i<APP_KILL_WAIT_TIME*10; i++)); do
       if appIsRunning "$localPID"; then return 0; fi
       sleep 0.1
diff --git a/gateway-release/home/bin/gateway.sh b/gateway-release/home/bin/gateway.sh
index 9065cd9..9ed0fd4 100755
--- a/gateway-release/home/bin/gateway.sh
+++ b/gateway-release/home/bin/gateway.sh
@@ -76,10 +76,37 @@ function main {
          setupEnv
          ;;
       start)
-         if [ "$2" = "--printEnv" ]; then
+         printEnv=0
+         while [[ $# -gt 0 ]]
+         do
+           key="$1"
+
+           case $key in
+             --printEnv)
+               printEnv=1
+               shift # past argument
+               ;;
+             --test-gateway-retry-attempts)
+               export APP_STATUS_TEST_RETRY_ATTEMPTS="$2"
+               shift # past argument
+               shift # past value
+               ;;
+             --test-gateway-retry-sleep)
+               export APP_STATUS_TEST_RETRY_SLEEP="$2"
+               shift # past argument
+               shift # past value
+               ;;
+             *)    # unknown option
+               shift # past argument
+               ;;
+           esac
+         done
+
+         if [ $printEnv -eq 1 ]; then
            printEnv
          fi
          checkEnv
+         export TEST_APP_STATUS=true
          appStart
          ;;
       stop)   
diff --git a/gateway-server/src/main/java/org/apache/knox/gateway/GatewayMessages.java b/gateway-server/src/main/java/org/apache/knox/gateway/GatewayMessages.java
index 0a3781f..0f75ccd 100644
--- a/gateway-server/src/main/java/org/apache/knox/gateway/GatewayMessages.java
+++ b/gateway-server/src/main/java/org/apache/knox/gateway/GatewayMessages.java
@@ -51,6 +51,9 @@ public interface GatewayMessages {
   @Message( level = MessageLevel.INFO, text = "Stopped gateway." )
   void stoppedGateway();
 
+  @Message( level = MessageLevel.INFO, text = "Failed to stopped gateway." )
+  void failedToStopGateway(@StackTrace( level = MessageLevel.INFO ) Exception e);
+
   @Message( level = MessageLevel.INFO, text = "Loading configuration resource {0}" )
   void loadingConfigurationResource( String res );
 
@@ -679,4 +682,7 @@ public interface GatewayMessages {
 
   @Message(level = MessageLevel.INFO, text = "Deleted service definition {0} / {1} / {2}")
   void deletedServiceDefinitionChange(String serviceName, String role, String version);
+
+  @Message(level = MessageLevel.ERROR, text = "Failed to save gateway status")
+  void failedToSaveGatewayStatus();
 }
diff --git a/gateway-server/src/main/java/org/apache/knox/gateway/GatewayServer.java b/gateway-server/src/main/java/org/apache/knox/gateway/GatewayServer.java
index e3ef3b6..7628cf7 100644
--- a/gateway-server/src/main/java/org/apache/knox/gateway/GatewayServer.java
+++ b/gateway-server/src/main/java/org/apache/knox/gateway/GatewayServer.java
@@ -118,6 +118,7 @@ import java.util.Properties;
 import java.util.ServiceLoader;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.regex.Pattern;
 
 public class GatewayServer {
@@ -141,6 +142,7 @@ public class GatewayServer {
   private TopologyService monitor;
   private TopologyListener listener;
   private Map<String, WebAppContext> deployments;
+  private AtomicBoolean stopped = new AtomicBoolean(false);
 
   public static void main( String[] args ) {
     try {
@@ -598,6 +600,7 @@ public class GatewayServer {
 
   }
 
+  @SuppressWarnings("PMD.DoNotUseThreads") //we need to define a Thread in the server's shutdown hook
   private synchronized void start() throws Exception {
     // Create the global context handler.
     contexts = new ContextHandlerCollection();
@@ -678,6 +681,7 @@ public class GatewayServer {
     }
 
     jetty.setHandler(handlers);
+    jetty.addLifeCycleListener(new GatewayServerLifecycleListener(config));
 
     try {
       jetty.start();
@@ -692,15 +696,34 @@ public class GatewayServer {
     // Start the topology monitor.
     log.monitoringTopologyChangesInDirectory(topologiesDir.getAbsolutePath());
     monitor.startMonitor();
+
+    Runtime.getRuntime().addShutdownHook(new Thread() {
+
+      @Override
+      public void run() {
+        try {
+          server.stop();
+        } catch (Exception e) {
+          //NOP: error is already logged in the stop() method
+        }
+      }
+    });
   }
 
   public synchronized void stop() throws Exception {
-    log.stoppingGateway();
-    services.stop();
-    monitor.stopMonitor();
-    jetty.stop();
-    jetty.join();
-    log.stoppedGateway();
+    if (!stopped.get()) {
+      try {
+        log.stoppingGateway();
+        services.stop();
+        monitor.stopMonitor();
+        jetty.stop();
+        jetty.join();
+        log.stoppedGateway();
+        stopped.set(true);
+      } catch (Exception e) {
+        log.failedToStopGateway(e);
+      }
+    }
   }
 
   /**
diff --git a/gateway-server/src/main/java/org/apache/knox/gateway/GatewayServerLifecycleListener.java b/gateway-server/src/main/java/org/apache/knox/gateway/GatewayServerLifecycleListener.java
new file mode 100644
index 0000000..51c834f
--- /dev/null
+++ b/gateway-server/src/main/java/org/apache/knox/gateway/GatewayServerLifecycleListener.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.knox.gateway;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Locale;
+
+import org.apache.knox.gateway.config.GatewayConfig;
+import org.apache.knox.gateway.i18n.messages.MessagesFactory;
+import org.eclipse.jetty.util.component.LifeCycle;
+
+public class GatewayServerLifecycleListener implements LifeCycle.Listener {
+
+  private static final GatewayMessages log = MessagesFactory.get(GatewayMessages.class);
+
+  private static final ThreadLocal<DateFormat> DATE_FORMAT = ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ", Locale.getDefault()));
+
+  private enum Status {
+    STARTING, STARTED, FAILURE, STOPPING, STOPPED
+  };
+
+  private final Path lifeCycleFilePath;
+
+  GatewayServerLifecycleListener(GatewayConfig gatewayConfig) throws IOException {
+    this.lifeCycleFilePath = Paths.get(gatewayConfig.getGatewayDataDir(), "gatewayServer.status");
+    Files.deleteIfExists(lifeCycleFilePath);
+    Files.createFile(lifeCycleFilePath);
+  }
+
+  @Override
+  public void lifeCycleStarting(LifeCycle event) {
+    saveStatus(Status.STARTING);
+  }
+
+  @Override
+  public void lifeCycleStarted(LifeCycle event) {
+    saveStatus(Status.STARTED);
+  }
+
+  @Override
+  public void lifeCycleFailure(LifeCycle event, Throwable cause) {
+    saveStatus(Status.FAILURE);
+  }
+
+  @Override
+  public void lifeCycleStopping(LifeCycle event) {
+    saveStatus(Status.STOPPING);
+  }
+
+  @Override
+  public void lifeCycleStopped(LifeCycle event) {
+    saveStatus(Status.STOPPED);
+  }
+
+  private void saveStatus(Status status) {
+    try {
+      // saving the current timestamp in the status file is very useful at debug time
+      final String message = DATE_FORMAT.get().format(new Date()) + System.getProperty("line.separator") + status.name() + System.getProperty("line.separator");
+      Files.write(lifeCycleFilePath, message.getBytes(StandardCharsets.UTF_8));
+    } catch (IOException e) {
+      log.failedToSaveGatewayStatus();
+    }
+  }
+}
diff --git a/gateway-test/src/test/java/org/apache/knox/gateway/SimpleDescriptorHandlerFuncTest.java b/gateway-test/src/test/java/org/apache/knox/gateway/SimpleDescriptorHandlerFuncTest.java
index ccdb164..87eea0b 100644
--- a/gateway-test/src/test/java/org/apache/knox/gateway/SimpleDescriptorHandlerFuncTest.java
+++ b/gateway-test/src/test/java/org/apache/knox/gateway/SimpleDescriptorHandlerFuncTest.java
@@ -38,6 +38,8 @@ import org.junit.Test;
 import java.io.File;
 import java.net.InetSocketAddress;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.security.KeyStore;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -130,6 +132,8 @@ public class SimpleDescriptorHandlerFuncTest {
     File testProvDir = new File(testConfDir, "shared-providers");
     File testTopoDir = new File(testConfDir, "topologies");
     File testDeployDir = new File(testConfDir, "deployments");
+    File testDataDir = new File(testRootDir, "data");
+    Files.createDirectories(Paths.get(testDataDir.getAbsolutePath()));
 
     // Write the externalized provider config to a temp file
     File providerConfig = new File(testProvDir, "ambari-cluster-policy.xml");
@@ -162,6 +166,7 @@ public class SimpleDescriptorHandlerFuncTest {
       // Try setting up enough of the GatewayServer to support the test...
       GatewayConfig config = EasyMock.createNiceMock(GatewayConfig.class);
       InetSocketAddress gatewayAddress = new InetSocketAddress(0);
+      EasyMock.expect(config.getGatewayDataDir()).andReturn(testDataDir.getAbsolutePath()).anyTimes();
       EasyMock.expect(config.getGatewayTopologyDir()).andReturn(testTopoDir.getAbsolutePath()).anyTimes();
       EasyMock.expect(config.getGatewayDeploymentDir()).andReturn(testDeployDir.getAbsolutePath()).anyTimes();
       EasyMock.expect(config.getGatewayAddress()).andReturn(gatewayAddress).anyTimes();