You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@qpid.apache.org by kw...@apache.org on 2014/09/24 15:02:58 UTC

svn commit: r1627305 - /qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java

Author: kwall
Date: Wed Sep 24 13:02:57 2014
New Revision: 1627305

URL: http://svn.apache.org/r1627305
Log:
QPID-6111: [Java Broker] Ensure that when the REF is shut down, sufficient time is allowed - bug fix: the shutdown timeout was computed with Math.min where Math.max was required.

Also:
* reduced the default envSetupTimeout from 15mins to 3mins.
* log a warning if environment re-creation takes longer than 25% of the allowed timeout
* whilst restarting, recheck the REF state to avoid needlessly recreating the environment if restart is no longer required (most likely owing to close)

Modified:
    qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java

Modified: qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java
URL: http://svn.apache.org/viewvc/qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java?rev=1627305&r1=1627304&r2=1627305&view=diff
==============================================================================
--- qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java (original)
+++ qpid/trunk/qpid/java/bdbstore/src/main/java/org/apache/qpid/server/store/berkeleydb/replication/ReplicatedEnvironmentFacade.java Wed Sep 24 13:02:57 2014
@@ -157,7 +157,7 @@ public class ReplicatedEnvironmentFacade
         /**
          * Parameter decreased as the 10 h default may cause user confusion.
          */
-        put(ReplicationConfig.ENV_SETUP_TIMEOUT, "15 min");
+        put(ReplicationConfig.ENV_SETUP_TIMEOUT, "180 s");
         /**
          * Parameter changed from default (off) to allow the Environment to start in the
          * UNKNOWN state when the majority is not available.
@@ -306,7 +306,7 @@ public class ReplicatedEnvironmentFacade
                     LOGGER.debug("Closing replicated environment facade for " + _prettyGroupNodeName + " current state is " + _state.get());
                 }
 
-                long timeout = Math.min(_executorShutdownTimeout, _envSetupTimeoutMillis);
+                long timeout = Math.max(_executorShutdownTimeout, _envSetupTimeoutMillis);
                 shutdownAndAwaitExecutorService(_environmentJobExecutor,
                                                 timeout,
                                                 TimeUnit.MILLISECONDS);
@@ -407,23 +407,32 @@ public class ReplicatedEnvironmentFacade
                 @Override
                 public void run()
                 {
-                    for (int i = 0; i < _environmentRestartRetryLimit; i++)
+                    int attemptNumber = 1;
+                    boolean restarted = false;
+                    while(_state.get() == State.RESTARTING && attemptNumber <= _environmentRestartRetryLimit)
                     {
                         try
                         {
                             restartEnvironment();
+                            restarted = true;
                             break;
                         }
                         catch(EnvironmentFailureException e)
                         {
-                            // log exception and try again
-                            LOGGER.warn("Unexpected failure on environment restart. Restart iteration: " + i, e);
+                            LOGGER.warn("Failure whilst trying to restart environment (attempt number "
+                                    + attemptNumber + " of " + _environmentRestartRetryLimit + ")", e);
                         }
                         catch (Exception e)
                         {
-                            LOGGER.error("Exception on environment restart", e);
+                            LOGGER.error("Fatal failure whilst trying to restart environment", e);
                             break;
                         }
+                        attemptNumber++;
+                    }
+
+                    if (!restarted)
+                    {
+                        LOGGER.warn("Failed to restart environment.");
                     }
                 }
             });
@@ -565,8 +574,10 @@ public class ReplicatedEnvironmentFacade
         {
             if (LOGGER.isDebugEnabled())
             {
-                LOGGER.debug("Ignoring the state environment change event as the environment facade for node '" + _prettyGroupNodeName
-                        + "' is in state " + _state.get());
+                LOGGER.debug("Ignoring the state environment change event as the environment facade for node '"
+                             + _prettyGroupNodeName
+                             + "' is in state "
+                             + _state.get());
             }
         }
     }
@@ -1106,10 +1117,29 @@ public class ReplicatedEnvironmentFacade
                 return createEnvironment(environmentPathFile, envConfig, replicationConfig);
             }});
 
-        long setUpTimeOutMillis = extractEnvSetupTimeoutMillis(replicationConfig);
+        final long setUpTimeOutMillis = extractEnvSetupTimeoutMillis(replicationConfig);
+        final long initialTimeOutMillis = Math.max(setUpTimeOutMillis / 4, 1000);
+        final long remainingTimeOutMillis = setUpTimeOutMillis - initialTimeOutMillis;
         try
         {
-            return environmentFuture.get(setUpTimeOutMillis, TimeUnit.MILLISECONDS);
+            try
+            {
+                return environmentFuture.get(initialTimeOutMillis, TimeUnit.MILLISECONDS);
+            }
+            catch (TimeoutException te)
+            {
+                if (remainingTimeOutMillis > 0)
+                {
+                    LOGGER.warn("Slow replicated environment creation for " + _prettyGroupNodeName
+                                + ". Will continue to wait for further " + remainingTimeOutMillis
+                                + "ms. for environment creation to complete.");
+                    return environmentFuture.get(remainingTimeOutMillis, TimeUnit.MILLISECONDS);
+                }
+                else
+                {
+                    throw te;
+                }
+            }
         }
         catch (InterruptedException e)
         {



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@qpid.apache.org
For additional commands, e-mail: commits-help@qpid.apache.org