You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@brooklyn.apache.org by al...@apache.org on 2018/02/13 09:56:24 UTC

[3/4] brooklyn-server git commit: BROOKLYN-580: fix calling connect-sensors on rebind

BROOKLYN-580: fix calling connect-sensors on rebind

Project: http://git-wip-us.apache.org/repos/asf/brooklyn-server/repo
Commit: http://git-wip-us.apache.org/repos/asf/brooklyn-server/commit/c68f8485
Tree: http://git-wip-us.apache.org/repos/asf/brooklyn-server/tree/c68f8485
Diff: http://git-wip-us.apache.org/repos/asf/brooklyn-server/diff/c68f8485

Branch: refs/heads/master
Commit: c68f848567711c4d78e8b11bcb58c990a2c2d8e7
Parents: 9e7e83a
Author: Aled Sage <al...@gmail.com>
Authored: Fri Feb 9 10:03:16 2018 +0000
Committer: Aled Sage <al...@gmail.com>
Committed: Fri Feb 9 10:03:16 2018 +0000

----------------------------------------------------------------------
 .../software/base/SoftwareProcessImpl.java      | 84 +++++++++++++++-----
 .../machine/MachineEntityJcloudsRebindTest.java | 24 +++++-
 2 files changed, 84 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/c68f8485/software/base/src/main/java/org/apache/brooklyn/entity/software/base/SoftwareProcessImpl.java
----------------------------------------------------------------------
diff --git a/software/base/src/main/java/org/apache/brooklyn/entity/software/base/SoftwareProcessImpl.java b/software/base/src/main/java/org/apache/brooklyn/entity/software/base/SoftwareProcessImpl.java
index 36aa679..548d659 100644
--- a/software/base/src/main/java/org/apache/brooklyn/entity/software/base/SoftwareProcessImpl.java
+++ b/software/base/src/main/java/org/apache/brooklyn/entity/software/base/SoftwareProcessImpl.java
@@ -50,7 +50,6 @@ import org.apache.brooklyn.feed.function.FunctionPollConfig;
 import org.apache.brooklyn.location.jclouds.networking.NetworkingEffectors;
 import org.apache.brooklyn.location.ssh.SshMachineLocation;
 import org.apache.brooklyn.util.collections.MutableList;
-import org.apache.brooklyn.util.collections.MutableMap;
 import org.apache.brooklyn.util.collections.MutableSet;
 import org.apache.brooklyn.util.core.config.ConfigBag;
 import org.apache.brooklyn.util.core.task.BasicTask;
@@ -385,33 +384,76 @@ public abstract class SoftwareProcessImpl extends AbstractEntity implements Soft
             connectSensors();
         } else {
             long delay = (long) (Math.random() * configuredMaxDelay.toMilliseconds());
-            LOG.debug("Scheduled reconnection of sensors on {} in {}ms", this, delay);
+            LOG.debug("Scheduling reconnection of sensors on {} in {}ms", this, delay);
             
-            // This is functionally equivalent to new scheduledExecutor.schedule(job, delay, TimeUnit.MILLISECONDS).
-            // It uses the entity's execution context to schedule and thus execute the job.
-            Map<?,?> flags = MutableMap.of("delay", Duration.millis(delay), "maxIterations", 1, "cancelOnException", true);
-            Callable<Void> job = new Callable<Void>() {
-                public Void call() {
-                    try {
-                        if (getManagementSupport().isNoLongerManaged()) {
-                            LOG.debug("Entity {} no longer managed; ignoring scheduled connect sensors on rebind", SoftwareProcessImpl.this);
-                            return null;
-                        }
-                        connectSensors();
-                    } catch (Throwable e) {
-                        LOG.warn("Problem connecting sensors on rebind of "+SoftwareProcessImpl.this, e);
-                        Exceptions.propagateIfFatal(e);
-                    }
-                    return null;
-                }};
-            ScheduledTask scheduledTask = new ScheduledTask(flags, new BasicTask<Void>(job));
-            getExecutionContext().submit(scheduledTask);
+            scheduleConnectSensorsOnRebind(Duration.millis(delay));
         }
+        
         // don't wait here - it may be long-running, e.g. if remote entity has died, and we don't want to block rebind waiting or cause it to fail
         // the service will subsequently show service not up and thus failure
 //        waitForServiceUp();
     }
 
+    protected void scheduleConnectSensorsOnRebind(Duration delay) {
+        Callable<Void> job = new Callable<Void>() {
+            public Void call() {
+                try {
+                    if (!getManagementContext().isRunning()) {
+                        LOG.debug("Management context not running; entity {} ignoring scheduled connect-sensors on rebind", SoftwareProcessImpl.this);
+                        return null;
+                    }
+                    if (getManagementSupport().isNoLongerManaged()) {
+                        LOG.debug("Entity {} no longer managed; ignoring scheduled connect-sensors on rebind", SoftwareProcessImpl.this);
+                        return null;
+                    }
+                    
+                    // Don't call connectSensors until the entity is actually managed.
+                    // See https://issues.apache.org/jira/browse/BROOKLYN-580
+                    boolean rebindActive = getManagementContext().getRebindManager().isRebindActive();
+                    if (!getManagementSupport().wasDeployed()) {
+                        if (rebindActive) {
+                            // We are still rebinding, and entity not yet managed - reschedule.
+                            Duration configuredMaxDelay = getConfig(MAXIMUM_REBIND_SENSOR_CONNECT_DELAY);
+                            if (configuredMaxDelay == null) configuredMaxDelay = Duration.millis(100);
+                            long delay = (long) (Math.random() * configuredMaxDelay.toMilliseconds());
+                            delay = Math.max(10, delay);
+                            LOG.debug("Entity {} not yet managed; re-scheduling connect-sensors on rebind in {}ms", SoftwareProcessImpl.this, delay);
+                            
+                            scheduleConnectSensorsOnRebind(Duration.millis(delay));
+                            return null;
+                        } else {
+                            // Not rebinding and yet not managed - presumably means that rebind aborted
+                            // (e.g. with a "fail-fast" configuration).
+                            LOG.debug("Rebind no longer executing, yet entity {} not managed; not re-scheduling connect-sensors", SoftwareProcessImpl.this);
+                            return null;
+                        }
+                    }
+                    
+                    connectSensors();
+                    
+                } catch (Throwable e) {
+                    LOG.warn("Problem connecting sensors on rebind of "+SoftwareProcessImpl.this, e);
+                    Exceptions.propagateIfFatal(e);
+                }
+                return null;
+            }};
+            
+        Callable<Task<?>> jobFactory = new Callable<Task<?>>() {
+            public Task<?> call() {
+                return new BasicTask<Void>(job);
+            }};
+
+        // This is functionally equivalent to new scheduledExecutor.schedule(job, delay, TimeUnit.MILLISECONDS).
+        // It uses the entity's execution context to schedule and thus execute the job.
+        ScheduledTask scheduledTask = ScheduledTask.builder(jobFactory)
+                .delay(delay)
+                .maxIterations(1)
+                .cancelOnException(true)
+                .build();
+
+        getExecutionContext().submit(scheduledTask);
+    }
+
     @Override 
     public void onManagementStarting() {
         super.onManagementStarting();

http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/c68f8485/software/base/src/test/java/org/apache/brooklyn/entity/machine/MachineEntityJcloudsRebindTest.java
----------------------------------------------------------------------
diff --git a/software/base/src/test/java/org/apache/brooklyn/entity/machine/MachineEntityJcloudsRebindTest.java b/software/base/src/test/java/org/apache/brooklyn/entity/machine/MachineEntityJcloudsRebindTest.java
index 196a51c..c4675c2 100644
--- a/software/base/src/test/java/org/apache/brooklyn/entity/machine/MachineEntityJcloudsRebindTest.java
+++ b/software/base/src/test/java/org/apache/brooklyn/entity/machine/MachineEntityJcloudsRebindTest.java
@@ -20,6 +20,7 @@ package org.apache.brooklyn.entity.machine;
 
 import static org.testng.Assert.fail;
 
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -78,7 +79,19 @@ public class MachineEntityJcloudsRebindTest extends JcloudsRebindStubUnitTest {
             }});
     }
     
-    // See https://issues.apache.org/jira/browse/BROOKLYN-425
+    /**
+     * See https://issues.apache.org/jira/browse/BROOKLYN-425
+     * and https://issues.apache.org/jira/browse/BROOKLYN-580
+     *
+     * Also manually checked that, with rebind fail-fast, the SoftwareProcessImpl#scheduleConnectSensorsOnRebind
+     * does not leave it running (e.g. repeatedly re-scheduling). Tested this by using the debugger, simulating a
+     * rebind exception, and using the rebind option:
+     * <pre>
+     * {@code
+     * rebind(RebindOptions.create().exceptionHandler(RebindExceptionHandlerImpl.builder().rebindFailureMode(RebindFailureMode.FAIL_FAST).build()));
+     * }
+     * </pre>
+     */
     @Test
     @Override
     public void testRebind() throws Exception {
@@ -100,12 +113,15 @@ public class MachineEntityJcloudsRebindTest extends JcloudsRebindStubUnitTest {
         
         // Expect SshMachineLocation.inferMachineDetails to have successfully retrieved os details,
         // which we've stubbed to return centos (in ssh call).
+        // This is called in `MachineEntityImpl.connectSensors` to determine if it's linux, and thus whether
+        // to create the machine-metrics feeds.
         assertRecordedSshCmdContainsEventually("/etc/os-release");
         
-        // TODO Would like to assert that we have the feed, but it's not actually added to the entity!
+        // TODO Would like to assert that we have the feed, but it's not actually recorded against the entity!
         // See AddMachineMetrics.createMachineMetricsFeed, which doesn't call `feeds().addFeed()` so
         // it's not persisted and is not accessible from entity.feeds().getFeeds(). Instead, it just
         // adds the entity to the feed (which is the old way, for if your feed is not persistable).
+        // The feed will be operational, it's just that the entity doesn't list it.
         //     assertHasFeedEventually(newMachine, "machineMetricsFeed");
 
         // TODO AddMachineMetrics.createMachineMetricsFeed poll period is not configurable; 
@@ -117,12 +133,14 @@ public class MachineEntityJcloudsRebindTest extends JcloudsRebindStubUnitTest {
         Asserts.succeedsEventually(new Runnable() {
             public void run() {
                 List<ExecCmd> cmds = RecordingSshTool.getExecCmds();
+                List<List<String>> cmdLists = new ArrayList<>(cmds.size());
                 for (ExecCmd cmd : cmds) {
                     if (cmd.commands.toString().contains(expected)) {
                         return;
                     }
+                    cmdLists.add(cmd.commands);
                 }
-                fail("Commands (" + expected + ") not contain in " + cmds);
+                fail("Commands (" + expected + ") not contain in " + cmdLists);
             }});
     }