You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by sm...@apache.org on 2015/06/10 03:53:44 UTC

ambari git commit: AMBARI-11804. Improvements to agent level command retry (smohanty)

Repository: ambari
Updated Branches:
  refs/heads/trunk 3ef169e02 -> 52dd06d72


AMBARI-11804. Improvements to agent level command retry (smohanty)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/52dd06d7
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/52dd06d7
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/52dd06d7

Branch: refs/heads/trunk
Commit: 52dd06d720a5b6007adffd2feba05b533c405d82
Parents: 3ef169e
Author: Sumit Mohanty <sm...@hortonworks.com>
Authored: Mon Jun 8 17:10:45 2015 -0700
Committer: Sumit Mohanty <sm...@hortonworks.com>
Committed: Tue Jun 9 18:30:42 2015 -0700

----------------------------------------------------------------------
 .../src/main/python/ambari_agent/ActionQueue.py |  38 +++--
 .../test/python/ambari_agent/TestActionQueue.py |  41 ++++-
 .../ambari/server/agent/ExecutionCommand.java   |   2 +-
 .../server/configuration/Configuration.java     |  14 --
 .../AmbariCustomCommandExecutionHelper.java     |   6 -
 .../AmbariManagementControllerImpl.java         |  39 ++++-
 .../ambari/server/state/ConfigHelper.java       |  18 ++-
 .../AmbariManagementControllerTest.java         | 157 +++++++++++++++++++
 8 files changed, 271 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/52dd06d7/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
index d48c965..6a64f99 100644
--- a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
+++ b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
@@ -254,36 +254,50 @@ class ActionQueue(threading.Thread):
     self.commandStatuses.put_command_status(command, in_progress_status)
 
     numAttempts = 0
-    maxAttempts = 1
+    retryDuration = 0  # even with 0 allow one attempt
     retryAble = False
     delay = 1
     if 'commandParams' in command:
-      if 'command_retry_max_attempt_count' in command['commandParams']:
-        maxAttempts = int(command['commandParams']['command_retry_max_attempt_count'])
+      if 'max_duration_for_retries' in command['commandParams']:
+        retryDuration = int(command['commandParams']['max_duration_for_retries'])
       if 'command_retry_enabled' in command['commandParams']:
         retryAble = command['commandParams']['command_retry_enabled'] == "true"
     if isAutoExecuteCommand:
       retryAble = False
 
-    logger.debug("Command execution metadata - retry enabled = {retryAble}, max attempt count = {maxAttemptCount}".
-                 format(retryAble = retryAble, maxAttemptCount = maxAttempts))
-    while numAttempts < maxAttempts:
+    logger.debug("Command execution metadata - retry enabled = {retryAble}, max retry duration (sec) = {retryDuration}".
+                 format(retryAble=retryAble, retryDuration=retryDuration))
+    while retryDuration >= 0:
       numAttempts += 1
+      start = 0
+      if retryAble:
+        start = int(time.time())
       # running command
       commandresult = self.customServiceOrchestrator.runCommand(command,
-        in_progress_status['tmpout'], in_progress_status['tmperr'],
-        override_output_files=numAttempts == 1, retry=numAttempts > 1)
-
+                                                                in_progress_status['tmpout'],
+                                                                in_progress_status['tmperr'],
+                                                                override_output_files=numAttempts == 1,
+                                                                retry=numAttempts > 1)
+      end = 1
+      if retryAble:
+        end = int(time.time())
+      retryDuration -= (end - start)
 
       # dumping results
       if isCommandBackground:
         return
       else:
-        status = self.COMPLETED_STATUS if commandresult['exitcode'] == 0 else self.FAILED_STATUS
+        if commandresult['exitcode'] == 0:
+          status = self.COMPLETED_STATUS
+        else:
+          status = self.FAILED_STATUS
 
-      if status != self.COMPLETED_STATUS and retryAble == True and maxAttempts > numAttempts:
+      if status != self.COMPLETED_STATUS and retryAble == True and retryDuration > 0:
         delay = self.get_retry_delay(delay)
-        logger.info("Retrying command id {cid} after a wait of {delay}".format(cid = taskId, delay=delay))
+        if delay > retryDuration:
+          delay = retryDuration
+        retryDuration -= delay  # allow one last attempt
+        logger.info("Retrying command id {cid} after a wait of {delay}".format(cid=taskId, delay=delay))
         time.sleep(delay)
         continue
       else:

http://git-wip-us.apache.org/repos/asf/ambari/blob/52dd06d7/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py b/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
index 59e01a6..4e130f9 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
@@ -193,7 +193,7 @@ class TestActionQueue(TestCase):
       'jdk_location' : '.',
       'service_package_folder' : '.',
       'command_retry_enabled' : 'true',
-      'command_retry_max_attempt_count' : '3'
+      'max_duration_for_retries' : '5'
     },
     'hostLevelParams' : {}
   }
@@ -816,13 +816,50 @@ class TestActionQueue(TestCase):
     self.assertTrue(runCommand_mock.called)
     self.assertEqual(3, runCommand_mock.call_count)
     self.assertEqual(2, sleep_mock.call_count)
-    sleep_mock.assert_has_calls([call(2), call(4)], False)
+    sleep_mock.assert_has_calls([call(2), call(3)], False)
     runCommand_mock.assert_has_calls([
       call(command, '/tmp/ambari-agent/output-19.txt', '/tmp/ambari-agent/errors-19.txt', override_output_files=True, retry=False),
       call(command, '/tmp/ambari-agent/output-19.txt', '/tmp/ambari-agent/errors-19.txt', override_output_files=False, retry=True),
       call(command, '/tmp/ambari-agent/output-19.txt', '/tmp/ambari-agent/errors-19.txt', override_output_files=False, retry=True)])
 
 
+  @patch("time.time")
+  @patch("time.sleep")
+  @patch.object(OSCheck, "os_distribution", new=MagicMock(return_value=os_distro_value))
+  @patch.object(StackVersionsFileHandler, "read_stack_version")
+  @patch.object(CustomServiceOrchestrator, "__init__")
+  def test_execute_retryable_command_with_time_lapse(self, CustomServiceOrchestrator_mock,
+                                     read_stack_version_mock, sleep_mock, time_mock
+  ):
+    CustomServiceOrchestrator_mock.return_value = None
+    dummy_controller = MagicMock()
+    actionQueue = ActionQueue(AmbariConfig(), dummy_controller)
+    python_execution_result_dict = {
+      'exitcode': 1,
+      'stdout': 'out',
+      'stderr': 'stderr',
+      'structuredOut': '',
+      'status': 'FAILED'
+    }
+    time_mock.side_effect = [4, 8, 10, 14, 18, 22]
+
+    def side_effect(command, tmpoutfile, tmperrfile, override_output_files=True, retry=False):
+      return python_execution_result_dict
+
+    command = copy.deepcopy(self.retryable_command)
+    with patch.object(CustomServiceOrchestrator, "runCommand") as runCommand_mock:
+      runCommand_mock.side_effect = side_effect
+      actionQueue.execute_command(command)
+
+    #assert that python executor start
+    self.assertTrue(runCommand_mock.called)
+    self.assertEqual(2, runCommand_mock.call_count)
+    self.assertEqual(1, sleep_mock.call_count)
+    sleep_mock.assert_has_calls([call(2)], False)
+    runCommand_mock.assert_has_calls([
+      call(command, '/tmp/ambari-agent/output-19.txt', '/tmp/ambari-agent/errors-19.txt', override_output_files=True, retry=False),
+      call(command, '/tmp/ambari-agent/output-19.txt', '/tmp/ambari-agent/errors-19.txt', override_output_files=False, retry=True)])
+
   #retryable_command
   @patch("time.sleep")
   @patch.object(OSCheck, "os_distribution", new=MagicMock(return_value=os_distro_value))

http://git-wip-us.apache.org/repos/asf/ambari/blob/52dd06d7/ambari-server/src/main/java/org/apache/ambari/server/agent/ExecutionCommand.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/ExecutionCommand.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/ExecutionCommand.java
index e4abb1d..07b7d10 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/ExecutionCommand.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/ExecutionCommand.java
@@ -330,7 +330,7 @@ public class ExecutionCommand extends AgentCommand {
     String VERSION = "version";
     String REFRESH_TOPOLOGY = "refresh_topology";
     String HOST_SYS_PREPPED = "host_sys_prepped";
-    String COMMAND_RETRY_MAX_ATTEMPT_COUNT = "command_retry_max_attempt_count";
+    String MAX_DURATION_OF_RETRIES = "max_duration_for_retries";
     String COMMAND_RETRY_ENABLED = "command_retry_enabled";
 
     String SERVICE_CHECK = "SERVICE_CHECK"; // TODO: is it standard command? maybe add it to RoleCommand enum?

http://git-wip-us.apache.org/repos/asf/ambari/blob/52dd06d7/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java b/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
index 318e8b3..f62ab64 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
@@ -375,10 +375,6 @@ public class Configuration {
   private static final String DEFAULT_JDBC_POOL_MAX_AGE_SECONDS = "0";
   private static final String DEFAULT_JDBC_POOL_IDLE_TEST_INTERVAL = "7200";
 
-  private static final String IS_COMMAND_RETRY_ENABLED_KEY = "command.retry.enabled";
-  private static final String IS_COMMAND_RETRY_ENABLED_DEFAULT = "false";
-  private static final String COMMAND_RETRY_COUNT_KEY = "command.retry.count";
-  private static final String COMMAND_RETRY_COUNT_DEFAULT = "3";
   /**
    * The full path to the XML file that describes the different alert templates.
    */
@@ -1239,16 +1235,6 @@ public class Configuration {
   }
 
   /**
-   * Command retry configs
-   */
-  public boolean isCommandRetryEnabled() {
-    return Boolean.parseBoolean(properties.getProperty(IS_COMMAND_RETRY_ENABLED_KEY, IS_COMMAND_RETRY_ENABLED_DEFAULT));
-  }
-
-  public int commandRetryCount() {
-    return Integer.parseInt(properties.getProperty(COMMAND_RETRY_COUNT_KEY, COMMAND_RETRY_COUNT_DEFAULT));
-  }
-  /**
    * @return custom properties for database connections
    */
   public Map<String,String> getDatabaseCustomProperties() {

http://git-wip-us.apache.org/repos/asf/ambari/blob/52dd06d7/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariCustomCommandExecutionHelper.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariCustomCommandExecutionHelper.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariCustomCommandExecutionHelper.java
index 52e4d19..4d61858 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariCustomCommandExecutionHelper.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariCustomCommandExecutionHelper.java
@@ -19,8 +19,6 @@
 package org.apache.ambari.server.controller;
 
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.CLIENTS_TO_UPDATE_CONFIGS;
-import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.COMMAND_RETRY_ENABLED;
-import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.COMMAND_RETRY_MAX_ATTEMPT_COUNT;
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.COMMAND_TIMEOUT;
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.COMPONENT_CATEGORY;
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.CUSTOM_COMMAND;
@@ -370,8 +368,6 @@ public class AmbariCustomCommandExecutionHelper {
         if (script != null) {
           commandParams.put(SCRIPT, script.getScript());
           commandParams.put(SCRIPT_TYPE, script.getScriptType().toString());
-          commandParams.put(COMMAND_RETRY_MAX_ATTEMPT_COUNT, Integer.toString(configs.commandRetryCount()));
-          commandParams.put(COMMAND_RETRY_ENABLED, Boolean.toString(configs.isCommandRetryEnabled()));
           if (script.getTimeout() > 0) {
             commandTimeout = String.valueOf(script.getTimeout());
           }
@@ -569,8 +565,6 @@ public class AmbariCustomCommandExecutionHelper {
       if (script != null) {
         commandParams.put(SCRIPT, script.getScript());
         commandParams.put(SCRIPT_TYPE, script.getScriptType().toString());
-        commandParams.put(COMMAND_RETRY_MAX_ATTEMPT_COUNT, Integer.toString(configs.commandRetryCount()));
-        commandParams.put(COMMAND_RETRY_ENABLED, Boolean.toString(configs.isCommandRetryEnabled()));
         if (script.getTimeout() > 0) {
           commandTimeout = String.valueOf(script.getTimeout());
         }

http://git-wip-us.apache.org/repos/asf/ambari/blob/52dd06d7/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariManagementControllerImpl.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariManagementControllerImpl.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariManagementControllerImpl.java
index 9e8a5e5..bd12cee 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariManagementControllerImpl.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariManagementControllerImpl.java
@@ -35,7 +35,7 @@ import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SERVICE_P
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SERVICE_REPO_INFO;
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.USER_LIST;
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.VERSION;
-import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.COMMAND_RETRY_MAX_ATTEMPT_COUNT;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.MAX_DURATION_OF_RETRIES;
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.COMMAND_RETRY_ENABLED;
 
 import java.io.File;
@@ -1822,8 +1822,41 @@ public class AmbariManagementControllerImpl implements AmbariManagementControlle
       if (script != null) {
         commandParams.put(SCRIPT, script.getScript());
         commandParams.put(SCRIPT_TYPE, script.getScriptType().toString());
-        commandParams.put(COMMAND_RETRY_MAX_ATTEMPT_COUNT, Integer.toString(configs.commandRetryCount()));
-        commandParams.put(COMMAND_RETRY_ENABLED, Boolean.toString(configs.isCommandRetryEnabled()));
+
+        String retryEnabledStr = configHelper.getValueFromDesiredConfigurations(cluster, ConfigHelper.CLUSTER_ENV,
+                                                                                ConfigHelper.CLUSTER_ENV_RETRY_ENABLED);
+        String commandsStr = configHelper.getValueFromDesiredConfigurations(cluster, ConfigHelper.CLUSTER_ENV,
+                                                                            ConfigHelper.CLUSTER_ENV_RETRY_COMMANDS);
+        String retryMaxTimeStr = configHelper.getValueFromDesiredConfigurations(cluster, ConfigHelper.CLUSTER_ENV,
+                                                                            ConfigHelper.CLUSTER_ENV_RETRY_MAX_TIME_IN_SEC);
+
+        boolean retryEnabled = false;
+        if(StringUtils.isNotEmpty(retryEnabledStr)) {
+          retryEnabled = Boolean.TRUE.toString().equals(retryEnabledStr);
+        }
+
+        Integer retryMaxTime = 0;
+
+        if (retryEnabled) {
+          retryMaxTime = NumberUtils.toInt(retryMaxTimeStr, 0);
+          if(retryMaxTime < 0) {
+            retryMaxTime = 0;
+          }
+
+          if (StringUtils.isNotEmpty(commandsStr)) {
+            boolean commandMayBeRetried = false;
+            String[] commands = commandsStr.split(",");
+            for (String command : commands) {
+              if (roleCommand.toString().equals(command.trim())) {
+                commandMayBeRetried = true;
+              }
+            }
+            retryEnabled = commandMayBeRetried;
+          }
+        }
+
+        commandParams.put(MAX_DURATION_OF_RETRIES, Integer.toString(retryMaxTime));
+        commandParams.put(COMMAND_RETRY_ENABLED, Boolean.toString(retryEnabled));
         ClusterVersionEntity currentClusterVersion = cluster.getCurrentClusterVersion();
         if (currentClusterVersion != null) {
          commandParams.put(VERSION, currentClusterVersion.getRepositoryVersion().getVersion());

http://git-wip-us.apache.org/repos/asf/ambari/blob/52dd06d7/ambari-server/src/main/java/org/apache/ambari/server/state/ConfigHelper.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/ConfigHelper.java b/ambari-server/src/main/java/org/apache/ambari/server/state/ConfigHelper.java
index eb1e52a..8b4b1e2 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/state/ConfigHelper.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/state/ConfigHelper.java
@@ -76,6 +76,10 @@ public class ConfigHelper {
   public static final String HDFS_SITE = "hdfs-site";
   public static final String HIVE_SITE = "hive-site";
   public static final String YARN_SITE = "yarn-site";
+  public static final String CLUSTER_ENV = "cluster-env";
+  public static final String CLUSTER_ENV_RETRY_ENABLED = "command_retry_enabled";
+  public static final String CLUSTER_ENV_RETRY_COMMANDS = "commands_to_retry";
+  public static final String CLUSTER_ENV_RETRY_MAX_TIME_IN_SEC = "command_retry_max_time_in_sec";
 
   public static final String HTTP_ONLY = "HTTP_ONLY";
   public static final String HTTPS_ONLY = "HTTPS_ONLY";
@@ -551,12 +555,14 @@ public class ConfigHelper {
   public String getValueFromDesiredConfigurations(Cluster cluster, String configType, String propertyName) {
     Map<String, DesiredConfig> desiredConfigs = cluster.getDesiredConfigs();
     DesiredConfig desiredConfig = desiredConfigs.get(configType);
-    Config config = cluster.getConfig(configType, desiredConfig.getTag());
-    Map<String, String> configurationProperties = config.getProperties();
-    if (null != configurationProperties) {
-      String value = configurationProperties.get(propertyName);
-      if (null != value) {
-        return value;
+    if(desiredConfig != null) {
+      Config config = cluster.getConfig(configType, desiredConfig.getTag());
+      Map<String, String> configurationProperties = config.getProperties();
+      if (null != configurationProperties) {
+        String value = configurationProperties.get(propertyName);
+        if (null != value) {
+          return value;
+        }
       }
     }
     return null;

http://git-wip-us.apache.org/repos/asf/ambari/blob/52dd06d7/ambari-server/src/test/java/org/apache/ambari/server/controller/AmbariManagementControllerTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/controller/AmbariManagementControllerTest.java b/ambari-server/src/test/java/org/apache/ambari/server/controller/AmbariManagementControllerTest.java
index 58bdd64..17db993 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/controller/AmbariManagementControllerTest.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/controller/AmbariManagementControllerTest.java
@@ -1001,6 +1001,159 @@ public class AmbariManagementControllerTest {
   }
 
   @Test
+  public void testGetExecutionCommandWithClusterEnvForRetry() throws Exception {
+    testCreateServiceComponentHostSimple();
+
+    String clusterName = "foo1";
+    String serviceName = "HDFS";
+
+    Cluster cluster = clusters.getCluster(clusterName);
+    Service s1 = cluster.getService(serviceName);
+
+    // Create and attach config
+    Map<String, String> configs = new HashMap<String, String>();
+    configs.put("a", "b");
+    configs.put("command_retry_enabled", "true");
+    configs.put("command_retry_max_time_in_sec", "5");
+    configs.put("commands_to_retry", "START");
+
+    ConfigurationRequest cr1,cr2;
+    cr1 = new ConfigurationRequest(clusterName, "core-site","version1",
+                                   configs, null);
+    cr2 = new ConfigurationRequest(clusterName, "cluster-env","version1",
+                                   configs, null);
+
+    ClusterRequest crReq = new ClusterRequest(cluster.getClusterId(), clusterName, null, null);
+    crReq.setDesiredConfig(Collections.singletonList(cr1));
+    controller.updateClusters(Collections.singleton(crReq), null);
+    crReq = new ClusterRequest(cluster.getClusterId(), clusterName, null, null);
+    crReq.setDesiredConfig(Collections.singletonList(cr2));
+    controller.updateClusters(Collections.singleton(crReq), null);
+
+    // Install
+    installService(clusterName, serviceName, false, false);
+    ExecutionCommand ec =
+        controller.getExecutionCommand(cluster,
+                                       s1.getServiceComponent("NAMENODE").getServiceComponentHost("h1"),
+                                       RoleCommand.START);
+    assertEquals("1-0", ec.getCommandId());
+    assertEquals("foo1", ec.getClusterName());
+    Map<String, Map<String, String>> configurations = ec.getConfigurations();
+    assertNotNull(configurations);
+    assertEquals(2, configurations.size());
+    assertTrue(configurations.containsKey("core-site"));
+    assertTrue(ec.getConfigurationAttributes().containsKey("core-site"));
+    assertTrue(ec.getCommandParams().containsKey("max_duration_for_retries"));
+    assertEquals("5", ec.getCommandParams().get("max_duration_for_retries"));
+    assertTrue(ec.getCommandParams().containsKey("command_retry_enabled"));
+    assertEquals("true", ec.getCommandParams().get("command_retry_enabled"));
+    Map<String, Set<String>> chInfo = ec.getClusterHostInfo();
+    assertTrue(chInfo.containsKey("namenode_host"));
+  }
+
+  @Test
+  public void testGetExecutionCommandWithClusterEnvForRetryOnlyInstall() throws Exception {
+    testCreateServiceComponentHostSimple();
+
+    String clusterName = "foo1";
+    String serviceName = "HDFS";
+
+    Cluster cluster = clusters.getCluster(clusterName);
+    Service s1 = cluster.getService(serviceName);
+
+    // Create and attach config
+    Map<String, String> configs = new HashMap<String, String>();
+    configs.put("a", "b");
+    configs.put("command_retry_enabled", "true");
+    configs.put("max_duration_for_retries", "1");
+    configs.put("commands_to_retry", "INSTALL");
+
+    ConfigurationRequest cr1,cr2;
+    cr1 = new ConfigurationRequest(clusterName, "core-site","version1",
+                                   configs, null);
+    cr2 = new ConfigurationRequest(clusterName, "cluster-env","version1",
+                                   configs, null);
+
+    ClusterRequest crReq = new ClusterRequest(cluster.getClusterId(), clusterName, null, null);
+    crReq.setDesiredConfig(Collections.singletonList(cr1));
+    controller.updateClusters(Collections.singleton(crReq), null);
+    crReq = new ClusterRequest(cluster.getClusterId(), clusterName, null, null);
+    crReq.setDesiredConfig(Collections.singletonList(cr2));
+    controller.updateClusters(Collections.singleton(crReq), null);
+
+    // Install
+    installService(clusterName, serviceName, false, false);
+    ExecutionCommand ec =
+        controller.getExecutionCommand(cluster,
+                                       s1.getServiceComponent("NAMENODE").getServiceComponentHost("h1"),
+                                       RoleCommand.START);
+    assertEquals("1-0", ec.getCommandId());
+    assertEquals("foo1", ec.getClusterName());
+    Map<String, Map<String, String>> configurations = ec.getConfigurations();
+    assertNotNull(configurations);
+    assertEquals(2, configurations.size());
+    assertTrue(configurations.containsKey("core-site"));
+    assertTrue(ec.getConfigurationAttributes().containsKey("core-site"));
+    assertTrue(ec.getCommandParams().containsKey("max_duration_for_retries"));
+    assertEquals("0", ec.getCommandParams().get("max_duration_for_retries"));
+    assertTrue(ec.getCommandParams().containsKey("command_retry_enabled"));
+    assertEquals("false", ec.getCommandParams().get("command_retry_enabled"));
+    Map<String, Set<String>> chInfo = ec.getClusterHostInfo();
+    assertTrue(chInfo.containsKey("namenode_host"));
+  }
+
+  @Test
+  public void testGetExecutionCommandWithBadClusterEnvForRetry() throws Exception {
+    testCreateServiceComponentHostSimple();
+
+    String clusterName = "foo1";
+    String serviceName = "HDFS";
+
+    Cluster cluster = clusters.getCluster(clusterName);
+    Service s1 = cluster.getService(serviceName);
+
+    // Create and attach config
+    Map<String, String> configs = new HashMap<String, String>();
+    configs.put("a", "b");
+    configs.put("command_retry_enabled", "asdf");
+    configs.put("command_retry_max_time_in_sec", "-5");
+    configs.put("commands_to_retry2", "START");
+
+    ConfigurationRequest cr1,cr2;
+    cr1 = new ConfigurationRequest(clusterName, "core-site","version1",
+                                   configs, null);
+    cr2 = new ConfigurationRequest(clusterName, "cluster-env","version1",
+                                   configs, null);
+
+    ClusterRequest crReq = new ClusterRequest(cluster.getClusterId(), clusterName, null, null);
+    crReq.setDesiredConfig(Collections.singletonList(cr1));
+    controller.updateClusters(Collections.singleton(crReq), null);
+    crReq = new ClusterRequest(cluster.getClusterId(), clusterName, null, null);
+    crReq.setDesiredConfig(Collections.singletonList(cr2));
+    controller.updateClusters(Collections.singleton(crReq), null);
+
+    // Install
+    installService(clusterName, serviceName, false, false);
+    ExecutionCommand ec =
+        controller.getExecutionCommand(cluster,
+                                       s1.getServiceComponent("NAMENODE").getServiceComponentHost("h1"),
+                                       RoleCommand.START);
+    assertEquals("1-0", ec.getCommandId());
+    assertEquals("foo1", ec.getClusterName());
+    Map<String, Map<String, String>> configurations = ec.getConfigurations();
+    assertNotNull(configurations);
+    assertEquals(2, configurations.size());
+    assertTrue(configurations.containsKey("core-site"));
+    assertTrue(ec.getConfigurationAttributes().containsKey("core-site"));
+    assertTrue(ec.getCommandParams().containsKey("max_duration_for_retries"));
+    assertEquals("0", ec.getCommandParams().get("max_duration_for_retries"));
+    assertTrue(ec.getCommandParams().containsKey("command_retry_enabled"));
+    assertEquals("false", ec.getCommandParams().get("command_retry_enabled"));
+    Map<String, Set<String>> chInfo = ec.getClusterHostInfo();
+    assertTrue(chInfo.containsKey("namenode_host"));
+  }
+
+  @Test
   public void testGetExecutionCommand() throws Exception {
     testCreateServiceComponentHostSimple();
 
@@ -1042,6 +1195,10 @@ public class AmbariManagementControllerTest {
     assertTrue(configurations.containsKey("core-site"));
     assertTrue(ec.getConfigurationAttributes().containsKey("hdfs-site"));
     assertTrue(ec.getConfigurationAttributes().containsKey("core-site"));
+    assertTrue(ec.getCommandParams().containsKey("max_duration_for_retries"));
+    assertEquals("0", ec.getCommandParams().get("max_duration_for_retries"));
+    assertTrue(ec.getCommandParams().containsKey("command_retry_enabled"));
+    assertEquals("false", ec.getCommandParams().get("command_retry_enabled"));
     Map<String, Set<String>> chInfo = ec.getClusterHostInfo();
     assertTrue(chInfo.containsKey("namenode_host"));
   }