You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by ds...@apache.org on 2015/11/23 18:43:15 UTC

[2/2] ambari git commit: AMBARI-14016 Backport AMS auto-start and watchdog thread changes to 2.1.2 (dsen)

AMBARI-14016 Backport AMS auto-start and watchdog thread changes to 2.1.2 (dsen)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/7b1ac0f0
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/7b1ac0f0
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/7b1ac0f0

Branch: refs/heads/branch-2.1.2
Commit: 7b1ac0f0a81c12acdd93e8f6ff702eb8992b662c
Parents: c510dc8
Author: Dmytro Sen <ds...@apache.org>
Authored: Mon Nov 23 19:41:54 2015 +0200
Committer: Dmytro Sen <ds...@apache.org>
Committed: Mon Nov 23 19:41:54 2015 +0200

----------------------------------------------------------------------
 .../conf/unix/ambari-metrics-collector          |    8 -
 .../ApplicationHistoryServer.java               |    2 +-
 .../timeline/HBaseTimelineMetricStore.java      |   18 +
 .../metrics/timeline/PhoenixHBaseAccessor.java  |    1 +
 .../timeline/TimelineMetricConfiguration.java   |   40 +
 .../timeline/TimelineMetricStoreWatcher.java    |  130 ++
 .../TimelineMetricStoreWatcherTest.java         |  107 ++
 .../0.1.0/configuration/ams-hbase-site.xml      |    2 +-
 .../0.1.0/configuration/ams-site.xml            |   26 +
 .../0.1.0/package/scripts/ams_service.py        |    3 +-
 .../0.1.0/package/scripts/metrics_collector.py  |    2 +
 .../AMBARI_METRICS/test_metrics_collector.py    |  237 ++--
 .../python/stacks/2.0.6/configs/default.json    |    2 +-
 .../2.0.6/configs/default_ams_embedded.json     | 1108 ++++++++++++++++++
 14 files changed, 1584 insertions(+), 102 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-metrics/ambari-metrics-timelineservice/conf/unix/ambari-metrics-collector
----------------------------------------------------------------------
diff --git a/ambari-metrics/ambari-metrics-timelineservice/conf/unix/ambari-metrics-collector b/ambari-metrics/ambari-metrics-timelineservice/conf/unix/ambari-metrics-collector
index dd7ee22..06d1c7d 100644
--- a/ambari-metrics/ambari-metrics-timelineservice/conf/unix/ambari-metrics-collector
+++ b/ambari-metrics/ambari-metrics-timelineservice/conf/unix/ambari-metrics-collector
@@ -301,14 +301,6 @@ fi
 case "$1" in
 
 	start)
-    daemon_status "${HBASE_MASTER_PID}"
-    HBASE_DAEMON_STATUS=$?
-    daemon_status "${PIDFILE}"
-    DAEMON_STATUS=$?
-
-    if [[ !"${DISTRIBUTED_HBASE}"  && ( ${DAEMON_STATUS} != 0 || ${HBASE_DAEMON_STATUS} != 0 ) ]]; then
-      stop
-    fi
     start
 
   ;;

http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java
----------------------------------------------------------------------
diff --git a/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java
index d79ca68..11c162a 100644
--- a/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java
+++ b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java
@@ -45,7 +45,7 @@ import org.apache.hadoop.yarn.webapp.WebApps;
 
 import com.google.common.annotations.VisibleForTesting;
 
-import static org.apache.hadoop.yarn.server.applicationhistoryservice.metrics.timeline.TimelineMetricConfiguration.*;
+import static org.apache.hadoop.yarn.server.applicationhistoryservice.metrics.timeline.TimelineMetricConfiguration.DISABLE_APPLICATION_TIMELINE_STORE;
 
 /**
  * History server that keeps track of all types of history in the cluster.

http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/HBaseTimelineMetricStore.java
----------------------------------------------------------------------
diff --git a/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/HBaseTimelineMetricStore.java b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/HBaseTimelineMetricStore.java
index 43cb554..f069ae0 100644
--- a/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/HBaseTimelineMetricStore.java
+++ b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/HBaseTimelineMetricStore.java
@@ -40,6 +40,15 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Executors;
+import java.util.concurrent.RejectedExecutionHandler;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledFuture;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
 
 import static org.apache.hadoop.yarn.server.applicationhistoryservice.metrics.timeline.TimelineMetricConfiguration.USE_GROUPBY_AGGREGATOR_QUERIES;
 
@@ -49,6 +58,8 @@ public class HBaseTimelineMetricStore extends AbstractService implements Timelin
   private final TimelineMetricConfiguration configuration;
   private PhoenixHBaseAccessor hBaseAccessor;
   private static volatile boolean isInitialized = false;
+  private final ScheduledExecutorService executorService =
+    Executors.newSingleThreadScheduledExecutor();
 
   /**
    * Construct the service.
@@ -123,6 +134,13 @@ public class HBaseTimelineMetricStore extends AbstractService implements Timelin
         aggregatorDailyThread.start();
       }
 
+      int initDelay = configuration.getTimelineMetricsServiceWatcherInitDelay();
+      int delay = configuration.getTimelineMetricsServiceWatcherDelay();
+      // Start the watchdog
+      executorService.scheduleWithFixedDelay(
+        new TimelineMetricStoreWatcher(this, configuration), initDelay, delay,
+        TimeUnit.SECONDS);
+
       isInitialized = true;
     }
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/PhoenixHBaseAccessor.java
----------------------------------------------------------------------
diff --git a/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/PhoenixHBaseAccessor.java b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/PhoenixHBaseAccessor.java
index 2073d75..cba971c 100644
--- a/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/PhoenixHBaseAccessor.java
+++ b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/PhoenixHBaseAccessor.java
@@ -414,6 +414,7 @@ public class PhoenixHBaseAccessor {
         }
       }
 
+      // commit() blocks if HBase is unavailable
       conn.commit();
 
     } finally {

http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricConfiguration.java
----------------------------------------------------------------------
diff --git a/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricConfiguration.java b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricConfiguration.java
index 30e42f2..daab437 100644
--- a/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricConfiguration.java
+++ b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricConfiguration.java
@@ -169,6 +169,18 @@ public class TimelineMetricConfiguration {
   public static final String HANDLER_THREAD_COUNT =
     "timeline.metrics.service.handler.thread.count";
 
+  public static final String WATCHER_INITIAL_DELAY =
+    "timeline.metrics.service.watcher.initial.delay";
+
+  public static final String WATCHER_DELAY =
+    "timeline.metrics.service.watcher.delay";
+
+  public static final String WATCHER_TIMEOUT =
+    "timeline.metrics.service.watcher.timeout";
+
+  public static final String WATCHER_MAX_FAILURES =
+    "timeline.metrics.service.watcher.max.failures";
+
   public static final String PRECISION_TABLE_SPLIT_POINTS =
     "timeline.metrics.host.aggregate.splitpoints";
 
@@ -237,6 +249,34 @@ public class TimelineMetricConfiguration {
     return 20;
   }
 
+  public int getTimelineMetricsServiceWatcherInitDelay() {
+    if (metricsConf != null) {
+      return Integer.parseInt(metricsConf.get(WATCHER_INITIAL_DELAY, "120"));
+    }
+    return 120;
+  }
+
+  public int getTimelineMetricsServiceWatcherDelay() {
+    if (metricsConf != null) {
+      return Integer.parseInt(metricsConf.get(WATCHER_DELAY, "30"));
+    }
+    return 30;
+  }
+
+  public int getTimelineMetricsServiceWatcherTimeout() {
+    if (metricsConf != null) {
+      return Integer.parseInt(metricsConf.get(WATCHER_TIMEOUT, "30"));
+    }
+    return 30;
+  }
+
+  public int getTimelineMetricsServiceWatcherMaxFailures() {
+    if (metricsConf != null) {
+      return Integer.parseInt(metricsConf.get(WATCHER_MAX_FAILURES, "3"));
+    }
+    return 3;
+  }
+
   public String getTimelineServiceRpcAddress() {
     String defaultRpcAddress = "0.0.0.0:60200";
     if (metricsConf != null) {

http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricStoreWatcher.java
----------------------------------------------------------------------
diff --git a/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricStoreWatcher.java b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricStoreWatcher.java
new file mode 100644
index 0000000..363b43a
--- /dev/null
+++ b/ambari-metrics/ambari-metrics-timelineservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricStoreWatcher.java
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.applicationhistoryservice.metrics.timeline;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.metrics2.sink.timeline.Precision;
+import org.apache.hadoop.metrics2.sink.timeline.TimelineMetric;
+import org.apache.hadoop.metrics2.sink.timeline.TimelineMetrics;
+import org.apache.hadoop.util.ExitUtil;
+
+import java.util.Collections;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Acts as the single TimelineMetricStore Watcher.
+ */
+public class TimelineMetricStoreWatcher implements Runnable {
+
+  private static final Log LOG = LogFactory
+    .getLog(TimelineMetricStoreWatcher.class);
+  private static final String FAKE_METRIC_NAME = "TimelineMetricStoreWatcher.FakeMetric";
+  private static final String FAKE_HOSTNAME = "fakehostname";
+  private static final String FAKE_APP_ID = "timeline_metric_store_watcher";
+
+  private static int failures = 0;
+  private final TimelineMetricConfiguration configuration;
+
+  private TimelineMetricStore timelineMetricStore;
+
+  //used to call timelineMetricStore blocking methods with timeout
+  private ExecutorService executor = Executors.newSingleThreadExecutor();
+
+
+  public TimelineMetricStoreWatcher(TimelineMetricStore timelineMetricStore,
+                                    TimelineMetricConfiguration configuration) {
+    this.timelineMetricStore = timelineMetricStore;
+    this.configuration = configuration;
+  }
+
+  @Override
+  public void run() {
+
+    if (checkMetricStore()) {
+      failures = 0;
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Successfully got metrics from TimelineMetricStore");
+      }
+    } else {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Failed to get metrics from TimelineMetricStore");
+      }
+      failures++;
+    }
+
+    if (failures >= configuration.getTimelineMetricsServiceWatcherMaxFailures()) {
+      String msg = "Error getting metrics from TimelineMetricStore. " +
+        "Shutting down by TimelineMetricStoreWatcher.";
+      LOG.fatal(msg);
+      ExitUtil.terminate(-1, msg);
+    }
+
+  }
+
+  /**
+   * Checks TimelineMetricStore functionality by adding and getting
+   * a fake metric to/from HBase
+   * @return if check was successful
+   */
+  private boolean checkMetricStore() {
+    final long startTime = System.currentTimeMillis();
+    final int delay = configuration.getTimelineMetricsServiceWatcherDelay();
+    final int timeout = configuration.getTimelineMetricsServiceWatcherTimeout();
+
+    TimelineMetric fakeMetric = new TimelineMetric();
+    fakeMetric.setMetricName(FAKE_METRIC_NAME);
+    fakeMetric.setHostName(FAKE_HOSTNAME);
+    fakeMetric.setAppId(FAKE_APP_ID);
+    fakeMetric.setStartTime(startTime);
+    fakeMetric.setTimestamp(startTime);
+    fakeMetric.getMetricValues().put(startTime, 0.0);
+
+    final TimelineMetrics metrics = new TimelineMetrics();
+    metrics.setMetrics(Collections.singletonList(fakeMetric));
+
+    Callable<TimelineMetric> task = new Callable<TimelineMetric>() {
+      public TimelineMetric call() throws Exception {
+        timelineMetricStore.putMetrics(metrics);
+        return timelineMetricStore.getTimelineMetric(
+          FAKE_METRIC_NAME, Collections.singletonList(FAKE_HOSTNAME),
+          FAKE_APP_ID, null, startTime - delay * 2 * 1000,
+          startTime + delay * 2 * 1000, Precision.SECONDS, 1);
+      }
+    };
+
+    Future<TimelineMetric> future = executor.submit(task);
+    TimelineMetric timelineMetric = null;
+    try {
+      timelineMetric = future.get(timeout, TimeUnit.SECONDS);
+    // Phoenix might throw RuntimeExceptions
+    } catch (Exception e) {
+      return false;
+    } finally {
+      future.cancel(true);
+    }
+
+    return timelineMetric != null;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-metrics/ambari-metrics-timelineservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricStoreWatcherTest.java
----------------------------------------------------------------------
diff --git a/ambari-metrics/ambari-metrics-timelineservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricStoreWatcherTest.java b/ambari-metrics/ambari-metrics-timelineservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricStoreWatcherTest.java
new file mode 100644
index 0000000..277a98c
--- /dev/null
+++ b/ambari-metrics/ambari-metrics-timelineservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/metrics/timeline/TimelineMetricStoreWatcherTest.java
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.applicationhistoryservice.metrics.timeline;
+
+import org.apache.hadoop.metrics2.sink.timeline.Precision;
+import org.apache.hadoop.metrics2.sink.timeline.TimelineMetric;
+import org.apache.hadoop.metrics2.sink.timeline.TimelineMetrics;
+import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse;
+import org.easymock.EasyMock;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.powermock.core.classloader.annotations.PrepareForTest;
+import org.powermock.modules.junit4.PowerMockRunner;
+
+import java.util.List;
+
+import static org.easymock.EasyMock.anyObject;
+import static org.easymock.EasyMock.createNiceMock;
+import static org.easymock.EasyMock.eq;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.expectLastCall;
+import static org.easymock.EasyMock.replay;
+import static org.easymock.EasyMock.verify;
+import static org.powermock.api.easymock.PowerMock.mockStatic;
+import static org.powermock.api.easymock.PowerMock.replayAll;
+import static org.powermock.api.easymock.PowerMock.verifyAll;
+
+@RunWith(PowerMockRunner.class)
+@PrepareForTest(ExitUtil.class)
+public class TimelineMetricStoreWatcherTest {
+
+  @Test
+  public void testRunPositive() throws Exception {
+    TimelineMetricStore metricStore = createNiceMock(TimelineMetricStore.class);
+
+    expect(metricStore.putMetrics(anyObject(TimelineMetrics.class)))
+      .andReturn(new TimelinePutResponse());
+
+    // metric found
+    expect(metricStore.getTimelineMetric(anyObject(String.class),
+      EasyMock.<List<String>> anyObject(), anyObject(String.class),
+      anyObject(String.class), anyObject(Long.class), anyObject(Long.class),
+      eq(Precision.SECONDS), eq(1)))
+      .andReturn(new TimelineMetric()).anyTimes();
+
+    mockStatic(ExitUtil.class);
+
+    replay(metricStore);
+
+    TimelineMetricStoreWatcher timelineMetricStoreWatcher =
+      new TimelineMetricStoreWatcher(metricStore, new TimelineMetricConfiguration());
+    timelineMetricStoreWatcher.run();
+    timelineMetricStoreWatcher.run();
+    timelineMetricStoreWatcher.run();
+    verify(metricStore);
+
+  }
+
+  @Test
+  public void testRunNegative() throws Exception {
+    TimelineMetricStore metricStore = createNiceMock(TimelineMetricStore.class);
+
+    expect(metricStore.putMetrics(anyObject(TimelineMetrics.class)))
+      .andReturn(new TimelinePutResponse());
+
+    // no metrics found
+    expect(metricStore.getTimelineMetric(anyObject(String.class),
+      EasyMock.<List<String>> anyObject(), anyObject(String.class),
+      anyObject(String.class), anyObject(Long.class), anyObject(Long.class),
+      eq(Precision.SECONDS), eq(1)))
+      .andReturn(null).anyTimes();
+
+    String msg = "Error getting metrics from TimelineMetricStore. " +
+      "Shutting down by TimelineMetricStoreWatcher.";
+    mockStatic(ExitUtil.class);
+    ExitUtil.terminate(-1, msg);
+    expectLastCall().anyTimes();
+
+    replayAll();
+
+    TimelineMetricStoreWatcher timelineMetricStoreWatcher =
+      new TimelineMetricStoreWatcher(metricStore, new TimelineMetricConfiguration());
+    timelineMetricStoreWatcher.run();
+    timelineMetricStoreWatcher.run();
+    timelineMetricStoreWatcher.run();
+
+    verifyAll();
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-site.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-site.xml b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-site.xml
index 165b87e..093693f 100644
--- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-site.xml
+++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-hbase-site.xml
@@ -265,7 +265,7 @@
   </property>
   <property>
     <name>zookeeper.session.timeout.localHBaseCluster</name>
-    <value>20000</value>
+    <value>120000</value>
     <description>
       ZooKeeper session timeout in milliseconds for
       pseudo distributed mode.

http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-site.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-site.xml b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-site.xml
index 734bf69..9186c10 100644
--- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-site.xml
+++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-site.xml
@@ -395,4 +395,30 @@
       The interval between two service metrics data exports.
     </description>
   </property>
+  <property>
+    <name>timeline.metrics.service.watcher.initial.delay</name>
+    <value>120</value>
+    <description>
+      The time to delay the first watcher check execution.
+      Default resolution is 2 minutes.
+    </description>
+  </property>
+  <property>
+    <name>timeline.metrics.service.watcher.delay</name>
+    <value>30</value>
+    <description>
+      The delay between the termination of one
+      watcher check execution and the commencement of the next.
+      Default resolution is 30 seconds.
+    </description>
+  </property>
+  <property>
+    <name>timeline.metrics.service.watcher.timeout</name>
+    <value>30</value>
+    <description>
+      The maximum time to wait for a single watcher check execution.
+      Default resolution is 30 seconds.
+    </description>
+  </property>
+
 </configuration>

http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/ams_service.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/ams_service.py b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/ams_service.py
index 1afe1d8..2b475dd 100644
--- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/ams_service.py
+++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/ams_service.py
@@ -55,7 +55,8 @@ def ams_service(name, action):
       )
 
       if not params.is_hbase_distributed and os.path.exists(format("{zookeeper_data_dir}")):
-        Execute(format("{sudo} rm -rf {zookeeper_data_dir}/*")
+        Directory(params.zookeeper_data_dir,
+                  action='delete'
         )
 
       daemon_cmd = format("{cmd} start")

http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/metrics_collector.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/metrics_collector.py b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/metrics_collector.py
index 00e4123..dcb8ed4 100644
--- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/metrics_collector.py
+++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/metrics_collector.py
@@ -42,6 +42,8 @@ class AmsCollector(Script):
 
   def start(self, env):
     self.configure(env, action = 'start') # for security
+    # stop hanging components before start
+    ams_service('collector', action = 'stop')
     ams_service('collector', action = 'start')
 
   def stop(self, env):

http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-server/src/test/python/stacks/2.0.6/AMBARI_METRICS/test_metrics_collector.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/AMBARI_METRICS/test_metrics_collector.py b/ambari-server/src/test/python/stacks/2.0.6/AMBARI_METRICS/test_metrics_collector.py
index 7add394..f7e44b6 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/AMBARI_METRICS/test_metrics_collector.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/AMBARI_METRICS/test_metrics_collector.py
@@ -20,13 +20,13 @@ limitations under the License.
 from mock.mock import MagicMock, patch
 from stacks.utils.RMFTestCase import *
 
-
+@patch("os.path.exists", new = MagicMock(return_value=True))
 @patch("platform.linux_distribution", new = MagicMock(return_value="Linux"))
 class TestOozieClient(RMFTestCase):
   COMMON_SERVICES_PACKAGE_DIR = "AMBARI_METRICS/0.1.0/package"
   STACK_VERSION = "2.0.6"
 
-  def test_start_default(self):
+  def test_start_default_distributed(self):
     self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/metrics_collector.py",
                        classname = "AmsCollector",
                        command = "start",
@@ -35,9 +35,36 @@ class TestOozieClient(RMFTestCase):
                        target = RMFTestCase.TARGET_COMMON_SERVICES
     )
     self.maxDiff=None
-    self.assert_hbase_configure('master')
-    self.assert_hbase_configure('regionserver')
-    self.assert_ams('collector')
+    self.assert_hbase_configure('master', distributed=True)
+    self.assert_hbase_configure('regionserver', distributed=True)
+    self.assert_ams('collector', distributed=True)
+    self.assertResourceCalled('Execute', '/usr/lib/ams-hbase/bin/hbase-daemon.sh --config /etc/ams-hbase/conf stop zookeeper',
+                              on_timeout = 'ls /var/run/ambari-metrics-collector//hbase-ams-zookeeper.pid >/dev/null 2>&1 && ps `cat /var/run/ambari-metrics-collector//hbase-ams-zookeeper.pid` >/dev/null 2>&1 && kill -9 `cat /var/run/ambari-metrics-collector//hbase-ams-zookeeper.pid`',
+                              timeout = 30,
+                              user = 'ams'
+    )
+    self.assertResourceCalled('File', '/var/run/ambari-metrics-collector//hbase-ams-zookeeper.pid',
+                              action = ['delete']
+    )
+    self.assertResourceCalled('Execute', '/usr/lib/ams-hbase/bin/hbase-daemon.sh --config /etc/ams-hbase/conf stop master',
+                              on_timeout = 'ls /var/run/ambari-metrics-collector//hbase-ams-master.pid >/dev/null 2>&1 && ps `cat /var/run/ambari-metrics-collector//hbase-ams-master.pid` >/dev/null 2>&1 && kill -9 `cat /var/run/ambari-metrics-collector//hbase-ams-master.pid`',
+                              timeout = 30,
+                              user = 'ams'
+    )
+    self.assertResourceCalled('File', '/var/run/ambari-metrics-collector//hbase-ams-master.pid',
+                              action = ['delete']
+    )
+    self.assertResourceCalled('Execute', '/usr/lib/ams-hbase/bin/hbase-daemon.sh --config /etc/ams-hbase/conf stop regionserver',
+                              on_timeout = 'ls /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid >/dev/null 2>&1 && ps `cat /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid` >/dev/null 2>&1 && kill -9 `cat /var/run/ambari-metrics-collector//hbase-ams-regionserver.pid`',
+                              timeout = 30,
+                              user = 'ams'
+    )
+    self.assertResourceCalled('File', '/var/run/ambari-metrics-collector//hbase-ams-regionserver.pid',
+                              action = ['delete']
+    )
+    self.assertResourceCalled('Execute', '/usr/sbin/ambari-metrics-collector --config /etc/ambari-metrics-collector/conf --distributed stop',
+                              user = 'ams'
+    )
     self.assertResourceCalled('Execute', '/usr/lib/ams-hbase/bin/hbase-daemon.sh --config /etc/ams-hbase/conf start zookeeper',
                               not_if = 'ls /var/run/ambari-metrics-collector//hbase-ams-zookeeper.pid >/dev/null 2>&1 && ps `cat /var/run/ambari-metrics-collector//hbase-ams-zookeeper.pid` >/dev/null 2>&1',
                               user = 'ams'
@@ -57,7 +84,32 @@ class TestOozieClient(RMFTestCase):
     )
     self.assertNoMoreResources()
 
-  def assert_ams(self, name=None):
+  def test_start_default_embedded(self):
+    self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/metrics_collector.py",
+                       classname = "AmsCollector",
+                       command = "start",
+                       config_file="default_ams_embedded.json",
+                       hdp_stack_version = self.STACK_VERSION,
+                       target = RMFTestCase.TARGET_COMMON_SERVICES
+    )
+    self.maxDiff=None
+    self.assert_hbase_configure('master')
+    self.assert_hbase_configure('regionserver')
+    self.assert_ams('collector')
+    self.assertResourceCalled('Execute', '/usr/sbin/ambari-metrics-collector --config /etc/ambari-metrics-collector/conf stop',
+                              user = 'ams'
+    )
+    self.assertResourceCalled('Execute', 'ambari-sudo.sh rm -rf /var/lib/ambari-metrics-collector/hbase-tmp/*.tmp',
+    )
+    self.assertResourceCalled('Directory', '/var/lib/ambari-metrics-collector/hbase-tmp/zookeeper',
+                              action = ['delete']
+    )
+    self.assertResourceCalled('Execute', '/usr/sbin/ambari-metrics-collector --config /etc/ambari-metrics-collector/conf start',
+                              user = 'ams'
+    )
+    self.assertNoMoreResources()
+
+  def assert_ams(self, name=None, distributed=False):
     self.assertResourceCalled('Directory', '/etc/ambari-metrics-collector/conf',
                               owner = 'ams',
                               group = 'hadoop',
@@ -125,40 +177,41 @@ class TestOozieClient(RMFTestCase):
                               mode=0644,
                               content=Template("ams.conf.j2")
     )
-    self.assertResourceCalled('XmlConfig', 'hdfs-site.xml',
-                              owner = 'ams',
-                              group = 'hadoop',
-                              mode=0644,
-                              conf_dir = '/etc/ambari-metrics-collector/conf',
-                              configurations = self.getConfig()['configurations']['hdfs-site'],
-                              configuration_attributes = self.getConfig()['configuration_attributes']['hdfs-site']
-    )
-    self.assertResourceCalled('XmlConfig', 'hdfs-site.xml',
-                              owner = 'ams',
-                              group = 'hadoop',
-                              mode=0644,
-                              conf_dir = '/etc/ams-hbase/conf',
-                              configurations = self.getConfig()['configurations']['hdfs-site'],
-                              configuration_attributes = self.getConfig()['configuration_attributes']['hdfs-site']
-    )
-    self.assertResourceCalled('XmlConfig', 'core-site.xml',
-                              owner = 'ams',
-                              group = 'hadoop',
-                              mode=0644,
-                              conf_dir = '/etc/ambari-metrics-collector/conf',
-                              configurations = self.getConfig()['configurations']['core-site'],
-                              configuration_attributes = self.getConfig()['configuration_attributes']['core-site']
-    )
-    self.assertResourceCalled('XmlConfig', 'core-site.xml',
-                              owner = 'ams',
-                              group = 'hadoop',
-                              mode=0644,
-                              conf_dir = '/etc/ams-hbase/conf',
-                              configurations = self.getConfig()['configurations']['core-site'],
-                              configuration_attributes = self.getConfig()['configuration_attributes']['core-site']
-    )
+    if distributed:
+      self.assertResourceCalled('XmlConfig', 'hdfs-site.xml',
+                                owner = 'ams',
+                                group = 'hadoop',
+                                mode=0644,
+                                conf_dir = '/etc/ambari-metrics-collector/conf',
+                                configurations = self.getConfig()['configurations']['hdfs-site'],
+                                configuration_attributes = self.getConfig()['configuration_attributes']['hdfs-site']
+      )
+      self.assertResourceCalled('XmlConfig', 'hdfs-site.xml',
+                                owner = 'ams',
+                                group = 'hadoop',
+                                mode=0644,
+                                conf_dir = '/etc/ams-hbase/conf',
+                                configurations = self.getConfig()['configurations']['hdfs-site'],
+                                configuration_attributes = self.getConfig()['configuration_attributes']['hdfs-site']
+      )
+      self.assertResourceCalled('XmlConfig', 'core-site.xml',
+                                owner = 'ams',
+                                group = 'hadoop',
+                                mode=0644,
+                                conf_dir = '/etc/ambari-metrics-collector/conf',
+                                configurations = self.getConfig()['configurations']['core-site'],
+                                configuration_attributes = self.getConfig()['configuration_attributes']['core-site']
+      )
+      self.assertResourceCalled('XmlConfig', 'core-site.xml',
+                                owner = 'ams',
+                                group = 'hadoop',
+                                mode=0644,
+                                conf_dir = '/etc/ams-hbase/conf',
+                                configurations = self.getConfig()['configurations']['core-site'],
+                                configuration_attributes = self.getConfig()['configuration_attributes']['core-site']
+      )
 
-  def assert_hbase_configure(self, name=None):
+  def assert_hbase_configure(self, name=None, distributed=False):
     self.assertResourceCalled('Directory', '/etc/ams-hbase/conf',
                               owner = 'ams',
                               group = 'hadoop',
@@ -183,13 +236,6 @@ class TestOozieClient(RMFTestCase):
                               configurations = self.getConfig()['configurations']['ams-hbase-site'],
                               configuration_attributes = self.getConfig()['configuration_attributes']['ams-hbase-site']
                               )
-    self.assertResourceCalled('Directory', '/var/lib/ambari-metrics-collector/hbase-tmp/phoenix-spool',
-                              owner = 'ams',
-                              cd_access = 'a',
-                              group = 'hadoop',
-                              mode = 0755,
-                              recursive = True
-    )
     self.assertResourceCalled('XmlConfig', 'hbase-policy.xml',
                               owner = 'ams',
                               group = 'hadoop',
@@ -220,50 +266,61 @@ class TestOozieClient(RMFTestCase):
     )
 
     if name == 'master':
-      self.assertResourceCalled('HdfsResource', 'hdfs://localhost:8020/apps/hbase/data',
-                                security_enabled = False,
-                                hadoop_bin_dir = '/usr/bin',
-                                keytab = UnknownConfigurationMock(),
-                                kinit_path_local = '/usr/bin/kinit',
-                                user = 'hdfs',
-                                owner = 'ams',
-                                mode = 0775,
-                                hadoop_conf_dir = '/etc/hadoop/conf',
-                                type = 'directory',
-                                action = ['create_on_execute'],
-                                hdfs_site=self.getConfig()['configurations']['hdfs-site'],
-                                principal_name=UnknownConfigurationMock(),
-                                default_fs='hdfs://c6401.ambari.apache.org:8020',
-                                )
-      self.assertResourceCalled('HdfsResource', '/amshbase/staging',
-                                security_enabled = False,
-                                hadoop_bin_dir = '/usr/bin',
-                                keytab = UnknownConfigurationMock(),
-                                kinit_path_local = '/usr/bin/kinit',
-                                user = 'hdfs',
-                                owner = 'ams',
-                                mode = 0711,
-                                hadoop_conf_dir = '/etc/hadoop/conf',
-                                type = 'directory',
-                                action = ['create_on_execute'],
-                                hdfs_site=self.getConfig()['configurations']['hdfs-site'],
-                                principal_name=UnknownConfigurationMock(),
-                                default_fs='hdfs://c6401.ambari.apache.org:8020',
-                                )
-      self.assertResourceCalled('HdfsResource', None,
-                                security_enabled = False,
-                                hadoop_bin_dir = '/usr/bin',
-                                keytab = UnknownConfigurationMock(),
-                                kinit_path_local = '/usr/bin/kinit',
-                                user = 'hdfs',
-                                hadoop_conf_dir = '/etc/hadoop/conf',
-                                action = ['execute'],
-                                hdfs_site=self.getConfig()['configurations']['hdfs-site'],
-                                principal_name=UnknownConfigurationMock(),
-                                default_fs='hdfs://c6401.ambari.apache.org:8020',
-                                )
-      self.assertResourceCalled('File', '/var/run/ambari-metrics-collector//distributed_mode', action=["create"],
-                                mode=0644, owner='ams')
+      if distributed:
+        self.assertResourceCalled('HdfsResource', 'hdfs://localhost:8020/apps/hbase/data',
+                                  security_enabled = False,
+                                  hadoop_bin_dir = '/usr/bin',
+                                  keytab = UnknownConfigurationMock(),
+                                  kinit_path_local = '/usr/bin/kinit',
+                                  user = 'hdfs',
+                                  owner = 'ams',
+                                  mode = 0775,
+                                  hadoop_conf_dir = '/etc/hadoop/conf',
+                                  type = 'directory',
+                                  action = ['create_on_execute'],
+                                  hdfs_site=self.getConfig()['configurations']['hdfs-site'],
+                                  principal_name=UnknownConfigurationMock(),
+                                  default_fs='hdfs://c6401.ambari.apache.org:8020',
+                                  )
+        self.assertResourceCalled('HdfsResource', '/amshbase/staging',
+                                  security_enabled = False,
+                                  hadoop_bin_dir = '/usr/bin',
+                                  keytab = UnknownConfigurationMock(),
+                                  kinit_path_local = '/usr/bin/kinit',
+                                  user = 'hdfs',
+                                  owner = 'ams',
+                                  mode = 0711,
+                                  hadoop_conf_dir = '/etc/hadoop/conf',
+                                  type = 'directory',
+                                  action = ['create_on_execute'],
+                                  hdfs_site=self.getConfig()['configurations']['hdfs-site'],
+                                  principal_name=UnknownConfigurationMock(),
+                                  default_fs='hdfs://c6401.ambari.apache.org:8020',
+                                  )
+        self.assertResourceCalled('HdfsResource', None,
+                                  security_enabled = False,
+                                  hadoop_bin_dir = '/usr/bin',
+                                  keytab = UnknownConfigurationMock(),
+                                  kinit_path_local = '/usr/bin/kinit',
+                                  user = 'hdfs',
+                                  hadoop_conf_dir = '/etc/hadoop/conf',
+                                  action = ['execute'],
+                                  hdfs_site=self.getConfig()['configurations']['hdfs-site'],
+                                  principal_name=UnknownConfigurationMock(),
+                                  default_fs='hdfs://c6401.ambari.apache.org:8020',
+                                  )
+        self.assertResourceCalled('File', '/var/run/ambari-metrics-collector//distributed_mode', action=["create"],
+                                  mode=0644, owner='ams')
+      else:
+        self.assertResourceCalled('Directory', '/var/lib/ambari-metrics-collector/hbase',
+                                  owner = 'ams',
+                                  cd_access="a",
+                                  recursive = True
+        )
+        self.assertResourceCalled('File', '/var/run/ambari-metrics-collector//distributed_mode',
+                                  owner = 'ams',
+                                  action = ['delete']
+        )
     self.assertResourceCalled('File', '/etc/ams-hbase/conf/log4j.properties',
                               owner = 'ams',
                               group = 'hadoop',

http://git-wip-us.apache.org/repos/asf/ambari/blob/7b1ac0f0/ambari-server/src/test/python/stacks/2.0.6/configs/default.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/configs/default.json b/ambari-server/src/test/python/stacks/2.0.6/configs/default.json
index 2f75bcf..bb32d78 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/configs/default.json
+++ b/ambari-server/src/test/python/stacks/2.0.6/configs/default.json
@@ -787,7 +787,7 @@
             "hbase.regionserver.thread.compaction.large": "2",
             "phoenix.query.timeoutMs": "1200000",
             "hbase.local.dir": "${hbase.tmp.dir}/local",
-            "hbase.cluster.distributed": "false",
+            "hbase.cluster.distributed": "true",
             "zookeeper.session.timeout.localHBaseCluster": "20000",
             "hbase.client.scanner.caching": "10000",
             "phoenix.sequence.saltBuckets": "2",