You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by sw...@apache.org on 2015/08/25 22:40:05 UTC

ambari git commit: AMBARI-12859. AMS cache could result in OOM in ambari server. (swagle)

Repository: ambari
Updated Branches:
  refs/heads/branch-2.1 52e2c20dc -> 6e13ad525


AMBARI-12859. AMS cache could result in OOM in ambari server. (swagle)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/6e13ad52
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/6e13ad52
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/6e13ad52

Branch: refs/heads/branch-2.1
Commit: 6e13ad5258b3496259594cc09d9598533d0a0cee
Parents: 52e2c20
Author: Siddharth Wagle <sw...@hortonworks.com>
Authored: Tue Aug 25 13:36:53 2015 -0700
Committer: Siddharth Wagle <sw...@hortonworks.com>
Committed: Tue Aug 25 13:37:06 2015 -0700

----------------------------------------------------------------------
 ambari-server/pom.xml                           | 24 ++++++++++++++++
 .../server/configuration/Configuration.java     | 20 ++++++++++++--
 .../timeline/AMSReportPropertyProvider.java     |  1 -
 .../timeline/cache/TimelineMetricCache.java     | 11 +++++++-
 .../cache/TimelineMetricCacheEntryFactory.java  |  8 ++++--
 .../cache/TimelineMetricCacheProvider.java      | 29 ++++++++++++++------
 .../hadoop-metrics2-hbase.properties.j2         |  8 +++---
 .../timeline/cache/TimelineMetricCacheTest.java | 15 +++++++---
 8 files changed, 94 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/pom.xml
----------------------------------------------------------------------
diff --git a/ambari-server/pom.xml b/ambari-server/pom.xml
index da4977c..3866924 100644
--- a/ambari-server/pom.xml
+++ b/ambari-server/pom.xml
@@ -1647,11 +1647,23 @@
       <groupId>org.apache.directory.server</groupId>
       <artifactId>apacheds-server-annotations</artifactId>
       <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>net.sf.ehcache</groupId>
+          <artifactId>ehcache-core</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.directory.server</groupId>
       <artifactId>apacheds-core-integ</artifactId>
       <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>net.sf.ehcache</groupId>
+          <artifactId>ehcache-core</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.directory.server</groupId>
@@ -1666,11 +1678,23 @@
     <dependency>
       <groupId>org.apache.directory.server</groupId>
       <artifactId>apacheds-kerberos-codec</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>net.sf.ehcache</groupId>
+          <artifactId>ehcache-core</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.directory.server</groupId>
       <artifactId>apacheds-core</artifactId>
       <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>net.sf.ehcache</groupId>
+          <artifactId>ehcache-core</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.directory.server</groupId>

http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java b/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
index 808cca4..0b0ee95 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
@@ -427,6 +427,8 @@ public class Configuration {
   private static final String DEFAULT_TIMELINE_METRICS_REQUEST_CONNECT_TIMEOUT = "5000";
   private static final String TIMELINE_METRICS_REQUEST_CATCHUP_INTERVAL = "server.timeline.metrics.cache.catchup.interval";
   private static final String DEFAULT_TIMELINE_METRICS_REQUEST_CATCHUP_INTERVAL = "300000";
+  private static final String TIMELINE_METRICS_CACHE_HEAP_PERCENT = "server.timeline.metrics.cache.heap.percent";
+  private static final String DEFAULT_TIMELINE_METRICS_CACHE_HEAP_PERCENT = "15%";
 
   /**
    * The full path to the XML file that describes the different alert templates.
@@ -1552,7 +1554,7 @@ public class Configuration {
   public int getHttpSessionInactiveTimeout() {
     return Integer.parseInt(properties.getProperty(
       SERVER_HTTP_SESSION_INACTIVE_TIMEOUT,
-        "1800"));
+      "1800"));
   }
 
   /**
@@ -1822,7 +1824,9 @@ public class Configuration {
 
   /**
    * Max allowed entries in metrics cache.
+   * @deprecated Ehcache supports either a max heap size in bytes or a max entry count, not both.
    */
+  @Deprecated
   public int getMetricCacheMaxEntries() {
     return Integer.parseInt(properties.getProperty(TIMELINE_METRICS_CACHE_MAX_ENTRIES,
       DEFAULT_TIMELINE_METRICS_CACHE_MAX_ENTRIES));
@@ -1887,6 +1891,18 @@ public class Configuration {
    */
   public Long getMetricRequestBufferTimeCatchupInterval() {
     return Long.parseLong(properties.getProperty(TIMELINE_METRICS_REQUEST_CATCHUP_INTERVAL,
-        DEFAULT_TIMELINE_METRICS_REQUEST_CATCHUP_INTERVAL));
+      DEFAULT_TIMELINE_METRICS_REQUEST_CATCHUP_INTERVAL));
+  }
+
+  /**
+   * Percentage of total heap allocated to metrics cache, default is 15%.
+   * Default heap setting for the server is 2 GB so max allocated heap size
+   * for this cache is 300 MB.
+   */
+  public String getMetricsCacheManagerHeapPercent() {
+    String percent = properties.getProperty(TIMELINE_METRICS_CACHE_HEAP_PERCENT,
+      DEFAULT_TIMELINE_METRICS_CACHE_HEAP_PERCENT);
+
+    return percent.trim().endsWith("%") ? percent.trim() : percent.trim() + "%";
   }
 }

http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/AMSReportPropertyProvider.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/AMSReportPropertyProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/AMSReportPropertyProvider.java
index 0605123..85ea575 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/AMSReportPropertyProvider.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/AMSReportPropertyProvider.java
@@ -42,7 +42,6 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
-import java.util.TreeMap;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import static org.apache.ambari.server.controller.metrics.MetricsPaddingMethod.ZERO_PADDING_PARAM;

http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCache.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCache.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCache.java
index 47b2c4a..9e343e3 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCache.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCache.java
@@ -23,6 +23,7 @@ import net.sf.ehcache.Element;
 import net.sf.ehcache.constructs.blocking.LockTimeoutException;
 import net.sf.ehcache.constructs.blocking.UpdatingCacheEntryFactory;
 import net.sf.ehcache.constructs.blocking.UpdatingSelfPopulatingCache;
+import net.sf.ehcache.statistics.StatisticsGateway;
 import org.apache.ambari.server.AmbariException;
 import org.apache.hadoop.metrics2.sink.timeline.TimelineMetric;
 import org.apache.hadoop.metrics2.sink.timeline.TimelineMetrics;
@@ -74,7 +75,15 @@ public class TimelineMetricCache extends UpdatingSelfPopulatingCache {
     if (LOG.isDebugEnabled()) {
       // Print stats every 100 calls - Note: Supported in debug mode only
       if (printCacheStatsCounter.getAndIncrement() == 0) {
-        LOG.debug("Metrics cache stats => \n" + this.getStatistics());
+        StatisticsGateway statistics = this.getStatistics();
+        LOG.debug("Metrics cache stats => \n" +
+          ", Evictions = " + statistics.cacheEvictedCount() +
+          ", Expired = " + statistics.cacheExpiredCount() +
+          ", Hits = " + statistics.cacheHitCount() +
+          ", Misses = " + statistics.cacheMissCount() +
+          ", Hit ratio = " + statistics.cacheHitRatio() +
+          ", Puts = " + statistics.cachePutCount() +
+          ", Size in MB = " + (statistics.getLocalHeapSizeInBytes() / 1048576));
       } else {
         printCacheStatsCounter.compareAndSet(100, 0);
       }

http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheEntryFactory.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheEntryFactory.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheEntryFactory.java
index 597f037..b7b081d 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheEntryFactory.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheEntryFactory.java
@@ -81,8 +81,12 @@ public class TimelineMetricCacheEntryFactory implements UpdatingCacheEntryFactor
     LOG.debug("Creating cache entry since none exists, key = " + key);
     TimelineAppMetricCacheKey metricCacheKey = (TimelineAppMetricCacheKey) key;
 
-    TimelineMetrics timelineMetrics =
-      requestHelperForGets.fetchTimelineMetrics(metricCacheKey.getSpec());
+    TimelineMetrics timelineMetrics = null;
+    try {
+      timelineMetrics = requestHelperForGets.fetchTimelineMetrics(metricCacheKey.getSpec());
+    } catch (IOException io) {
+      LOG.debug("Caught IOException on fetching metrics. " + io.getMessage());
+    }
 
     TimelineMetricsCacheValue value = null;
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheProvider.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheProvider.java
index 8df957e..6d80687 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheProvider.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheProvider.java
@@ -22,11 +22,15 @@ import com.google.inject.Singleton;
 import net.sf.ehcache.Cache;
 import net.sf.ehcache.CacheManager;
 import net.sf.ehcache.config.CacheConfiguration;
+import net.sf.ehcache.config.PersistenceConfiguration;
 import net.sf.ehcache.store.MemoryStoreEvictionPolicy;
 import org.apache.ambari.server.configuration.Configuration;
+import org.apache.commons.lang.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static net.sf.ehcache.config.PersistenceConfiguration.*;
+
 /**
  * Cache implementation that provides ability to perform incremental reads
  * from Metrics backend and reduce the amount of calls between Ambari and the
@@ -56,26 +60,35 @@ public class TimelineMetricCacheProvider {
       return;
     }
 
-    //Create a singleton CacheManager using defaults
     System.setProperty("net.sf.ehcache.skipUpdateCheck", "true");
-    CacheManager manager = CacheManager.getInstance();
+    net.sf.ehcache.config.Configuration managerConfig =
+      new net.sf.ehcache.config.Configuration();
+
+    // Set max heap available to the cache manager
+    managerConfig.setMaxBytesLocalHeap(configuration.getMetricsCacheManagerHeapPercent());
+
+    // Create a singleton CacheManager using the heap-bounded configuration built above
+    CacheManager manager = CacheManager.create(managerConfig);
 
     LOG.info("Creating Metrics Cache with timeouts => ttl = " +
       configuration.getMetricCacheTTLSeconds() + ", idle = " +
       configuration.getMetricCacheIdleSeconds());
 
+    PersistenceConfiguration persistenceConfiguration = new PersistenceConfiguration();
+    persistenceConfiguration.setStrategy(Strategy.NONE.name());
+
     //Create a Cache specifying its configuration.
-    Cache cache = new Cache(
-      new CacheConfiguration(TIMELINE_METRIC_CACHE_INSTANCE_NAME, configuration.getMetricCacheMaxEntries())
+    CacheConfiguration cacheConfiguration = new CacheConfiguration()
+        .name(TIMELINE_METRIC_CACHE_INSTANCE_NAME)
         .timeToLiveSeconds(configuration.getMetricCacheTTLSeconds()) // 1 hour
         .timeToIdleSeconds(configuration.getMetricCacheIdleSeconds()) // 5 minutes
         .memoryStoreEvictionPolicy(MemoryStoreEvictionPolicy.LRU)
         .eternal(false)
-        .diskPersistent(false)
-        .overflowToDisk(false)
-        .statistics(LOG.isDebugEnabled() || LOG.isTraceEnabled())
-    );
+        .persistence(persistenceConfiguration);
+
+    Cache cache = new Cache(cacheConfiguration);
 
+    // Decorate with UpdatingSelfPopulatingCache
     timelineMetricsCache = new TimelineMetricCache(cache, cacheEntryFactory);
 
     LOG.info("Registering metrics cache with provider: name = " +

http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/hadoop-metrics2-hbase.properties.j2
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/hadoop-metrics2-hbase.properties.j2 b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/hadoop-metrics2-hbase.properties.j2
index 5a4dd16..3f404eb 100644
--- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/hadoop-metrics2-hbase.properties.j2
+++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/hadoop-metrics2-hbase.properties.j2
@@ -39,21 +39,21 @@
 hbase.extendedperiod = 3600
 
 hbase.class=org.apache.hadoop.metrics2.sink.timeline.HadoopTimelineMetricsSink
-hbase.period=10
+hbase.period=30
 hbase.collector={{ams_collector_host_single}}:{{metric_collector_port}}
 
 jvm.class=org.apache.hadoop.metrics2.sink.timeline.HadoopTimelineMetricsSink
-jvm.period=10
+jvm.period=30
 jvm.collector={{ams_collector_host_single}}:{{metric_collector_port}}
 
 rpc.class=org.apache.hadoop.metrics2.sink.timeline.HadoopTimelineMetricsSink
-rpc.period=10
+rpc.period=30
 rpc.collector={{ams_collector_host_single}}:{{metric_collector_port}}
 
 *.timeline.plugin.urls=file:///usr/lib/ambari-metrics-hadoop-sink/ambari-metrics-hadoop-sink.jar
 *.sink.timeline.slave.host.name={{hostname}}
 hbase.sink.timeline.class=org.apache.hadoop.metrics2.sink.timeline.HadoopTimelineMetricsSink
-hbase.sink.timeline.period=10
+hbase.sink.timeline.period=60
 hbase.sink.timeline.collector={{ams_collector_host_single}}:{{metric_collector_port}}
 hbase.sink.timeline.serviceName-prefix=ams
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/test/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheTest.java b/ambari-server/src/test/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheTest.java
index b16024b..3432e1d 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheTest.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheTest.java
@@ -102,8 +102,13 @@ public class TimelineMetricCacheTest {
 
     replay(cacheEntryFactory);
 
-    CacheManager manager = CacheManager.getInstance();
-    Cache cache = new Cache("test", 10, false, false, 10000, 10000);
+    // Need to set this due to what seems like a bug in Ehcache 2.10.0: setting
+    // it on the second cache instance results in an assertion error.
+    // Since this is not our production use case, setting it here as well.
+    net.sf.ehcache.config.Configuration managerConfig = new net.sf.ehcache.config.Configuration();
+    managerConfig.setMaxBytesLocalHeap("10%");
+    CacheManager manager = CacheManager.create(managerConfig);
+    Cache cache = new Cache("test", 0, false, false, 10000, 10000);
     UpdatingSelfPopulatingCache testCache = new UpdatingSelfPopulatingCache(cache, cacheEntryFactory);
     manager.addCache(testCache);
 
@@ -116,9 +121,11 @@ public class TimelineMetricCacheTest {
   @Test
   public void testTimlineMetricCacheProviderGets() throws Exception {
     Configuration configuration = createNiceMock(Configuration.class);
-    expect(configuration.getMetricCacheMaxEntries()).andReturn(1000);
     expect(configuration.getMetricCacheTTLSeconds()).andReturn(3600);
     expect(configuration.getMetricCacheIdleSeconds()).andReturn(100);
+    expect(configuration.getMetricsCacheManagerHeapPercent()).andReturn("10%");
+
+    replay(configuration);
 
     final long now = System.currentTimeMillis();
     Map<String, TimelineMetric> valueMap = new HashMap<String, TimelineMetric>();
@@ -150,7 +157,7 @@ public class TimelineMetricCacheTest {
     cacheEntryFactory.updateEntryValue(testKey, value);
     expectLastCall().once();
 
-    replay(configuration, cacheEntryFactory);
+    replay(cacheEntryFactory);
 
     TimelineMetricCacheProvider cacheProvider = getMetricCacheProvider(configuration, cacheEntryFactory);
     TimelineMetricCache cache = cacheProvider.getTimelineMetricsCache();