You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ambari.apache.org by sw...@apache.org on 2015/08/25 22:40:05 UTC
ambari git commit: AMBARI-12859. AMS cache could result in OOM in
ambari server. (swagle)
Repository: ambari
Updated Branches:
refs/heads/branch-2.1 52e2c20dc -> 6e13ad525
AMBARI-12859. AMS cache could result in OOM in ambari server. (swagle)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/6e13ad52
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/6e13ad52
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/6e13ad52
Branch: refs/heads/branch-2.1
Commit: 6e13ad5258b3496259594cc09d9598533d0a0cee
Parents: 52e2c20
Author: Siddharth Wagle <sw...@hortonworks.com>
Authored: Tue Aug 25 13:36:53 2015 -0700
Committer: Siddharth Wagle <sw...@hortonworks.com>
Committed: Tue Aug 25 13:37:06 2015 -0700
----------------------------------------------------------------------
ambari-server/pom.xml | 24 ++++++++++++++++
.../server/configuration/Configuration.java | 20 ++++++++++++--
.../timeline/AMSReportPropertyProvider.java | 1 -
.../timeline/cache/TimelineMetricCache.java | 11 +++++++-
.../cache/TimelineMetricCacheEntryFactory.java | 8 ++++--
.../cache/TimelineMetricCacheProvider.java | 29 ++++++++++++++------
.../hadoop-metrics2-hbase.properties.j2 | 8 +++---
.../timeline/cache/TimelineMetricCacheTest.java | 15 +++++++---
8 files changed, 94 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/pom.xml
----------------------------------------------------------------------
diff --git a/ambari-server/pom.xml b/ambari-server/pom.xml
index da4977c..3866924 100644
--- a/ambari-server/pom.xml
+++ b/ambari-server/pom.xml
@@ -1647,11 +1647,23 @@
<groupId>org.apache.directory.server</groupId>
<artifactId>apacheds-server-annotations</artifactId>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>net.sf.ehcache</groupId>
+ <artifactId>ehcache-core</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.directory.server</groupId>
<artifactId>apacheds-core-integ</artifactId>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>net.sf.ehcache</groupId>
+ <artifactId>ehcache-core</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.directory.server</groupId>
@@ -1666,11 +1678,23 @@
<dependency>
<groupId>org.apache.directory.server</groupId>
<artifactId>apacheds-kerberos-codec</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>net.sf.ehcache</groupId>
+ <artifactId>ehcache-core</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.directory.server</groupId>
<artifactId>apacheds-core</artifactId>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>net.sf.ehcache</groupId>
+ <artifactId>ehcache-core</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.directory.server</groupId>
http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java b/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
index 808cca4..0b0ee95 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
@@ -427,6 +427,8 @@ public class Configuration {
private static final String DEFAULT_TIMELINE_METRICS_REQUEST_CONNECT_TIMEOUT = "5000";
private static final String TIMELINE_METRICS_REQUEST_CATCHUP_INTERVAL = "server.timeline.metrics.cache.catchup.interval";
private static final String DEFAULT_TIMELINE_METRICS_REQUEST_CATCHUP_INTERVAL = "300000";
+ private static final String TIMELINE_METRICS_CACHE_HEAP_PERCENT = "server.timeline.metrics.cache.heap.percent";
+ private static final String DEFAULT_TIMELINE_METRICS_CACHE_HEAP_PERCENT = "15%";
/**
* The full path to the XML file that describes the different alert templates.
@@ -1552,7 +1554,7 @@ public class Configuration {
public int getHttpSessionInactiveTimeout() {
return Integer.parseInt(properties.getProperty(
SERVER_HTTP_SESSION_INACTIVE_TIMEOUT,
- "1800"));
+ "1800"));
}
/**
@@ -1822,7 +1824,9 @@ public class Configuration {
/**
* Max allowed entries in metrics cache.
+ * @deprecated Ehcache supports either a max heap size in bytes or a max entry count, not both.
*/
+ @Deprecated
public int getMetricCacheMaxEntries() {
return Integer.parseInt(properties.getProperty(TIMELINE_METRICS_CACHE_MAX_ENTRIES,
DEFAULT_TIMELINE_METRICS_CACHE_MAX_ENTRIES));
@@ -1887,6 +1891,18 @@ public class Configuration {
*/
public Long getMetricRequestBufferTimeCatchupInterval() {
return Long.parseLong(properties.getProperty(TIMELINE_METRICS_REQUEST_CATCHUP_INTERVAL,
- DEFAULT_TIMELINE_METRICS_REQUEST_CATCHUP_INTERVAL));
+ DEFAULT_TIMELINE_METRICS_REQUEST_CATCHUP_INTERVAL));
+ }
+
+ /**
+ * Percentage of total heap allocated to metrics cache, default is 15%.
+ * Default heap setting for the server is 2 GB so max allocated heap size
+ * for this cache is 300 MB.
+ */
+ public String getMetricsCacheManagerHeapPercent() {
+ String percent = properties.getProperty(TIMELINE_METRICS_CACHE_HEAP_PERCENT,
+ DEFAULT_TIMELINE_METRICS_CACHE_HEAP_PERCENT);
+
+ return percent.trim().endsWith("%") ? percent.trim() : percent.trim() + "%";
}
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/AMSReportPropertyProvider.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/AMSReportPropertyProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/AMSReportPropertyProvider.java
index 0605123..85ea575 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/AMSReportPropertyProvider.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/AMSReportPropertyProvider.java
@@ -42,7 +42,6 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
-import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import static org.apache.ambari.server.controller.metrics.MetricsPaddingMethod.ZERO_PADDING_PARAM;
http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCache.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCache.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCache.java
index 47b2c4a..9e343e3 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCache.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCache.java
@@ -23,6 +23,7 @@ import net.sf.ehcache.Element;
import net.sf.ehcache.constructs.blocking.LockTimeoutException;
import net.sf.ehcache.constructs.blocking.UpdatingCacheEntryFactory;
import net.sf.ehcache.constructs.blocking.UpdatingSelfPopulatingCache;
+import net.sf.ehcache.statistics.StatisticsGateway;
import org.apache.ambari.server.AmbariException;
import org.apache.hadoop.metrics2.sink.timeline.TimelineMetric;
import org.apache.hadoop.metrics2.sink.timeline.TimelineMetrics;
@@ -74,7 +75,15 @@ public class TimelineMetricCache extends UpdatingSelfPopulatingCache {
if (LOG.isDebugEnabled()) {
// Print stats every 100 calls - Note: Supported in debug mode only
if (printCacheStatsCounter.getAndIncrement() == 0) {
- LOG.debug("Metrics cache stats => \n" + this.getStatistics());
+ StatisticsGateway statistics = this.getStatistics();
+ LOG.debug("Metrics cache stats => \n" +
+ ", Evictions = " + statistics.cacheEvictedCount() +
+ ", Expired = " + statistics.cacheExpiredCount() +
+ ", Hits = " + statistics.cacheHitCount() +
+ ", Misses = " + statistics.cacheMissCount() +
+ ", Hit ratio = " + statistics.cacheHitRatio() +
+ ", Puts = " + statistics.cachePutCount() +
+ ", Size in MB = " + (statistics.getLocalHeapSizeInBytes() / 1048576));
} else {
printCacheStatsCounter.compareAndSet(100, 0);
}
http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheEntryFactory.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheEntryFactory.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheEntryFactory.java
index 597f037..b7b081d 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheEntryFactory.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheEntryFactory.java
@@ -81,8 +81,12 @@ public class TimelineMetricCacheEntryFactory implements UpdatingCacheEntryFactor
LOG.debug("Creating cache entry since none exists, key = " + key);
TimelineAppMetricCacheKey metricCacheKey = (TimelineAppMetricCacheKey) key;
- TimelineMetrics timelineMetrics =
- requestHelperForGets.fetchTimelineMetrics(metricCacheKey.getSpec());
+ TimelineMetrics timelineMetrics = null;
+ try {
+ timelineMetrics = requestHelperForGets.fetchTimelineMetrics(metricCacheKey.getSpec());
+ } catch (IOException io) {
+ LOG.debug("Caught IOException on fetching metrics. " + io.getMessage());
+ }
TimelineMetricsCacheValue value = null;
http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheProvider.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheProvider.java b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheProvider.java
index 8df957e..6d80687 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheProvider.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheProvider.java
@@ -22,11 +22,15 @@ import com.google.inject.Singleton;
import net.sf.ehcache.Cache;
import net.sf.ehcache.CacheManager;
import net.sf.ehcache.config.CacheConfiguration;
+import net.sf.ehcache.config.PersistenceConfiguration;
import net.sf.ehcache.store.MemoryStoreEvictionPolicy;
import org.apache.ambari.server.configuration.Configuration;
+import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static net.sf.ehcache.config.PersistenceConfiguration.*;
+
/**
* Cache implementation that provides ability to perform incremental reads
* from Metrics backend and reduce the amount of calls between Ambari and the
@@ -56,26 +60,35 @@ public class TimelineMetricCacheProvider {
return;
}
- //Create a singleton CacheManager using defaults
System.setProperty("net.sf.ehcache.skipUpdateCheck", "true");
- CacheManager manager = CacheManager.getInstance();
+ net.sf.ehcache.config.Configuration managerConfig =
+ new net.sf.ehcache.config.Configuration();
+
+ // Set max heap available to the cache manager
+ managerConfig.setMaxBytesLocalHeap(configuration.getMetricsCacheManagerHeapPercent());
+
+ // Create a singleton CacheManager using the heap-bounded configuration above
+ CacheManager manager = CacheManager.create(managerConfig);
LOG.info("Creating Metrics Cache with timeouts => ttl = " +
configuration.getMetricCacheTTLSeconds() + ", idle = " +
configuration.getMetricCacheIdleSeconds());
+ PersistenceConfiguration persistenceConfiguration = new PersistenceConfiguration();
+ persistenceConfiguration.setStrategy(Strategy.NONE.name());
+
//Create a Cache specifying its configuration.
- Cache cache = new Cache(
- new CacheConfiguration(TIMELINE_METRIC_CACHE_INSTANCE_NAME, configuration.getMetricCacheMaxEntries())
+ CacheConfiguration cacheConfiguration = new CacheConfiguration()
+ .name(TIMELINE_METRIC_CACHE_INSTANCE_NAME)
.timeToLiveSeconds(configuration.getMetricCacheTTLSeconds()) // 1 hour
.timeToIdleSeconds(configuration.getMetricCacheIdleSeconds()) // 5 minutes
.memoryStoreEvictionPolicy(MemoryStoreEvictionPolicy.LRU)
.eternal(false)
- .diskPersistent(false)
- .overflowToDisk(false)
- .statistics(LOG.isDebugEnabled() || LOG.isTraceEnabled())
- );
+ .persistence(persistenceConfiguration);
+
+ Cache cache = new Cache(cacheConfiguration);
+ // Decorate with UpdatingSelfPopulatingCache
timelineMetricsCache = new TimelineMetricCache(cache, cacheEntryFactory);
LOG.info("Registering metrics cache with provider: name = " +
http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/hadoop-metrics2-hbase.properties.j2
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/hadoop-metrics2-hbase.properties.j2 b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/hadoop-metrics2-hbase.properties.j2
index 5a4dd16..3f404eb 100644
--- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/hadoop-metrics2-hbase.properties.j2
+++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/hadoop-metrics2-hbase.properties.j2
@@ -39,21 +39,21 @@
hbase.extendedperiod = 3600
hbase.class=org.apache.hadoop.metrics2.sink.timeline.HadoopTimelineMetricsSink
-hbase.period=10
+hbase.period=30
hbase.collector={{ams_collector_host_single}}:{{metric_collector_port}}
jvm.class=org.apache.hadoop.metrics2.sink.timeline.HadoopTimelineMetricsSink
-jvm.period=10
+jvm.period=30
jvm.collector={{ams_collector_host_single}}:{{metric_collector_port}}
rpc.class=org.apache.hadoop.metrics2.sink.timeline.HadoopTimelineMetricsSink
-rpc.period=10
+rpc.period=30
rpc.collector={{ams_collector_host_single}}:{{metric_collector_port}}
*.timeline.plugin.urls=file:///usr/lib/ambari-metrics-hadoop-sink/ambari-metrics-hadoop-sink.jar
*.sink.timeline.slave.host.name={{hostname}}
hbase.sink.timeline.class=org.apache.hadoop.metrics2.sink.timeline.HadoopTimelineMetricsSink
-hbase.sink.timeline.period=10
+hbase.sink.timeline.period=60
hbase.sink.timeline.collector={{ams_collector_host_single}}:{{metric_collector_port}}
hbase.sink.timeline.serviceName-prefix=ams
http://git-wip-us.apache.org/repos/asf/ambari/blob/6e13ad52/ambari-server/src/test/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheTest.java b/ambari-server/src/test/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheTest.java
index b16024b..3432e1d 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheTest.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/controller/metrics/timeline/cache/TimelineMetricCacheTest.java
@@ -102,8 +102,13 @@ public class TimelineMetricCacheTest {
replay(cacheEntryFactory);
- CacheManager manager = CacheManager.getInstance();
- Cache cache = new Cache("test", 10, false, false, 10000, 10000);
+ // Need to set this due to what seems like a bug in Ehcache 2.10.0; setting
+ // it on the second cache instance results in an assertion error.
+ // Since this is not our production use case, setting it here as well.
+ net.sf.ehcache.config.Configuration managerConfig = new net.sf.ehcache.config.Configuration();
+ managerConfig.setMaxBytesLocalHeap("10%");
+ CacheManager manager = CacheManager.create(managerConfig);
+ Cache cache = new Cache("test", 0, false, false, 10000, 10000);
UpdatingSelfPopulatingCache testCache = new UpdatingSelfPopulatingCache(cache, cacheEntryFactory);
manager.addCache(testCache);
@@ -116,9 +121,11 @@ public class TimelineMetricCacheTest {
@Test
public void testTimlineMetricCacheProviderGets() throws Exception {
Configuration configuration = createNiceMock(Configuration.class);
- expect(configuration.getMetricCacheMaxEntries()).andReturn(1000);
expect(configuration.getMetricCacheTTLSeconds()).andReturn(3600);
expect(configuration.getMetricCacheIdleSeconds()).andReturn(100);
+ expect(configuration.getMetricsCacheManagerHeapPercent()).andReturn("10%");
+
+ replay(configuration);
final long now = System.currentTimeMillis();
Map<String, TimelineMetric> valueMap = new HashMap<String, TimelineMetric>();
@@ -150,7 +157,7 @@ public class TimelineMetricCacheTest {
cacheEntryFactory.updateEntryValue(testKey, value);
expectLastCall().once();
- replay(configuration, cacheEntryFactory);
+ replay(cacheEntryFactory);
TimelineMetricCacheProvider cacheProvider = getMetricCacheProvider(configuration, cacheEntryFactory);
TimelineMetricCache cache = cacheProvider.getTimelineMetricsCache();