You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ja...@apache.org on 2023/09/22 08:34:05 UTC
[solr] branch branch_9x updated: SOLR-16968: The MemoryCircuitBreaker now uses average heap usage (#1905)
This is an automated email from the ASF dual-hosted git repository.
janhoy pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 210b10b55b9 SOLR-16968: The MemoryCircuitBreaker now uses average heap usage (#1905)
210b10b55b9 is described below
commit 210b10b55b9177c56e3d2c17c641932925dbd01f
Author: Jan Høydahl <ja...@apache.org>
AuthorDate: Fri Sep 22 10:05:15 2023 +0200
SOLR-16968: The MemoryCircuitBreaker now uses average heap usage (#1905)
(cherry picked from commit 8ca4a5d657f04d5467d42334d5ed7c9efac2b9ea)
---
solr/CHANGES.txt | 2 +
.../src/java/org/apache/solr/core/SolrCore.java | 17 ++++-
.../circuitbreaker/AveragingMetricProvider.java | 80 ++++++++++++++++++++++
.../solr/util/circuitbreaker/CircuitBreaker.java | 9 ++-
.../util/circuitbreaker/CircuitBreakerManager.java | 14 ++++
.../circuitbreaker/CircuitBreakerRegistry.java | 75 +++++++++++++++++---
.../util/circuitbreaker/MemoryCircuitBreaker.java | 70 +++++++++++++------
.../apache/solr/util/BaseTestCircuitBreaker.java | 31 ++++++---
8 files changed, 253 insertions(+), 45 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 870b1bfdccb..c4eb19c71e1 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -69,6 +69,8 @@ Improvements
* SOLR-16970: SOLR_OPTS is now able to override options set by the Solr control scripts, "bin/solr" and "bin/solr.cmd". (Houston Putman)
+* SOLR-16968: The MemoryCircuitBreaker now uses average heap usage over the last 30 seconds (janhoy, Christine Poerschke)
+
* SOLR-14886: Suppress stack traces in query response (Isabelle Giguere via Alex Deparvu)
* SOLR-16461: `/solr/coreName/replication?command=backup` now has a v2 equivalent, available at
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java
index 944d4684016..f5e28b34aa0 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCore.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java
@@ -1088,9 +1088,6 @@ public class SolrCore implements SolrInfoBean, Closeable {
solrMetricsContext = coreMetricManager.getSolrMetricsContext();
this.coreMetricManager.loadReporters();
- // init pluggable circuit breakers
- initPlugins(null, CircuitBreaker.class);
-
if (updateHandler == null) {
directoryFactory = initDirectoryFactory();
recoveryStrategyBuilder = initRecoveryStrategyBuilder();
@@ -1115,6 +1112,9 @@ public class SolrCore implements SolrInfoBean, Closeable {
// initialize core metrics
initializeMetrics(solrMetricsContext, null);
+ // init pluggable circuit breakers, after metrics because some circuit breakers use metrics
+ initPlugins(null, CircuitBreaker.class);
+
SolrFieldCacheBean solrFieldCacheBean = new SolrFieldCacheBean();
// this is registered at the CONTAINER level because it's not core-specific - for now we
// also register it here for back-compat
@@ -1764,6 +1764,17 @@ public class SolrCore implements SolrInfoBean, Closeable {
ExecutorUtil.shutdownAndAwaitTermination(coreAsyncTaskExecutor);
+ // Close circuit breakers that may have background threads, before metrics because some circuit
+ // breakers use metrics
+ try {
+ getCircuitBreakerRegistry().close();
+ } catch (Throwable e) {
+ log.error("Exception closing circuit breakers", e);
+ if (e instanceof Error) {
+ throw (Error) e;
+ }
+ }
+
// stop reporting metrics
try {
coreMetricManager.close();
diff --git a/solr/core/src/java/org/apache/solr/util/circuitbreaker/AveragingMetricProvider.java b/solr/core/src/java/org/apache/solr/util/circuitbreaker/AveragingMetricProvider.java
new file mode 100644
index 00000000000..60161e98181
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/util/circuitbreaker/AveragingMetricProvider.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.util.circuitbreaker;
+
+import com.google.common.util.concurrent.AtomicDouble;
+import java.io.Closeable;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import org.apache.solr.common.util.ExecutorUtil;
+import org.apache.solr.common.util.SolrNamedThreadFactory;
+import org.apache.solr.logging.CircularList;
+
+/** Averages the metric value over a period of time */
+public class AveragingMetricProvider implements Closeable {
+ private final CircularList<Double> samplesRingBuffer;
+ private ScheduledExecutorService executor;
+ private final AtomicDouble currentAverageValue = new AtomicDouble(-1);
+
+ /**
+ * Creates an instance with an executor that runs every sampleInterval seconds and averages over
+ * numSamples samples.
+ *
+ * @param metricProvider metric provider that will provide a value
+ * @param numSamples number of samples to calculate average for
+ * @param sampleInterval interval between each sample
+ */
+ public AveragingMetricProvider(
+ MetricProvider metricProvider, int numSamples, long sampleInterval) {
+ this.samplesRingBuffer = new CircularList<>(numSamples);
+ executor =
+ Executors.newSingleThreadScheduledExecutor(
+ new SolrNamedThreadFactory(
+ "AveragingMetricProvider-" + metricProvider.getClass().getSimpleName()));
+ executor.scheduleWithFixedDelay(
+ () -> {
+ samplesRingBuffer.add(metricProvider.getMetricValue());
+ currentAverageValue.set(
+ samplesRingBuffer.toList().stream()
+ .mapToDouble(Double::doubleValue)
+ .average()
+ .orElse(-1));
+ },
+ 0,
+ sampleInterval,
+ TimeUnit.SECONDS);
+ }
+
+ /**
+ * Return current average. This is a cached value, so calling this method will not incur any
+ * calculations
+ */
+ public double getMetricValue() {
+ return currentAverageValue.get();
+ }
+
+ @Override
+ public void close() {
+ ExecutorUtil.shutdownAndAwaitTermination(executor);
+ }
+
+ /** Interface to provide the metric value. */
+ public interface MetricProvider {
+ double getMetricValue();
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreaker.java b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreaker.java
index 4b9d24bb694..78841cceaf7 100644
--- a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreaker.java
+++ b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreaker.java
@@ -17,6 +17,8 @@
package org.apache.solr.util.circuitbreaker;
+import java.io.Closeable;
+import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Set;
@@ -41,7 +43,7 @@ import org.apache.solr.util.plugin.NamedListInitializedPlugin;
*
* @lucene.experimental
*/
-public abstract class CircuitBreaker implements NamedListInitializedPlugin {
+public abstract class CircuitBreaker implements NamedListInitializedPlugin, Closeable {
// Only query requests are checked by default
private Set<SolrRequestType> requestTypes = Set.of(SolrRequestType.QUERY);
private final List<SolrRequestType> SUPPORTED_TYPES =
@@ -60,6 +62,11 @@ public abstract class CircuitBreaker implements NamedListInitializedPlugin {
/** Get error message when the circuit breaker triggers */
public abstract String getErrorMessage();
+ @Override
+ public void close() throws IOException {
+ // Nothing to do by default
+ }
+
/**
* Set the request types for which this circuit breaker should be checked. If not called, the
* circuit breaker will be checked for the {@link SolrRequestType#QUERY} request type only.
diff --git a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerManager.java b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerManager.java
index 02e3c7af676..3ca0c760a86 100644
--- a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerManager.java
+++ b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerManager.java
@@ -17,6 +17,7 @@
package org.apache.solr.util.circuitbreaker;
+import java.io.IOException;
import java.lang.invoke.MethodHandles;
import org.apache.solr.common.util.NamedList;
import org.slf4j.Logger;
@@ -77,6 +78,19 @@ public class CircuitBreakerManager extends CircuitBreaker {
}
}
+ @Override
+ public void close() throws IOException {
+ try {
+ if (memEnabled) {
+ memCB.close();
+ }
+ } finally {
+ if (cpuEnabled) {
+ cpuCB.close();
+ }
+ }
+ }
+
// The methods below will be called by super class during init
public void setMemEnabled(String enabled) {
this.memEnabled = Boolean.getBoolean(enabled);
diff --git a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java
index 84c2f61fb9b..a7081df96f6 100644
--- a/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java
+++ b/solr/core/src/java/org/apache/solr/util/circuitbreaker/CircuitBreakerRegistry.java
@@ -18,12 +18,19 @@
package org.apache.solr.util.circuitbreaker;
import com.google.common.annotations.VisibleForTesting;
+import java.io.Closeable;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.solr.client.solrj.SolrRequest.SolrRequestType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Keeps track of all registered circuit breaker instances for various request types. Responsible
@@ -32,26 +39,35 @@ import org.apache.solr.client.solrj.SolrRequest.SolrRequestType;
* @lucene.experimental
* @since 9.4
*/
-public class CircuitBreakerRegistry {
+public class CircuitBreakerRegistry implements Closeable {
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final Map<SolrRequestType, List<CircuitBreaker>> circuitBreakerMap = new HashMap<>();
public CircuitBreakerRegistry() {}
public void register(CircuitBreaker circuitBreaker) {
- circuitBreaker
- .getRequestTypes()
- .forEach(
- r -> {
- List<CircuitBreaker> list =
- circuitBreakerMap.computeIfAbsent(r, k -> new ArrayList<>());
- list.add(circuitBreaker);
- });
+ synchronized (circuitBreakerMap) {
+ circuitBreaker
+ .getRequestTypes()
+ .forEach(
+ r -> {
+ List<CircuitBreaker> list =
+ circuitBreakerMap.computeIfAbsent(r, k -> new ArrayList<>());
+ list.add(circuitBreaker);
+ if (log.isInfoEnabled()) {
+ log.info(
+ "Registered circuit breaker {} for request type(s) {}",
+ circuitBreaker.getClass().getSimpleName(),
+ r);
+ }
+ });
+ }
}
@VisibleForTesting
- public void deregisterAll() {
- circuitBreakerMap.clear();
+ public void deregisterAll() throws IOException {
+ this.close();
}
/**
@@ -97,4 +113,41 @@ public class CircuitBreakerRegistry {
public boolean isEnabled(SolrRequestType requestType) {
return circuitBreakerMap.containsKey(requestType);
}
+
+ @Override
+ public void close() throws IOException {
+ synchronized (circuitBreakerMap) {
+ final AtomicInteger closeFailedCounter = new AtomicInteger(0);
+ circuitBreakerMap
+ .values()
+ .forEach(
+ list ->
+ list.forEach(
+ it -> {
+ try {
+ if (log.isDebugEnabled()) {
+ log.debug(
+ "Closed circuit breaker {} for request type(s) {}",
+ it.getClass().getSimpleName(),
+ it.getRequestTypes());
+ }
+ it.close();
+ } catch (IOException e) {
+ if (log.isErrorEnabled()) {
+ log.error(
+ String.format(
+ Locale.ROOT,
+ "Failed to close circuit breaker %s",
+ it.getClass().getSimpleName()),
+ e);
+ }
+ closeFailedCounter.incrementAndGet();
+ }
+ }));
+ circuitBreakerMap.clear();
+ if (closeFailedCounter.get() > 0) {
+ throw new IOException("Failed to close " + closeFailedCounter.get() + " circuit breakers");
+ }
+ }
+ }
}
diff --git a/solr/core/src/java/org/apache/solr/util/circuitbreaker/MemoryCircuitBreaker.java b/solr/core/src/java/org/apache/solr/util/circuitbreaker/MemoryCircuitBreaker.java
index 3004d732e4d..4a3eb3f5b9f 100644
--- a/solr/core/src/java/org/apache/solr/util/circuitbreaker/MemoryCircuitBreaker.java
+++ b/solr/core/src/java/org/apache/solr/util/circuitbreaker/MemoryCircuitBreaker.java
@@ -17,32 +17,64 @@
package org.apache.solr.util.circuitbreaker;
+import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
+import org.apache.solr.util.RefCounted;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * Tracks the current JVM heap usage and triggers if it exceeds the defined percentage of the
- * maximum heap size allocated to the JVM. This circuit breaker is a part of the default
- * CircuitBreakerRegistry so is checked for every request -- hence it is realtime. Once the memory
- * usage goes below the threshold, it will start allowing queries again.
+ * Tracks the current JVM heap usage and triggers if a moving heap usage average over 30 seconds
+ * exceeds the defined percentage of the maximum heap size allocated to the JVM. Once the average
+ * memory usage goes below the threshold, it will start allowing queries again.
*
* <p>The memory threshold is defined as a percentage of the maximum memory allocated -- see
- * memThreshold in solrconfig.xml.
+ * memThreshold in <code>solrconfig.xml</code>.
*/
public class MemoryCircuitBreaker extends CircuitBreaker {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final MemoryMXBean MEMORY_MX_BEAN = ManagementFactory.getMemoryMXBean();
+ // One shared provider / executor for all instances of this class
+ private static RefCounted<AveragingMetricProvider> averagingMetricProvider;
private long heapMemoryThreshold;
private static final ThreadLocal<Long> seenMemory = ThreadLocal.withInitial(() -> 0L);
private static final ThreadLocal<Long> allowedMemory = ThreadLocal.withInitial(() -> 0L);
+ /** Creates an instance which averages over 6 samples during last 30 seconds. */
public MemoryCircuitBreaker() {
+ this(6, 5);
+ }
+
+ /**
+ * Constructor that allows override of sample interval for which the memory usage is fetched. This
+ * is provided for testing, not intended for general use because the average metric provider
+ * implementation is the same for all instances of the class.
+ *
+ * @param numSamples number of samples to calculate average for
+ * @param sampleInterval interval between each sample
+ */
+ protected MemoryCircuitBreaker(int numSamples, int sampleInterval) {
super();
+ synchronized (MemoryCircuitBreaker.class) {
+ if (averagingMetricProvider == null || averagingMetricProvider.getRefcount() == 0) {
+ averagingMetricProvider =
+ new RefCounted<>(
+ new AveragingMetricProvider(
+ () -> MEMORY_MX_BEAN.getHeapMemoryUsage().getUsed(),
+ numSamples,
+ sampleInterval)) {
+ @Override
+ protected void close() {
+ get().close();
+ }
+ };
+ }
+ averagingMetricProvider.incref();
+ }
}
public void setThreshold(double thresholdValueInPercentage) {
@@ -60,14 +92,11 @@ public class MemoryCircuitBreaker extends CircuitBreaker {
}
}
- // TODO: An optimization can be to trip the circuit breaker for a duration of time
- // after the circuit breaker condition is matched. This will optimize for per call
- // overhead of calculating the condition parameters but can result in false positives.
@Override
public boolean isTripped() {
long localAllowedMemory = getCurrentMemoryThreshold();
- long localSeenMemory = calculateLiveMemoryUsage();
+ long localSeenMemory = getAvgMemoryUsage();
allowedMemory.set(localAllowedMemory);
@@ -76,6 +105,10 @@ public class MemoryCircuitBreaker extends CircuitBreaker {
return (localSeenMemory >= localAllowedMemory);
}
+ protected long getAvgMemoryUsage() {
+ return (long) averagingMetricProvider.get().getMetricValue();
+ }
+
@Override
public String getErrorMessage() {
return "Memory Circuit Breaker triggered as JVM heap usage values are greater than allocated threshold. "
@@ -89,17 +122,12 @@ public class MemoryCircuitBreaker extends CircuitBreaker {
return heapMemoryThreshold;
}
- /**
- * Calculate the live memory usage for the system. This method has package visibility to allow
- * using for testing.
- *
- * @return Memory usage in bytes.
- */
- protected long calculateLiveMemoryUsage() {
- // NOTE: MemoryUsageGaugeSet provides memory usage statistics but we do not use them
- // here since it will require extra allocations and incur cost, hence it is cheaper to use
- // MemoryMXBean directly. Ideally, this call should not add noticeable
- // latency to a query -- but if it does, please signify on SOLR-14588
- return MEMORY_MX_BEAN.getHeapMemoryUsage().getUsed();
+ @Override
+ public void close() throws IOException {
+ synchronized (MemoryCircuitBreaker.class) {
+ if (averagingMetricProvider != null && averagingMetricProvider.getRefcount() > 0) {
+ averagingMetricProvider.decref();
+ }
+ }
}
}
diff --git a/solr/core/src/test/org/apache/solr/util/BaseTestCircuitBreaker.java b/solr/core/src/test/org/apache/solr/util/BaseTestCircuitBreaker.java
index 71c6fe67f8d..14c83df771a 100644
--- a/solr/core/src/test/org/apache/solr/util/BaseTestCircuitBreaker.java
+++ b/solr/core/src/test/org/apache/solr/util/BaseTestCircuitBreaker.java
@@ -19,6 +19,7 @@ package org.apache.solr.util;
import static org.hamcrest.CoreMatchers.containsString;
+import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;
@@ -43,6 +44,8 @@ import org.slf4j.LoggerFactory;
public abstract class BaseTestCircuitBreaker extends SolrTestCaseJ4 {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+ private static final CircuitBreaker dummyMemBreaker = new MemoryCircuitBreaker();
+ private static final CircuitBreaker dummyCBManager = new CircuitBreakerManager();
protected static void indexDocs() {
removeAllExistingCircuitBreakers();
@@ -62,11 +65,13 @@ public abstract class BaseTestCircuitBreaker extends SolrTestCaseJ4 {
@Override
public void tearDown() throws Exception {
super.tearDown();
+ dummyMemBreaker.close();
+ dummyCBManager.close();
}
@After
public void after() {
- h.getCore().getCircuitBreakerRegistry().deregisterAll();
+ removeAllExistingCircuitBreakers();
}
public void testCBAlwaysTrips() {
@@ -116,9 +121,10 @@ public abstract class BaseTestCircuitBreaker extends SolrTestCaseJ4 {
}
public void testBadRequestType() {
+
expectThrows(
IllegalArgumentException.class,
- () -> new MemoryCircuitBreaker().setRequestTypes(List.of("badRequestType")));
+ () -> dummyMemBreaker.setRequestTypes(List.of("badRequestType")));
}
public void testBuildingMemoryPressure() {
@@ -236,17 +242,21 @@ public abstract class BaseTestCircuitBreaker extends SolrTestCaseJ4 {
"//lst[@name='process']/double[@name='time']");
}
- public void testErrorCode() {
+ public void testErrorCode() throws Exception {
assertEquals(
SolrException.ErrorCode.SERVICE_UNAVAILABLE,
- CircuitBreaker.getErrorCode(List.of(new CircuitBreakerManager())));
+ CircuitBreaker.getErrorCode(List.of(dummyCBManager)));
assertEquals(
SolrException.ErrorCode.TOO_MANY_REQUESTS,
- CircuitBreaker.getErrorCode(List.of(new MemoryCircuitBreaker())));
+ CircuitBreaker.getErrorCode(List.of(dummyMemBreaker)));
}
private static void removeAllExistingCircuitBreakers() {
- h.getCore().getCircuitBreakerRegistry().deregisterAll();
+ try {
+ h.getCore().getCircuitBreakerRegistry().deregisterAll();
+ } catch (IOException e) {
+ fail("Failed to unload circuit breakers");
+ }
}
private static class MockCircuitBreaker extends MemoryCircuitBreaker {
@@ -264,10 +274,12 @@ public abstract class BaseTestCircuitBreaker extends SolrTestCaseJ4 {
}
private static class FakeMemoryPressureCircuitBreaker extends MemoryCircuitBreaker {
+ public FakeMemoryPressureCircuitBreaker() {
+ super(1, 1);
+ }
@Override
- protected long calculateLiveMemoryUsage() {
- // Return a number large enough to trigger a pushback from the circuit breaker
+ protected long getAvgMemoryUsage() {
return Long.MAX_VALUE;
}
}
@@ -276,11 +288,12 @@ public abstract class BaseTestCircuitBreaker extends SolrTestCaseJ4 {
private AtomicInteger count;
public BuildingUpMemoryPressureCircuitBreaker() {
+ super(1, 1);
this.count = new AtomicInteger(0);
}
@Override
- protected long calculateLiveMemoryUsage() {
+ protected long getAvgMemoryUsage() {
int localCount = count.getAndIncrement();
if (localCount >= 4) {