You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by gu...@apache.org on 2024/03/18 16:52:05 UTC

(solr) branch main updated: SOLR-17203 fix calculation for CpuAllowedLimit (#2347)

This is an automated email from the ASF dual-hosted git repository.

gus pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new f67b7183cc5 SOLR-17203 fix calculation for CpuAllowedLimit (#2347)
f67b7183cc5 is described below

commit f67b7183cc57638589b2303185237d6612fb9ad3
Author: Gus Heck <46...@users.noreply.github.com>
AuthorDate: Mon Mar 18 12:51:59 2024 -0400

    SOLR-17203 fix calculation for CpuAllowedLimit (#2347)
---
 .../apache/solr/handler/RequestHandlerBase.java    | 10 ++--
 .../org/apache/solr/search/CpuAllowedLimit.java    | 32 +++++++------
 .../java/org/apache/solr/util/ThreadCpuTimer.java  | 56 ++++++++++++----------
 .../apache/solr/search/TestCpuAllowedLimit.java    |  7 +++
 4 files changed, 59 insertions(+), 46 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
index b9da9f73156..ff020ddebd3 100644
--- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
+++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
@@ -242,15 +242,15 @@ public abstract class RequestHandlerBase
         rsp.setHttpCaching(false);
       }
     } catch (Exception e) {
-      e = normalizeReceivedException(req, e);
-      processErrorMetricsOnException(e, metrics);
-      rsp.setException(e);
+      Exception normalized = normalizeReceivedException(req, e);
+      processErrorMetricsOnException(normalized, metrics);
+      rsp.setException(normalized);
     } finally {
       long elapsed = timer.stop();
       metrics.totalTime.inc(elapsed);
 
-      if (publishCpuTime) {
-        Optional<Long> cpuTime = threadCpuTimer.getCpuTimeMs();
+      if (publishCpuTime && threadCpuTimer != null) {
+        Optional<Long> cpuTime = threadCpuTimer.getElapsedCpuMs();
         if (cpuTime.isPresent()) {
           // add CPU_TIME if not already added by SearchHandler
           NamedList<Object> header = rsp.getResponseHeader();
diff --git a/solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java b/solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java
index d63888b2726..aa3a1fae7b2 100644
--- a/solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java
+++ b/solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java
@@ -17,25 +17,29 @@
 package org.apache.solr.search;
 
 import com.google.common.annotations.VisibleForTesting;
-import java.lang.invoke.MethodHandles;
 import java.util.concurrent.TimeUnit;
+import net.jcip.annotations.NotThreadSafe;
 import org.apache.lucene.index.QueryTimeout;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrRequestInfo;
 import org.apache.solr.util.ThreadCpuTimer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * Enforces a CPU-time based timeout on a given SolrQueryRequest, as specified by the {@code
  * cpuAllowed} query parameter.
+ *
+ * <p>Since this class uses {@link ThreadCpuTimer} it is irrevocably lock-hostile and can never be
+ * exposed to multiple threads, even if guarded by synchronization. Normally this is attached to
+ * objects ultimately held by a ThreadLocal in {@link SolrRequestInfo} to provide safe usage on the
+ * assumption that such objects are not shared to other threads.
+ *
+ * @see ThreadCpuTimer
  */
+@NotThreadSafe
 public class CpuAllowedLimit implements QueryTimeout {
-  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
-  private final long limitAtNs;
   private final ThreadCpuTimer threadCpuTimer;
+  private final long requestedTimeoutNs;
 
   /**
    * Create an object to represent a CPU time limit for the current request. NOTE: this
@@ -49,6 +53,8 @@ public class CpuAllowedLimit implements QueryTimeout {
       throw new IllegalArgumentException("Thread CPU time monitoring is not available.");
     }
     SolrRequestInfo solrRequestInfo = SolrRequestInfo.getRequestInfo();
+    // get existing timer if available to ensure sub-queries can't reset/exceed the intended time
+    // constraint.
     threadCpuTimer =
         solrRequestInfo != null ? solrRequestInfo.getThreadCpuTimer() : new ThreadCpuTimer();
     long reqCpuLimit = req.getParams().getLong(CommonParams.CPU_ALLOWED, -1L);
@@ -57,18 +63,14 @@ public class CpuAllowedLimit implements QueryTimeout {
       throw new IllegalArgumentException(
           "Check for limit with hasCpuLimit(req) before creating a CpuAllowedLimit");
     }
-    // calculate when the time limit is reached, account for the time already spent
-    limitAtNs =
-        threadCpuTimer.getStartCpuTimeNs()
-            + TimeUnit.NANOSECONDS.convert(reqCpuLimit, TimeUnit.MILLISECONDS);
+    // calculate the time when the limit is reached, e.g. account for the time already spent
+    requestedTimeoutNs = TimeUnit.NANOSECONDS.convert(reqCpuLimit, TimeUnit.MILLISECONDS);
   }
 
   @VisibleForTesting
   CpuAllowedLimit(long limitMs) {
     this.threadCpuTimer = new ThreadCpuTimer();
-    limitAtNs =
-        threadCpuTimer.getCurrentCpuTimeNs()
-            + TimeUnit.NANOSECONDS.convert(limitMs, TimeUnit.MILLISECONDS);
+    requestedTimeoutNs = TimeUnit.NANOSECONDS.convert(limitMs, TimeUnit.MILLISECONDS);
   }
 
   /** Return true if the current request has a parameter with a valid value of the limit. */
@@ -76,9 +78,9 @@ public class CpuAllowedLimit implements QueryTimeout {
     return req.getParams().getLong(CommonParams.CPU_ALLOWED, -1L) > 0L;
   }
 
-  /** Return true if a max limit value is set and the current usage has exceeded the limit. */
+  /** Return true if usage has exceeded the limit. */
   @Override
   public boolean shouldExit() {
-    return limitAtNs - threadCpuTimer.getCurrentCpuTimeNs() < 0L;
+    return threadCpuTimer.getElapsedCpuNs() > requestedTimeoutNs;
   }
 }
diff --git a/solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java b/solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java
index 468df99382b..054a5310d14 100644
--- a/solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java
+++ b/solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java
@@ -21,6 +21,7 @@ import java.lang.management.ManagementFactory;
 import java.lang.management.ThreadMXBean;
 import java.util.Optional;
 import java.util.concurrent.TimeUnit;
+import net.jcip.annotations.NotThreadSafe;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -29,8 +30,13 @@ import org.slf4j.LoggerFactory;
  * {@link java.lang.management.ThreadMXBean}.
  *
  * <p>Calling code should create an instance of this class when starting the operation, and then can
- * get the {@link #getCpuTimeMs()} at any time thereafter.
+ * get the {@link #getElapsedCpuMs()} at any time thereafter.
+ *
+ * <p>This class is irrevocably not thread safe. Never allow instances of this class to be exposed
+ * to more than one thread. Acquiring an external lock will not be sufficient. This class can be
+ * considered "lock-hostile" due to its caching of timing information for a specific thread.
  */
+@NotThreadSafe
 public class ThreadCpuTimer {
   private static final long UNSUPPORTED = -1;
   public static final String CPU_TIME = "cpuTime";
@@ -56,15 +62,11 @@ public class ThreadCpuTimer {
   private final long startCpuTimeNanos;
 
   /**
-   * Create an instance to track the current thread's usage of CPU. The usage information can later
-   * be retrieved by any thread by calling {@link #getCpuTimeMs()}.
+   * Create an instance to track the current thread's usage of CPU. Usage information can later be
+   * retrieved by calling {@link #getElapsedCpuMs()}. Timing starts immediately upon construction.
    */
   public ThreadCpuTimer() {
-    if (THREAD_MX_BEAN != null) {
-      this.startCpuTimeNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime();
-    } else {
-      this.startCpuTimeNanos = UNSUPPORTED;
-    }
+    this.startCpuTimeNanos = getThreadTotalCpuNs();
   }
 
   public static boolean isSupported() {
@@ -72,45 +74,47 @@ public class ThreadCpuTimer {
   }
 
   /**
-   * Return the initial value of CPU time for this thread when this instance was first created.
-   * NOTE: absolute value returned by this method has no meaning by itself, it should only be used
-   * when comparing elapsed time between this value and {@link #getCurrentCpuTimeNs()}.
+   * Return CPU time consumed by this thread since the construction of this timer object.
    *
    * @return current value, or {@link #UNSUPPORTED} if not supported.
    */
-  public long getStartCpuTimeNs() {
-    return startCpuTimeNanos;
+  public long getElapsedCpuNs() {
+    return this.startCpuTimeNanos != UNSUPPORTED
+        ? getThreadTotalCpuNs() - this.startCpuTimeNanos
+        : UNSUPPORTED;
   }
 
   /**
-   * Return current value of CPU time for this thread.
+   * Get the cpu time for the current thread since {@link Thread#start()} without throwing an
+   * exception.
    *
-   * @return current value, or {@link #UNSUPPORTED} if not supported.
+   * @see ThreadMXBean#getCurrentThreadCpuTime() for important details
+   * @return the number of nanoseconds of cpu consumed by this thread since {@code Thread.start()}.
    */
-  public long getCurrentCpuTimeNs() {
+  private long getThreadTotalCpuNs() {
     if (THREAD_MX_BEAN != null) {
-      return this.startCpuTimeNanos != UNSUPPORTED
-          ? THREAD_MX_BEAN.getCurrentThreadCpuTime() - this.startCpuTimeNanos
-          : UNSUPPORTED;
+      return THREAD_MX_BEAN.getCurrentThreadCpuTime();
     } else {
       return UNSUPPORTED;
     }
   }
 
   /**
-   * Get the CPU usage information for the thread that created this {@link ThreadCpuTimer}. The
-   * information will track the thread's cpu since the creation of this {@link ThreadCpuTimer}
-   * instance, if the VM's cpu tracking is disabled, returned value will be {@link #UNSUPPORTED}.
+   * Get the CPU usage information for the current thread since it created this {@link
+   * ThreadCpuTimer}. The result is undefined if called by any other thread.
+   *
+   * @return the thread's cpu since the creation of this {@link ThreadCpuTimer} instance. If the
+   *     VM's cpu tracking is disabled, returned value will be {@link #UNSUPPORTED}.
    */
-  public Optional<Long> getCpuTimeMs() {
-    long cpuTimeNs = getCurrentCpuTimeNs();
+  public Optional<Long> getElapsedCpuMs() {
+    long cpuTimeNs = getElapsedCpuNs();
     return cpuTimeNs != UNSUPPORTED
         ? Optional.of(TimeUnit.MILLISECONDS.convert(cpuTimeNs, TimeUnit.NANOSECONDS))
-        : Optional.of(UNSUPPORTED);
+        : Optional.empty();
   }
 
   @Override
   public String toString() {
-    return getCpuTimeMs().toString();
+    return getElapsedCpuMs().map(String::valueOf).orElse("UNSUPPORTED");
   }
 }
diff --git a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java b/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
index 88c279a5d64..dac4ec5124c 100644
--- a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
+++ b/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
@@ -208,4 +208,11 @@ public class TestCpuAllowedLimit extends SolrCloudTestCase {
     // System.err.println("rsp=" + rsp.jsonStr());
     assertNotNull("should have partial results", rsp.getHeader().get("partialResults"));
   }
+
+  @Test
+  public void testDistribLimit2() throws Exception {
+    // This looks silly, but it actually guards against:
+    // https://issues.apache.org/jira/browse/SOLR-17203
+    testDistribLimit();
+  }
 }