You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ab...@apache.org on 2024/02/20 16:34:44 UTC

(solr) branch branch_9x updated: SOLR-17141 branch_9x: Create CpuQueryLimit implementation (#2287)

This is an automated email from the ASF dual-hosted git repository.

ab pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new a667bb4517b SOLR-17141 branch_9x: Create CpuQueryLimit implementation (#2287)
a667bb4517b is described below

commit a667bb4517b9dbcf8a28a3b9da416fdb2fa16ebc
Author: Andrzej Białecki <ab...@apache.org>
AuthorDate: Tue Feb 20 17:34:38 2024 +0100

    SOLR-17141 branch_9x: Create CpuQueryLimit implementation (#2287)
    
    * SOLR-17141: Create CpuQueryLimit implementation (#2244)
    * Refactor to fix ThreadStats / ThreadCpuTimer nesting and use it in CpuQueryTimeLimit.
    * Rename classes to better reflect the type of limit.
---
 solr/CHANGES.txt                                   |   2 +-
 .../org/apache/solr/request/SolrRequestInfo.java   |  28 ++-
 .../org/apache/solr/search/CpuAllowedLimit.java    |  84 ++++++++
 .../java/org/apache/solr/search/QueryLimits.java   |   8 +-
 ...lrQueryTimeLimit.java => TimeAllowedLimit.java} |  10 +-
 .../java/org/apache/solr/util/ThreadCpuTimer.java  | 116 +++++++++++
 .../solr/search/ExpensiveSearchComponent.java      | 207 ++++++++++++++++++++
 .../apache/solr/search/TestCpuAllowedLimit.java    | 211 +++++++++++++++++++++
 .../query-guide/pages/common-query-parameters.adoc |  17 ++
 .../apache/solr/common/params/CommonParams.java    |   8 +-
 10 files changed, 679 insertions(+), 12 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 54c96bc87bb..609164f7cfb 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -7,7 +7,7 @@ https://github.com/apache/solr/blob/main/solr/solr-ref-guide/modules/upgrade-not
 ==================  9.6.0 ==================
 New Features
 ---------------------
-(No changes)
+* SOLR-17141: Implement 'cpuAllowed' query parameter to limit the maximum CPU usage by a running query. (Andrzej Bialecki, Gus Heck, David Smiley)
 
 Improvements
 ---------------------
diff --git a/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java b/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java
index 9c00b86a67f..72194da9641 100644
--- a/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java
+++ b/solr/core/src/java/org/apache/solr/request/SolrRequestInfo.java
@@ -33,6 +33,7 @@ import org.apache.solr.handler.component.ResponseBuilder;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.search.QueryLimits;
 import org.apache.solr.servlet.SolrDispatchFilter;
+import org.apache.solr.util.ThreadCpuTimer;
 import org.apache.solr.util.TimeZoneUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -45,6 +46,7 @@ public class SolrRequestInfo {
   private static final ThreadLocal<Deque<SolrRequestInfo>> threadLocal =
       ThreadLocal.withInitial(ArrayDeque::new);
   static final Object LIMITS_KEY = new Object();
+  static final Object CPU_TIMER_KEY = new Object();
 
   private int refCount = 1; // prevent closing when still used
 
@@ -78,11 +80,13 @@ public class SolrRequestInfo {
       assert false : "SolrRequestInfo Stack is full";
       log.error("SolrRequestInfo Stack is full");
     } else if (!stack.isEmpty() && info.req != null) {
-      // New SRI instances inherit limits from prior SRI regardless of parameters.
-      // This ensures limits cannot be changed or removed for a given thread once set.
-      // if req is null limits will be an empty instance with no limits anyway.
+      // New SRI instances inherit limits and the CPU timer from prior SRI regardless of parameters.
+      // This ensures these two properties cannot be changed or removed for a given thread once set.
+      // if req is null then limits will be an empty instance with no limits anyway.
+      info.req.getContext().put(CPU_TIMER_KEY, stack.peek().getThreadCpuTimer());
       info.req.getContext().put(LIMITS_KEY, stack.peek().getLimits());
     }
+    // this creates both new QueryLimits and new ThreadCpuTimer if not already set
     info.initQueryLimits();
     log.trace("{} {}", info, "setRequestInfo()");
     assert !info.isClosed() : "SRI is already closed (odd).";
@@ -236,14 +240,30 @@ public class SolrRequestInfo {
    * empty) {@link QueryLimits} object if it has not been created, and will then return the same
    * object on every subsequent invocation.
    *
-   * @return The {@code QueryLimits} object for the current requet.
+   * @return The {@code QueryLimits} object for the current request.
    */
   public QueryLimits getLimits() {
+    // make sure the ThreadCpuTimer is always initialized
+    getThreadCpuTimer();
     return req == null
         ? QueryLimits.NONE
         : (QueryLimits) req.getContext().computeIfAbsent(LIMITS_KEY, (k) -> new QueryLimits(req));
   }
 
+  /**
+   * Get the thread CPU time monitor for the current request. This will either trigger the creation
+   * of a new instance if it hasn't been yet created, or will retrieve the already existing instance
+   * from the "bottom" of the request stack.
+   *
+   * @return the {@link ThreadCpuTimer} object for the current request.
+   */
+  public ThreadCpuTimer getThreadCpuTimer() {
+    return req == null
+        ? new ThreadCpuTimer()
+        : (ThreadCpuTimer)
+            req.getContext().computeIfAbsent(CPU_TIMER_KEY, k -> new ThreadCpuTimer());
+  }
+
   public SolrDispatchFilter.Action getAction() {
     return action;
   }
diff --git a/solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java b/solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java
new file mode 100644
index 00000000000..d63888b2726
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/CpuAllowedLimit.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import com.google.common.annotations.VisibleForTesting;
+import java.lang.invoke.MethodHandles;
+import java.util.concurrent.TimeUnit;
+import org.apache.lucene.index.QueryTimeout;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestInfo;
+import org.apache.solr.util.ThreadCpuTimer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Enforces a CPU-time based timeout on a given SolrQueryRequest, as specified by the {@code
+ * cpuAllowed} query parameter.
+ */
+public class CpuAllowedLimit implements QueryTimeout {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private final long limitAtNs;
+  private final ThreadCpuTimer threadCpuTimer;
+
+  /**
+   * Create an object to represent a CPU time limit for the current request. NOTE: this
+   * implementation will attempt to reuse the existing thread CPU time monitor that is created on
+   * the first call to {@link SolrRequestInfo#getThreadCpuTimer()}.
+   *
+   * @param req solr request with a {@code cpuAllowed} parameter
+   */
+  public CpuAllowedLimit(SolrQueryRequest req) {
+    if (!ThreadCpuTimer.isSupported()) {
+      throw new IllegalArgumentException("Thread CPU time monitoring is not available.");
+    }
+    SolrRequestInfo solrRequestInfo = SolrRequestInfo.getRequestInfo();
+    threadCpuTimer =
+        solrRequestInfo != null ? solrRequestInfo.getThreadCpuTimer() : new ThreadCpuTimer();
+    long reqCpuLimit = req.getParams().getLong(CommonParams.CPU_ALLOWED, -1L);
+
+    if (reqCpuLimit <= 0L) {
+      throw new IllegalArgumentException(
+          "Check for limit with hasCpuLimit(req) before creating a CpuAllowedLimit");
+    }
+    // calculate when the time limit is reached, account for the time already spent
+    limitAtNs =
+        threadCpuTimer.getStartCpuTimeNs()
+            + TimeUnit.NANOSECONDS.convert(reqCpuLimit, TimeUnit.MILLISECONDS);
+  }
+
+  @VisibleForTesting
+  CpuAllowedLimit(long limitMs) {
+    this.threadCpuTimer = new ThreadCpuTimer();
+    limitAtNs =
+        threadCpuTimer.getCurrentCpuTimeNs()
+            + TimeUnit.NANOSECONDS.convert(limitMs, TimeUnit.MILLISECONDS);
+  }
+
+  /** Return true if the current request has a parameter with a valid value of the limit. */
+  static boolean hasCpuLimit(SolrQueryRequest req) {
+    return req.getParams().getLong(CommonParams.CPU_ALLOWED, -1L) > 0L;
+  }
+
+  /** Return true if a max limit value is set and the current usage has exceeded the limit. */
+  @Override
+  public boolean shouldExit() {
+    return limitAtNs - threadCpuTimer.getCurrentCpuTimeNs() < 0L;
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/QueryLimits.java b/solr/core/src/java/org/apache/solr/search/QueryLimits.java
index 5b04b0033a6..83d91621fa3 100644
--- a/solr/core/src/java/org/apache/solr/search/QueryLimits.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryLimits.java
@@ -16,7 +16,8 @@
  */
 package org.apache.solr.search;
 
-import static org.apache.solr.search.SolrQueryTimeLimit.hasTimeLimit;
+import static org.apache.solr.search.CpuAllowedLimit.hasCpuLimit;
+import static org.apache.solr.search.TimeAllowedLimit.hasTimeLimit;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -44,7 +45,10 @@ public class QueryLimits implements QueryTimeout {
    */
   public QueryLimits(SolrQueryRequest req) {
     if (hasTimeLimit(req)) {
-      limits.add(new SolrQueryTimeLimit(req));
+      limits.add(new TimeAllowedLimit(req));
+    }
+    if (hasCpuLimit(req)) {
+      limits.add(new CpuAllowedLimit(req));
     }
   }
 
diff --git a/solr/core/src/java/org/apache/solr/search/SolrQueryTimeLimit.java b/solr/core/src/java/org/apache/solr/search/TimeAllowedLimit.java
similarity index 86%
rename from solr/core/src/java/org/apache/solr/search/SolrQueryTimeLimit.java
rename to solr/core/src/java/org/apache/solr/search/TimeAllowedLimit.java
index 2080d5a922f..432993d6c43 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrQueryTimeLimit.java
+++ b/solr/core/src/java/org/apache/solr/search/TimeAllowedLimit.java
@@ -28,9 +28,9 @@ import org.apache.solr.request.SolrQueryRequest;
  * the {@code timeAllowed} query parameter. Note that timeAllowed will be ignored for
  * <strong><em>local</em></strong> processing of sub-queries in cases where the parent query already
  * has {@code timeAllowed} set. Essentially only one timeAllowed can be specified for any thread
- * executing a query. This is to ensure that subqueies don't escape from the intended limit
+ * executing a query. This is to ensure that subqueries don't escape from the intended limit
  */
-public class SolrQueryTimeLimit implements QueryTimeout {
+public class TimeAllowedLimit implements QueryTimeout {
 
   private final long timeoutAt;
 
@@ -42,23 +42,25 @@ public class SolrQueryTimeLimit implements QueryTimeout {
    *     should be validated with {@link #hasTimeLimit(SolrQueryRequest)} prior to constructing this
    *     object
    */
-  public SolrQueryTimeLimit(SolrQueryRequest req) {
+  public TimeAllowedLimit(SolrQueryRequest req) {
     // reduce by time already spent
     long reqTimeAllowed = req.getParams().getLong(CommonParams.TIME_ALLOWED, -1L);
 
     if (reqTimeAllowed == -1L) {
       throw new IllegalArgumentException(
-          "Check for limit with hasTimeLimit(req) before creating a SolrQueryTimeLimit");
+          "Check for limit with hasTimeLimit(req) before creating a TimeAllowedLimit");
     }
     long timeAllowed = reqTimeAllowed - (long) req.getRequestTimer().getTime();
     long nanosAllowed = TimeUnit.NANOSECONDS.convert(timeAllowed, TimeUnit.MILLISECONDS);
     timeoutAt = nanoTime() + nanosAllowed;
   }
 
+  /** Return true if the current request has a parameter with a valid value of the limit. */
   static boolean hasTimeLimit(SolrQueryRequest req) {
     return req.getParams().getLong(CommonParams.TIME_ALLOWED, -1L) >= 0L;
   }
 
+  /** Return true if a max limit value is set and the current usage has exceeded the limit. */
   @Override
   public boolean shouldExit() {
     return timeoutAt - nanoTime() < 0L;
diff --git a/solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java b/solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java
new file mode 100644
index 00000000000..468df99382b
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/util/ThreadCpuTimer.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.util;
+
+import java.lang.invoke.MethodHandles;
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadMXBean;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Allows tracking information about the current thread using the JVM's built-in management bean
+ * {@link java.lang.management.ThreadMXBean}.
+ *
+ * <p>Calling code should create an instance of this class when starting the operation, and then can
+ * get the {@link #getCpuTimeMs()} at any time thereafter.
+ */
+public class ThreadCpuTimer {
+  private static final long UNSUPPORTED = -1;
+  public static final String CPU_TIME = "cpuTime";
+  public static final String LOCAL_CPU_TIME = "localCpuTime";
+  public static final String ENABLE_CPU_TIME = "solr.log.cputime";
+
+  private static ThreadMXBean THREAD_MX_BEAN;
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  static {
+    try {
+      ThreadMXBean threadBean = ManagementFactory.getThreadMXBean();
+      if (!threadBean.isThreadCpuTimeEnabled()) {
+        threadBean.setThreadCpuTimeEnabled(true);
+      }
+      THREAD_MX_BEAN = threadBean;
+    } catch (UnsupportedOperationException | SecurityException e) {
+      THREAD_MX_BEAN = null;
+      log.info("Thread CPU time monitoring is not available.");
+    }
+  }
+
+  private final long startCpuTimeNanos;
+
+  /**
+   * Create an instance to track the current thread's usage of CPU. The usage information can later
+   * be retrieved by any thread by calling {@link #getCpuTimeMs()}.
+   */
+  public ThreadCpuTimer() {
+    if (THREAD_MX_BEAN != null) {
+      this.startCpuTimeNanos = THREAD_MX_BEAN.getCurrentThreadCpuTime();
+    } else {
+      this.startCpuTimeNanos = UNSUPPORTED;
+    }
+  }
+
+  public static boolean isSupported() {
+    return THREAD_MX_BEAN != null;
+  }
+
+  /**
+   * Return the initial value of CPU time for this thread when this instance was first created.
+   * NOTE: absolute value returned by this method has no meaning by itself, it should only be used
+   * when comparing elapsed time between this value and {@link #getCurrentCpuTimeNs()}.
+   *
+   * @return initial value, or {@link #UNSUPPORTED} if not supported.
+   */
+  public long getStartCpuTimeNs() {
+    return startCpuTimeNanos;
+  }
+
+  /**
+   * Return the CPU time consumed by the current thread since this instance was created.
+   *
+   * @return elapsed CPU time in nanoseconds, or {@link #UNSUPPORTED} if not supported.
+   */
+  public long getCurrentCpuTimeNs() {
+    if (THREAD_MX_BEAN != null) {
+      return this.startCpuTimeNanos != UNSUPPORTED
+          ? THREAD_MX_BEAN.getCurrentThreadCpuTime() - this.startCpuTimeNanos
+          : UNSUPPORTED;
+    } else {
+      return UNSUPPORTED;
+    }
+  }
+
+  /**
+   * Get the CPU usage information for the thread that created this {@link ThreadCpuTimer}. The
+   * information will track the thread's CPU since the creation of this {@link ThreadCpuTimer}
+   * instance. If the VM's CPU tracking is disabled, the returned value is {@link #UNSUPPORTED}.
+   */
+  public Optional<Long> getCpuTimeMs() {
+    long cpuTimeNs = getCurrentCpuTimeNs();
+    return cpuTimeNs != UNSUPPORTED
+        ? Optional.of(TimeUnit.MILLISECONDS.convert(cpuTimeNs, TimeUnit.NANOSECONDS))
+        : Optional.of(UNSUPPORTED);
+  }
+
+  @Override
+  public String toString() {
+    return getCpuTimeMs().toString();
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/search/ExpensiveSearchComponent.java b/solr/core/src/test/org/apache/solr/search/ExpensiveSearchComponent.java
new file mode 100644
index 00000000000..a1f20e8cce0
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/ExpensiveSearchComponent.java
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.security.KeyPair;
+import java.security.KeyPairGenerator;
+import java.security.NoSuchAlgorithmException;
+import java.security.interfaces.RSAPublicKey;
+import java.util.ArrayList;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.TestUtil;
+import org.apache.solr.handler.component.ResponseBuilder;
+import org.apache.solr.handler.component.SearchComponent;
+import org.apache.solr.handler.component.ShardRequest;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A search component used for testing "expensive" operations, i.e. those that take long wall-clock
+ * time, or consume a lot of CPU or memory. Depending on the {@link #STAGES_PARAM} this load can be
+ * generated at various stages in the distributed query processing.
+ *
+ * <p>This component can be used in <code>solrconfig.xml</code> like this:
+ *
+ * <pre>{@code
+ * <config>
+ *   ...
+ *   <searchComponent name="expensiveSearchComponent"
+ *                    class="org.apache.solr.search.ExpensiveSearchComponent"/>
+ *   ...
+ *   <requestHandler name="/select" class="solr.SearchHandler">
+ *     <arr name="first-components">
+ *       <str>expensiveSearchComponent</str>
+ *     </arr>
+ *     ...
+ *   </requestHandler>
+ * </config>
+ * }</pre>
+ *
+ * For example, if the request parameters are as follows:
+ *
+ * <pre>{@code
+ * sleepMs=100&memLoadCount=1000&cpuLoadCount=10&stages=prepare,process
+ * }</pre>
+ *
+ * the component will introduce a 100ms delay, allocate ~20kB and consume around 500ms of CPU time
+ * both in the "prepare" and in the "process" stages of the distributed query processing.
+ */
+public class ExpensiveSearchComponent extends SearchComponent {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  /**
+   * Generate memory load by allocating this number of random unicode strings, 100 characters each.
+   */
+  public static final String MEM_LOAD_COUNT_PARAM = "memLoadCount";
+
+  /** Generate CPU load by repeatedly running an expensive computation (RSA key-pair generation). */
+  public static final String CPU_LOAD_COUNT_PARAM = "cpuLoadCount";
+
+  /** Generate a wall-clock delay by sleeping this number of milliseconds. */
+  public static final String SLEEP_MS_PARAM = "sleepMs";
+
+  /** Comma-separated list of stages where the load will be generated. */
+  public static final String STAGES_PARAM = "stages";
+
+  public static final String STAGE_PREPARE = "prepare";
+  public static final String STAGE_PROCESS = "process";
+  public static final String STAGE_FINISH = "finish";
+  public static final String STAGE_DISTRIB_PROCESS = "distrib";
+  public static final String STAGE_HANDLE_RESPONSES = "handle";
+
+  private static final KeyPairGenerator kpg;
+
+  static {
+    KeyPairGenerator generator;
+    try {
+      generator = KeyPairGenerator.getInstance("RSA");
+    } catch (NoSuchAlgorithmException e) {
+      generator = null;
+    }
+    kpg = generator;
+  }
+
+  final ArrayList<String> data = new ArrayList<>();
+
+  private void generateLoad(ResponseBuilder rb, String stage) {
+    if (log.isTraceEnabled()) {
+      log.trace(
+          "-- {} generateLoad(): params: {} --\n{}",
+          stage,
+          rb.req.getParams().toString(),
+          new Exception());
+    }
+    final long cpuLoadCount = rb.req.getParams().getLong(CPU_LOAD_COUNT_PARAM, 0L);
+    final long sleepMs = rb.req.getParams().getLong(SLEEP_MS_PARAM, 0);
+    final int memLoadCount = rb.req.getParams().getInt(MEM_LOAD_COUNT_PARAM, 0);
+    data.clear();
+    KeyPair kp = null;
+    // create memory load
+    if (memLoadCount > 0) {
+      if (log.isTraceEnabled()) {
+        log.trace("--- STAGE {}: creating mem load {}", stage, memLoadCount);
+      }
+      for (int j = 0; j < memLoadCount; j++) {
+        String str = TestUtil.randomUnicodeString(LuceneTestCase.random(), 100);
+        data.add(str);
+      }
+    }
+    // create CPU load
+    if (cpuLoadCount > 0) {
+      if (log.isTraceEnabled()) {
+        log.trace("--- STAGE {}: creating CPU load {}", stage, cpuLoadCount);
+      }
+      for (int i = 0; i < cpuLoadCount; i++) {
+        if (kpg == null) {
+          throw new RuntimeException("cannot generate consistent CPU load on this JVM.");
+        }
+        kpg.initialize(1024);
+        kp = kpg.generateKeyPair();
+      }
+    }
+    if (kp != null) {
+      RSAPublicKey key = (RSAPublicKey) kp.getPublic();
+      rb.rsp.add(
+          "keyPair-" + stage,
+          key.getAlgorithm() + " " + key.getFormat() + " " + key.getModulus().bitLength());
+    }
+    // create wall-clock load
+    if (sleepMs > 0) {
+      if (log.isTraceEnabled()) {
+        log.trace("--- STAGE {}: creating wall-clock load {}", stage, sleepMs);
+      }
+      try {
+        Thread.sleep(sleepMs);
+      } catch (InterruptedException e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
+
+  private static boolean hasStage(ResponseBuilder rb, String stageName) {
+    String stages = rb.req.getParams().get(STAGES_PARAM);
+    if (stages == null) {
+      return false;
+    } else {
+      // no need to split on commas, stage names are unique
+      return stages.contains(stageName);
+    }
+  }
+
+  @Override
+  public void prepare(ResponseBuilder rb) throws IOException {
+    if (hasStage(rb, STAGE_PREPARE)) {
+      generateLoad(rb, STAGE_PREPARE);
+    }
+  }
+
+  @Override
+  public void process(ResponseBuilder rb) throws IOException {
+    if (hasStage(rb, STAGE_PROCESS)) {
+      generateLoad(rb, STAGE_PROCESS);
+    }
+  }
+
+  @Override
+  public void finishStage(ResponseBuilder rb) {
+    if (hasStage(rb, STAGE_FINISH)) {
+      generateLoad(rb, STAGE_FINISH);
+    }
+  }
+
+  @Override
+  public int distributedProcess(ResponseBuilder rb) throws IOException {
+    if (hasStage(rb, STAGE_DISTRIB_PROCESS)) {
+      generateLoad(rb, STAGE_DISTRIB_PROCESS);
+    }
+    return ResponseBuilder.STAGE_DONE;
+  }
+
+  @Override
+  public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
+    if (hasStage(rb, STAGE_HANDLE_RESPONSES)) {
+      generateLoad(rb, STAGE_HANDLE_RESPONSES + " " + sreq);
+    }
+  }
+
+  @Override
+  public String getDescription() {
+    return "expensive";
+  }
+}
diff --git a/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java b/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
new file mode 100644
index 00000000000..88c279a5d64
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/TestCpuAllowedLimit.java
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import java.lang.invoke.MethodHandles;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.concurrent.TimeUnit;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.cloud.CloudUtil;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.util.ThreadCpuTimer;
+import org.junit.Assume;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestCpuAllowedLimit extends SolrCloudTestCase {
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  private static final String COLLECTION = "test";
+
+  private static Path createConfigSet() throws Exception {
+    Path configSet = createTempDir();
+    copyMinConf(configSet.toFile());
+    // insert an expensive search component
+    Path solrConfig = configSet.resolve("conf/solrconfig.xml");
+    Files.writeString(
+        solrConfig,
+        Files.readString(solrConfig)
+            .replace(
+                "<requestHandler",
+                "<searchComponent name=\"expensiveSearchComponent\"\n"
+                    + "                   class=\"org.apache.solr.search.ExpensiveSearchComponent\"/>\n"
+                    + "\n"
+                    + "  <requestHandler")
+            .replace(
+                "class=\"solr.SearchHandler\">",
+                "class=\"solr.SearchHandler\">\n"
+                    + "    <arr name=\"first-components\">\n"
+                    + "      <str>expensiveSearchComponent</str>\n"
+                    + "    </arr>\n"));
+    return configSet.resolve("conf");
+  }
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    System.setProperty(ThreadCpuTimer.ENABLE_CPU_TIME, "true");
+    Path configset = createConfigSet();
+    configureCluster(1).addConfig("conf", configset).configure();
+    SolrClient solrClient = cluster.getSolrClient();
+    CollectionAdminRequest.Create create =
+        CollectionAdminRequest.createCollection(COLLECTION, "conf", 3, 2);
+    create.process(solrClient);
+    CloudUtil.waitForState(
+        cluster.getOpenOverseer().getSolrCloudManager(), "active", COLLECTION, clusterShape(3, 6));
+    for (int j = 0; j < 100; j++) {
+      solrClient.add(COLLECTION, sdoc("id", "id-" + j, "val_i", j % 5));
+    }
+    solrClient.commit(COLLECTION);
+  }
+
+  @Test
+  public void testCompareToWallClock() throws Exception {
+    Assume.assumeTrue("Thread CPU time monitoring is not available", ThreadCpuTimer.isSupported());
+    long limitMs = 100;
+    CpuAllowedLimit cpuLimit = new CpuAllowedLimit(limitMs);
+    int[] randoms = new int[100];
+    long startNs = System.nanoTime();
+    int wakeups = 0;
+    while (!cpuLimit.shouldExit()) {
+      Thread.sleep(1);
+      // do some busywork
+      for (int i = 0; i < randoms.length; i++) {
+        randoms[i] = random().nextInt();
+      }
+      wakeups++;
+    }
+    long endNs = System.nanoTime();
+    long wallTimeDeltaMs = TimeUnit.MILLISECONDS.convert(endNs - startNs, TimeUnit.NANOSECONDS);
+    log.info(
+        "CPU limit: {} ms, elapsed wall-clock: {} ms, wakeups: {}",
+        limitMs,
+        wallTimeDeltaMs,
+        wakeups);
+    assertTrue(
+        "Elapsed wall-clock time expected much larger than 100ms but was " + wallTimeDeltaMs,
+        limitMs < wallTimeDeltaMs);
+  }
+
+  @Test
+  public void testDistribLimit() throws Exception {
+    Assume.assumeTrue("Thread CPU time monitoring is not available", ThreadCpuTimer.isSupported());
+
+    SolrClient solrClient = cluster.getSolrClient();
+
+    // no limits set - should eventually complete
+    log.info("--- No limits, full results ---");
+    long sleepMs = 1000;
+    QueryResponse rsp =
+        solrClient.query(
+            COLLECTION,
+            params(
+                "q",
+                "id:*",
+                "sort",
+                "id asc",
+                ExpensiveSearchComponent.SLEEP_MS_PARAM,
+                String.valueOf(sleepMs),
+                "stages",
+                "prepare,process"));
+    // System.err.println("rsp=" + rsp.jsonStr());
+    assertEquals(rsp.getHeader().get("status"), 0);
+    Number qtime = (Number) rsp.getHeader().get("QTime");
+    assertTrue("QTime expected " + qtime + " >> " + sleepMs, qtime.longValue() > sleepMs);
+    assertNull("should not have partial results", rsp.getHeader().get("partialResults"));
+
+    // timeAllowed set, should return partial results
+    log.info("--- timeAllowed, partial results ---");
+    rsp =
+        solrClient.query(
+            COLLECTION,
+            params(
+                "q",
+                "id:*",
+                "sort",
+                "id asc",
+                ExpensiveSearchComponent.SLEEP_MS_PARAM,
+                String.valueOf(sleepMs),
+                "stages",
+                "prepare,process",
+                "timeAllowed",
+                "500"));
+    // System.err.println("rsp=" + rsp.jsonStr());
+    assertNotNull("should have partial results", rsp.getHeader().get("partialResults"));
+
+    // cpuAllowed set with large value, should return full results
+    log.info("--- cpuAllowed, full results ---");
+    rsp =
+        solrClient.query(
+            COLLECTION,
+            params(
+                "q",
+                "id:*",
+                "sort",
+                "id desc",
+                ExpensiveSearchComponent.CPU_LOAD_COUNT_PARAM,
+                "1",
+                "stages",
+                "prepare,process",
+                "cpuAllowed",
+                "1000"));
+    // System.err.println("rsp=" + rsp.jsonStr());
+    assertNull("should have full results", rsp.getHeader().get("partialResults"));
+
+    // cpuAllowed set, should return partial results
+    log.info("--- cpuAllowed 1, partial results ---");
+    rsp =
+        solrClient.query(
+            COLLECTION,
+            params(
+                "q",
+                "id:*",
+                "sort",
+                "id desc",
+                ExpensiveSearchComponent.CPU_LOAD_COUNT_PARAM,
+                "10",
+                "stages",
+                "prepare,process",
+                "cpuAllowed",
+                "50"));
+    // System.err.println("rsp=" + rsp.jsonStr());
+    assertNotNull("should have partial results", rsp.getHeader().get("partialResults"));
+
+    // cpuAllowed set, should return partial results
+    log.info("--- cpuAllowed 2, partial results ---");
+    rsp =
+        solrClient.query(
+            COLLECTION,
+            params(
+                "q",
+                "id:*",
+                "sort",
+                "id desc",
+                ExpensiveSearchComponent.CPU_LOAD_COUNT_PARAM,
+                "10",
+                "stages",
+                "prepare,process",
+                "cpuAllowed",
+                "50"));
+    // System.err.println("rsp=" + rsp.jsonStr());
+    assertNotNull("should have partial results", rsp.getHeader().get("partialResults"));
+  }
+}
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/common-query-parameters.adoc b/solr/solr-ref-guide/modules/query-guide/pages/common-query-parameters.adoc
index 23db33dd61d..9498176edb9 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/common-query-parameters.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/common-query-parameters.adoc
@@ -322,6 +322,23 @@ As this check is periodically performed, the actual time for which a request can
 If the request consumes more time in other stages, custom components, etc., this parameter is not expected to abort the request.
 Regular search, JSON Facet and the Analytics component abandon requests in accordance with this parameter.
 
+== cpuAllowed Parameter
+
+This parameter specifies the amount of CPU time, in milliseconds, allowed for a search to complete.
+In contrast to `timeAllowed`, this parameter monitors the actual CPU usage by the thread that
+executes the query. The same CPU usage limit is applied to the query coordinator as to each replica
+that participates in the distributed search (although reaching this limit first in the query coordinator is unlikely).
+Should any replica locally exceed the allowed CPU time, the whole distributed search will be terminated
+(by canceling requests to other shards).
+
+Note: the same CPU limit is applied to each stage in the distributed query processing. Typically this
+involves two or more stages (e.g., getting the top document IDs, then retrieving their fields; additional
+stages may be required for faceting, grouping, etc.). For example, setting `cpuAllowed=500` gives a limit of at most
+500 ms of CPU time for each of these stages, meaning that the total CPU usage by the query may reach a multiple
+of the `cpuAllowed` value, depending on the number of stages.
+
+All other considerations regarding partial results listed for the `timeAllowed` parameter apply here, too.
+
 == segmentTerminateEarly Parameter
 
 This parameter may be set to either `true` or `false`.
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
index 071eb0a0d3d..90bef61db96 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
@@ -158,9 +158,15 @@ public interface CommonParams {
 
   boolean SEGMENT_TERMINATE_EARLY_DEFAULT = false;
 
-  /** Timeout value in milliseconds. If not set, or the value is &gt; 0, there is no timeout. */
+  /** Timeout value in milliseconds. If not set, or the value is &lt; 0, there is no timeout. */
   String TIME_ALLOWED = "timeAllowed";
 
+  /**
+   * Maximum query CPU usage value in milliseconds. If not set, or the value is &lt; 0, there is no
+   * CPU usage limit.
+   */
+  String CPU_ALLOWED = "cpuAllowed";
+
   /** Is the query cancellable? */
   String IS_QUERY_CANCELLABLE = "canCancel";