You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by el...@apache.org on 2022/10/16 19:43:56 UTC

[solr] branch branch_9x updated: SOLR-8803 Solr now crashes on OOME for all supported platforms.

This is an automated email from the ASF dual-hosted git repository.

elyograg pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 0a334bda7ea SOLR-8803 Solr now crashes on OOME for all supported platforms.
0a334bda7ea is described below

commit 0a334bda7ea40d5c11195fa5ce33d16c21d0504e
Author: Shawn Heisey <el...@apache.org>
AuthorDate: Sun Oct 16 13:40:49 2022 -0600

    SOLR-8803 Solr now crashes on OOME for all supported platforms.
---
 solr/CHANGES.txt                                   |  5 +++
 solr/bin/oom_solr.sh                               | 36 ----------------------
 solr/bin/solr                                      |  4 ++-
 solr/bin/solr.cmd                                  |  6 +++-
 .../apache/solr/servlet/CoreContainerProvider.java | 34 ++++++++++++++++++++
 5 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 2c165cee664..30ddf47b678 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -25,6 +25,11 @@ Improvements
 
 * SOLR-16302: WARN when restoring backup if ConfigSet with same name already exists (Albert Moser via Kevin Risden)
 
+* SOLR-8803: The JVM now crashes the Solr process on OOME and creates a crash file that logs the
+  cause.  This also works on Windows.  Previously, bin/solr (but not bin/solr.cmd) ran a script
+  that killed the Solr pid, leaving a very small time window in which Solr would continue to
+  execute in an unpredictable state. (Shawn Heisey, Kevin Risden)
+
 Optimizations
 ---------------------
 (No changes)
diff --git a/solr/bin/oom_solr.sh b/solr/bin/oom_solr.sh
deleted file mode 100755
index ac1800f3164..00000000000
--- a/solr/bin/oom_solr.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env bash
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-SOLR_PORT=$1
-SOLR_LOGS_DIR=$2
-SOLR_PID=$(ps auxww | grep start.jar | grep $SOLR_PORT | grep -v grep | awk '{print $2}' | sort -r)
-if [ -z "$SOLR_PID" ]; then
-  echo "Couldn't find Solr process running on port $SOLR_PORT!"
-  exit
-fi
-NOW=$(date +"%F_%H_%M_%S")
-(
-  echo "Running OOM killer script for process $SOLR_PID for Solr on port $SOLR_PORT"
-  if [[ "$SOLR_PID" == 1 ]]; then
-    # Under Docker when running as pid 1, SIGKILL is ignored, so use the default SIGTERM
-    kill "$SOLR_PID"
-  else
-    # On a real system, or in a container with tini or similar, it is safe to SIGKILL
-    kill -9 "$SOLR_PID"
-  fi
-  echo "Killed process $SOLR_PID"
-) | tee $SOLR_LOGS_DIR/solr_oom_killer-$SOLR_PORT-$NOW.log
diff --git a/solr/bin/solr b/solr/bin/solr
index 9373ee6d01e..03465b4151f 100755
--- a/solr/bin/solr
+++ b/solr/bin/solr
@@ -2245,7 +2245,9 @@ function start_solr() {
     # '-OmitStackTraceInFastThrow' ensures stack traces in errors,
     # users who don't care about useful error msgs can override in SOLR_OPTS with +OmitStackTraceInFastThrow
     "${SOLR_HOST_ARG[@]}" "-Duser.timezone=$SOLR_TIMEZONE" "-XX:-OmitStackTraceInFastThrow" \
-    "-XX:OnOutOfMemoryError=$SOLR_TIP/bin/oom_solr.sh $SOLR_PORT $SOLR_LOGS_DIR" \
+    # '+CrashOnOutOfMemoryError' ensures that Solr crashes whenever
+    # OOME is thrown. Program operation after OOME is unpredictable.
+    "-XX:+CrashOnOutOfMemoryError" "-XX:ErrorFile=${SOLR_LOGS_DIR}/jvm_crash_%p.log" \
     "-Djetty.home=$SOLR_SERVER_DIR" "-Dsolr.solr.home=$SOLR_HOME" "-Dsolr.install.dir=$SOLR_TIP" \
     "-Dsolr.default.confdir=$DEFAULT_CONFDIR" "${LOG4J_CONFIG[@]}" "${SOLR_OPTS[@]}" "${SECURITY_MANAGER_OPTS[@]}" "${SOLR_ADMIN_UI}")
 
diff --git a/solr/bin/solr.cmd b/solr/bin/solr.cmd
index e2f2d4824de..fa2c38efa5c 100755
--- a/solr/bin/solr.cmd
+++ b/solr/bin/solr.cmd
@@ -1319,7 +1319,11 @@ IF "%verbose%"=="1" (
 set START_OPTS=-Duser.timezone=%SOLR_TIMEZONE%
 REM '-OmitStackTraceInFastThrow' ensures stack traces in errors,
 REM users who don't care about useful error msgs can override in SOLR_OPTS with +OmitStackTraceInFastThrow
-set "START_OPTS=%START_OPTS% -XX:-OmitStackTraceInFastThrow"
+set START_OPTS=%START_OPTS% -XX:-OmitStackTraceInFastThrow
+REM '+CrashOnOutOfMemoryError' ensures that Solr crashes whenever
+REM OOME is thrown. Program operation after OOME is unpredictable.
+set START_OPTS=%START_OPTS% -XX:+CrashOnOutOfMemoryError
+set START_OPTS=%START_OPTS% -XX:ErrorFile=%SOLR_LOGS_DIR%\jvm_crash_%%p.log
 set START_OPTS=%START_OPTS% !GC_TUNE! %GC_LOG_OPTS%
 set START_OPTS=%START_OPTS% -DdisableAdminUI=%DISABLE_ADMIN_UI%
 IF NOT "!CLOUD_MODE_OPTS!"=="" set "START_OPTS=%START_OPTS% !CLOUD_MODE_OPTS!"
diff --git a/solr/core/src/java/org/apache/solr/servlet/CoreContainerProvider.java b/solr/core/src/java/org/apache/solr/servlet/CoreContainerProvider.java
index 663f70eeb67..208c948ee12 100644
--- a/solr/core/src/java/org/apache/solr/servlet/CoreContainerProvider.java
+++ b/solr/core/src/java/org/apache/solr/servlet/CoreContainerProvider.java
@@ -29,6 +29,8 @@ import com.codahale.metrics.jvm.MemoryUsageGaugeSet;
 import com.codahale.metrics.jvm.ThreadStatesGaugeSet;
 import com.google.common.annotations.VisibleForTesting;
 import java.lang.invoke.MethodHandles;
+import java.lang.management.ManagementFactory;
+import java.lang.management.RuntimeMXBean;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.time.Instant;
@@ -37,6 +39,7 @@ import java.util.Collections;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Optional;
 import java.util.Properties;
 import java.util.Set;
 import java.util.WeakHashMap;
@@ -281,6 +284,37 @@ public class CoreContainerProvider implements ServletContextListener {
     if (log.isInfoEnabled()) {
       log.info("|___/\\___/_|_|    Start time: {}", Instant.now());
     }
+    try {
+      RuntimeMXBean mx = ManagementFactory.getRuntimeMXBean();
+      Optional<String> crashOnOutOfMemoryErrorArg =
+          mx.getInputArguments().stream()
+              .filter(x -> x.startsWith("-XX:+CrashOnOutOfMemoryError"))
+              .findFirst();
+      if (crashOnOutOfMemoryErrorArg.isPresent()) {
+        String errorFileArg =
+            mx.getInputArguments().stream()
+                .filter(x -> x.startsWith("-XX:ErrorFile"))
+                .findFirst()
+                .orElse("-XX:ErrorFile=hs_err_%p.log");
+        String errorFilePath =
+            errorFileArg
+                .substring(errorFileArg.indexOf('=') + 1)
+                .replace("%p", String.valueOf(mx.getPid()));
+        log.info(
+            "Solr started with \"-XX:+CrashOnOutOfMemoryError\" that will crash on any OutOfMemoryError exception. "
+                + "The cause of the OOME will be logged in the crash file at the following path:");
+        log.info(errorFilePath);
+      }
+    } catch (Exception e) {
+      String logMessage =
+          String.format(
+              Locale.ROOT,
+              "Solr typically starts with \"-XX:+CrashOnOutOfMemoryError\" that will crash on any OutOfMemoryError exception. "
+                  + "The cause of the OOME will be logged in a crash file in the logs directory %s. "
+                  + "Unable to get the specific file due to an exception.",
+              System.getProperty("solr.log.dir"));
+      log.info(logMessage, e);
+    }
   }
 
   private String solrVersion() {