You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by kw...@apache.org on 2016/02/25 09:48:39 UTC

svn commit: r1732261 - in /sling/trunk/bundles/extensions/healthcheck/core/src/main/java/org/apache/sling/hc/core/impl/executor: HealthCheckExecutorImpl.java HealthCheckResultCache.java

Author: kwin
Date: Thu Feb 25 08:48:39 2016
New Revision: 1732261

URL: http://svn.apache.org/viewvc?rev=1732261&view=rev
Log:
SLNG-5415 For timed-out health checks, show result of last finished result

Modified:
    sling/trunk/bundles/extensions/healthcheck/core/src/main/java/org/apache/sling/hc/core/impl/executor/HealthCheckExecutorImpl.java
    sling/trunk/bundles/extensions/healthcheck/core/src/main/java/org/apache/sling/hc/core/impl/executor/HealthCheckResultCache.java

Modified: sling/trunk/bundles/extensions/healthcheck/core/src/main/java/org/apache/sling/hc/core/impl/executor/HealthCheckExecutorImpl.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/healthcheck/core/src/main/java/org/apache/sling/hc/core/impl/executor/HealthCheckExecutorImpl.java?rev=1732261&r1=1732260&r2=1732261&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/healthcheck/core/src/main/java/org/apache/sling/hc/core/impl/executor/HealthCheckExecutorImpl.java (original)
+++ sling/trunk/bundles/extensions/healthcheck/core/src/main/java/org/apache/sling/hc/core/impl/executor/HealthCheckExecutorImpl.java Thu Feb 25 08:48:39 2016
@@ -19,6 +19,8 @@ package org.apache.sling.hc.core.impl.ex
 
 import static org.apache.sling.hc.util.FormattingResultLog.msHumanReadable;
 
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -46,9 +48,11 @@ import org.apache.sling.commons.threads.
 import org.apache.sling.commons.threads.ThreadPoolManager;
 import org.apache.sling.hc.api.HealthCheck;
 import org.apache.sling.hc.api.Result;
+import org.apache.sling.hc.api.ResultLog;
 import org.apache.sling.hc.api.execution.HealthCheckExecutionOptions;
 import org.apache.sling.hc.api.execution.HealthCheckExecutionResult;
 import org.apache.sling.hc.api.execution.HealthCheckExecutor;
+import org.apache.sling.hc.util.FormattingResultLog;
 import org.apache.sling.hc.util.HealthCheckFilter;
 import org.apache.sling.hc.util.HealthCheckMetadata;
 import org.osgi.framework.BundleContext;
@@ -201,10 +205,6 @@ public class HealthCheckExecutorImpl imp
         return createResultsForDescriptor(metadata);
     }
 
-    private List<HealthCheckExecutionResult> execute(final ServiceReference[] healthCheckReferences) {
-        return execute(healthCheckReferences, new HealthCheckExecutionOptions());
-    }
-
     /**
      * Execute a set of health checks
      */
@@ -259,13 +259,12 @@ public class HealthCheckExecutorImpl imp
     }
 
     private HealthCheckExecutionResult createResultsForDescriptor(final HealthCheckMetadata metadata) {
-        // -- All methods below check if they can transform a healthCheckDescriptor into a result
-        // -- if yes the descriptor is removed from the list and the result added
+        // create result for a single descriptor
 
         // reuse cached results where possible
         HealthCheckExecutionResult result;
 
-        result = healthCheckResultCache.useValidCacheResults(metadata, resultCacheTtlInMs);
+        result = healthCheckResultCache.getValidCacheResult(metadata, resultCacheTtlInMs);
 
         if ( result == null ) {
             final HealthCheckFuture future;
@@ -405,37 +404,51 @@ public class HealthCheckExecutorImpl imp
     HealthCheckExecutionResult collectResultFromFuture(final HealthCheckFuture future) {
 
         HealthCheckExecutionResult result;
+        HealthCheckMetadata hcMetadata = future.getHealthCheckMetadata();
         if (future.isDone()) {
-            logger.debug("Health Check is done: {}", future.getHealthCheckMetadata());
+            logger.debug("Health Check is done: {}", hcMetadata);
 
             try {
                 result = future.get();
             } catch (final Exception e) {
                 logger.warn("Unexpected Exception during future.get(): " + e, e);
                 long futureElapsedTimeMs = new Date().getTime() - future.getCreatedTime().getTime();
-                result = new ExecutionResult(future.getHealthCheckMetadata(), Result.Status.HEALTH_CHECK_ERROR,
+                result = new ExecutionResult(hcMetadata, Result.Status.HEALTH_CHECK_ERROR,
                         "Unexpected Exception during future.get(): " + e, futureElapsedTimeMs, false);
             }
 
         } else {
-            logger.debug("Health Check timed out: {}", future.getHealthCheckMetadata());
-            // Futures must not be cancelled as interrupting a health check might could cause a corrupted repository index
-            // (CrxRoundtripCheck) or ugly messages/stack traces in the log file
+            logger.debug("Health Check timed out: {}", hcMetadata);
+            // Futures must not be cancelled as interrupting a health check might leave the system in invalid state 
+            // (worst case could be a corrupted repository index if using write operations)
 
             // normally we turn the check into WARN (normal timeout), but if the threshold time for CRITICAL is reached for a certain
             // future we turn the result CRITICAL
             long futureElapsedTimeMs = new Date().getTime() - future.getCreatedTime().getTime();
+            FormattingResultLog resultLog = new FormattingResultLog();
             if (futureElapsedTimeMs < this.longRunningFutureThresholdForRedMs) {
-                result = new ExecutionResult(future.getHealthCheckMetadata(), Result.Status.WARN,
-                        "Timeout: Check still running after " + msHumanReadable(futureElapsedTimeMs), futureElapsedTimeMs, true);
-
+                resultLog.warn("Timeout: Check still running after " + msHumanReadable(futureElapsedTimeMs));
             } else {
-                result = new ExecutionResult(future.getHealthCheckMetadata(), Result.Status.CRITICAL,
-                        "Timeout: Check still running after " + msHumanReadable(futureElapsedTimeMs)
-                                + " (exceeding the configured threshold for CRITICAL: "
-                                + msHumanReadable(this.longRunningFutureThresholdForRedMs) + ")", futureElapsedTimeMs, true);
+                resultLog.critical("Timeout: Check still running after " + msHumanReadable(futureElapsedTimeMs)
+                        + " (exceeding the configured threshold for CRITICAL: "
+                        + msHumanReadable(this.longRunningFutureThresholdForRedMs) + ")");
             }
+
+            // add logs from previous, cached result if exists (using a 1 year TTL)
+            HealthCheckExecutionResult lastCachedResult = healthCheckResultCache.getValidCacheResult(hcMetadata, 1000 * 60 * 60 * 24 * 365);
+            if (lastCachedResult != null) {
+                DateFormat df = new SimpleDateFormat("HH:mm:ss.SSS");
+                resultLog.info("*** Result log of last execution finished at {} after {} ***",
+                        df.format(lastCachedResult.getFinishedAt()), FormattingResultLog.msHumanReadable(lastCachedResult.getElapsedTimeInMs()));
+                for (ResultLog.Entry entry : lastCachedResult.getHealthCheckResult()) {
+                    resultLog.add(entry);
+                }
+            }
+
+            result = new ExecutionResult(hcMetadata, new Result(resultLog), futureElapsedTimeMs, true);
+
         }
+
         return result;
     }
 

Modified: sling/trunk/bundles/extensions/healthcheck/core/src/main/java/org/apache/sling/hc/core/impl/executor/HealthCheckResultCache.java
URL: http://svn.apache.org/viewvc/sling/trunk/bundles/extensions/healthcheck/core/src/main/java/org/apache/sling/hc/core/impl/executor/HealthCheckResultCache.java?rev=1732261&r1=1732260&r2=1732261&view=diff
==============================================================================
--- sling/trunk/bundles/extensions/healthcheck/core/src/main/java/org/apache/sling/hc/core/impl/executor/HealthCheckResultCache.java (original)
+++ sling/trunk/bundles/extensions/healthcheck/core/src/main/java/org/apache/sling/hc/core/impl/executor/HealthCheckResultCache.java Thu Feb 25 08:48:39 2016
@@ -64,7 +64,7 @@ public class HealthCheckResultCache {
         final Iterator<HealthCheckMetadata> checksIt = metadatas.iterator();
         while (checksIt.hasNext()) {
             final HealthCheckMetadata md = checksIt.next();
-            final HealthCheckExecutionResult result = useValidCacheResults(md, resultCacheTtlInMs);
+            final HealthCheckExecutionResult result = getValidCacheResult(md, resultCacheTtlInMs);
             if (result != null) {
                 cachedResults.add(result);
                 checksIt.remove();
@@ -77,7 +77,7 @@ public class HealthCheckResultCache {
     /**
      * Return the cached result if it's still valid.
      */
-    public HealthCheckExecutionResult useValidCacheResults(final HealthCheckMetadata metadata,
+    public HealthCheckExecutionResult getValidCacheResult(final HealthCheckMetadata metadata,
             final long resultCacheTtlInMs) {
         return get(metadata, resultCacheTtlInMs);
     }
@@ -101,7 +101,7 @@ public class HealthCheckResultCache {
                 return cachedResult;
             } else {
                 logger.debug("Outdated result: validUntil={} cachedResult={}", validUntil, cachedResult);
-                cache.remove(key);
+                // not removing result for key as out-dated results are shown for timed out checks if available
             }
         }