You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by ro...@apache.org on 2017/10/20 14:39:16 UTC

[sling-org-apache-sling-hc-support] 28/30: SLING-6698 thread usage HC initial commit

This is an automated email from the ASF dual-hosted git repository.

rombert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-hc-support.git

commit fed88f433f0a040e237173659d028fd8cb6bb4aa
Author: Georg Henzler <gh...@apache.org>
AuthorDate: Thu Mar 23 06:41:05 2017 +0000

    SLING-6698 thread usage HC initial commit
    
    git-svn-id: https://svn.apache.org/repos/asf/sling/trunk@1788179 13f79535-47bb-0310-9956-ffa450edef68
---
 pom.xml                                            |   6 +
 .../hc/support/impl/ThreadUsageHealthCheck.java    | 192 +++++++++++++++++++++
 .../support/impl/ThreadUsageHealthCheckTest.java   | 119 +++++++++++++
 3 files changed, 317 insertions(+)

diff --git a/pom.xml b/pom.xml
index 1f73c4e..51bc2ff 100644
--- a/pom.xml
+++ b/pom.xml
@@ -78,6 +78,12 @@
         </dependency>
         <dependency>
             <groupId>org.apache.sling</groupId>
+            <artifactId>org.apache.sling.hc.annotations</artifactId>
+            <version>1.0.4</version>
+        </dependency>
+        
+        <dependency>
+            <groupId>org.apache.sling</groupId>
             <artifactId>org.apache.sling.commons.osgi</artifactId>
             <version>2.2.0</version>
             <scope>provided</scope>
diff --git a/src/main/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheck.java b/src/main/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheck.java
new file mode 100644
index 0000000..fbea827
--- /dev/null
+++ b/src/main/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheck.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The SF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.sling.hc.support.impl;
+
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadInfo;
+import java.lang.management.ThreadMXBean;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.sling.commons.osgi.PropertiesUtil;
+import org.apache.sling.hc.annotations.SlingHealthCheck;
+import org.apache.sling.hc.api.HealthCheck;
+import org.apache.sling.hc.api.Result;
+import org.apache.sling.hc.util.FormattingResultLog;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Uses ThreadMXBean to show (and potentially WARN about) information about thread usage. Use deadlock detection provided by ThreadMXBean to send CRITICAL. 
+ */
+@SlingHealthCheck(name = ThreadUsageHealthCheck.HC_NAME, label = "Apache Sling "
+        + ThreadUsageHealthCheck.HC_NAME, description = "Checks time threads are using ThreadMXBean", tags = { "resources",
+                "threads" }, configurationPolicy = ConfigurationPolicy.REQUIRE)
+public class ThreadUsageHealthCheck implements HealthCheck {
+    private static final Logger LOG = LoggerFactory.getLogger(ThreadUsageHealthCheck.class);
+
+    public static final String HC_NAME = "Thread Usage Health Check";
+
+    private static final long DEFAULT_SAMPLE_PERIOD_IN_MS = 200;
+
+    static final String PROP_SAMPLE_PERIOD_IN_MS = "samplePeriodInMs";
+    @Property(name = PROP_SAMPLE_PERIOD_IN_MS, label = "Sample Period", description = "Period this HC uses to measure", longValue = DEFAULT_SAMPLE_PERIOD_IN_MS)
+    private long samplePeriodInMs;
+
+    static final String PROP_CPU_TIME_THRESHOLD_WARN = "cpuTimeThresholdWarn";
+    @Property(name = PROP_CPU_TIME_THRESHOLD_WARN, label = "CPU Time Threshold for WARN in %", description = "Will WARN once this threshold is reached. Has to be configured according to cores of the system (e.g. use 400% for 4 cores)")
+    private long cpuTimeThresholdWarn;
+    
+    private int availableProcessors;
+    private boolean cpuTimeThresholdWarnIsConfigured;
+
+    @Activate
+    protected final void activate(final ComponentContext componentContext) {
+        final Dictionary<String, Object> properties = componentContext.getProperties();
+        this.samplePeriodInMs = PropertiesUtil.toLong(properties.get(PROP_SAMPLE_PERIOD_IN_MS), DEFAULT_SAMPLE_PERIOD_IN_MS);
+
+        this.availableProcessors = Runtime.getRuntime().availableProcessors();
+        long defaultThresholdWarn = availableProcessors * 90; // 100% is rarely reached, 90% means a very busy system
+        this.cpuTimeThresholdWarn = PropertiesUtil.toLong(properties.get(PROP_CPU_TIME_THRESHOLD_WARN), defaultThresholdWarn);
+        this.cpuTimeThresholdWarnIsConfigured = properties.get(PROP_CPU_TIME_THRESHOLD_WARN) !=null;
+
+    }
+
+    @Override
+    public Result execute() {
+        FormattingResultLog log = new FormattingResultLog();
+
+        log.debug("Checking threads for exceeding {}% CPU time within time period of {}ms", cpuTimeThresholdWarn, samplePeriodInMs);
+        
+        try {
+            ThreadMXBean threadMxBean = ManagementFactory.getThreadMXBean();
+
+            List<ThreadTimeInfo> threadTimeInfos = collectThreadTimeInfos(log, threadMxBean);
+
+            Collections.sort(threadTimeInfos);
+
+            float totalCpuTimePercentage = 0;
+            for (int i = 0; i < threadTimeInfos.size(); i++) {
+
+                ThreadTimeInfo threadInfo = threadTimeInfos.get(i);
+                float cpuTimePercentage = ((float) threadInfo.getCpuTime() / ((float) samplePeriodInMs * 1000000)) * 100f;
+                totalCpuTimePercentage +=cpuTimePercentage;
+
+                String msg = String.format("%4.1f", cpuTimePercentage) + "% used by thread \"" + threadInfo.name + "\"";
+                if (i<3  || (i<10 && cpuTimePercentage > 15)) {
+                    log.info(msg);
+                } else {
+                    log.debug(msg);
+                }
+            }
+            log.info(threadTimeInfos.size() + " threads using "+String.format("%4.1f", totalCpuTimePercentage)+"% CPU Time");
+            boolean isHighCpuTime = totalCpuTimePercentage > cpuTimeThresholdWarn;
+            if(isHighCpuTime) {
+                log.warn("Threads are consuming significant CPU time "+  ( cpuTimeThresholdWarnIsConfigured ? 
+                        "(more than configured " + cpuTimeThresholdWarn + "% within " + samplePeriodInMs + "ms)" 
+                        : "(more than "+availableProcessors+" cores * 90% = "+cpuTimeThresholdWarn + "%)"));
+            }
+
+            checkForDeadlock(log, threadMxBean);
+
+        } catch (Exception e) {
+            LOG.error("Could not analyse thread usage "+e, e);
+            log.healthCheckError("Could not analyse thread usage", e);
+        }
+      
+        return new Result(log);
+
+    }
+
+    List<ThreadTimeInfo> collectThreadTimeInfos(FormattingResultLog log, ThreadMXBean threadMxBean) {
+        
+        
+        Map<Long, ThreadTimeInfo> threadTimeInfos = new HashMap<Long, ThreadTimeInfo>();
+
+        long[] allThreadIds = threadMxBean.getAllThreadIds();
+        for (long threadId : allThreadIds) {
+            ThreadTimeInfo threadTimeInfo = new ThreadTimeInfo();
+            long threadCpuTimeStart = threadMxBean.getThreadCpuTime(threadId);
+            threadTimeInfo.start = threadCpuTimeStart;
+            ThreadInfo threadInfo = threadMxBean.getThreadInfo(threadId);
+            threadTimeInfo.name = threadInfo!=null?threadInfo.getThreadName():"Thread id "+threadId+" (name not resolvable)";
+            threadTimeInfos.put(threadId, threadTimeInfo);
+        }
+
+        try {
+            Thread.sleep(samplePeriodInMs);
+        } catch (InterruptedException e) {
+            log.warn("Could not sleep configured samplePeriodInMs={} to gather thread load", samplePeriodInMs);
+        }
+
+        for (long threadId : allThreadIds) {
+            ThreadTimeInfo threadTimeInfo = threadTimeInfos.get(threadId);
+            if (threadTimeInfo == null) {
+                continue;
+            }
+            long threadCpuTimeStop = threadMxBean.getThreadCpuTime(threadId);
+            threadTimeInfo.stop = threadCpuTimeStop;
+        }
+        
+        List<ThreadTimeInfo> threads = new ArrayList<ThreadTimeInfo>(threadTimeInfos.values());
+
+        return threads;
+    }
+    
+
+    void checkForDeadlock(FormattingResultLog log, ThreadMXBean threadMxBean) {
+        long[] findDeadlockedThreads = threadMxBean.findDeadlockedThreads();
+        if (findDeadlockedThreads != null) {
+            for (long threadId : findDeadlockedThreads) {
+                log.critical("Thread " + threadMxBean.getThreadInfo(threadId).getThreadName() + " is DEADLOCKED");
+            }
+        }
+    }
+
+    static class ThreadTimeInfo implements Comparable<ThreadTimeInfo> {
+        long start;
+        long stop;
+        String name;
+
+        long getCpuTime() {
+            long cpuTime = stop - start;
+            if(cpuTime < 0) {
+                cpuTime = 0;
+            }
+            return cpuTime;
+        }
+
+        @Override
+        public int compareTo(ThreadTimeInfo otherThreadTimeInfo) {
+            if (otherThreadTimeInfo == null) {
+                return -1;
+            }
+            return (int) (otherThreadTimeInfo.getCpuTime() - this.getCpuTime());
+        }
+
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheckTest.java b/src/test/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheckTest.java
new file mode 100644
index 0000000..75816ed
--- /dev/null
+++ b/src/test/java/org/apache/sling/hc/support/impl/ThreadUsageHealthCheckTest.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The SF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package org.apache.sling.hc.support.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.doNothing;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.MockitoAnnotations.initMocks;
+
+import java.lang.management.ThreadMXBean;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.Dictionary;
+import java.util.List;
+
+import org.apache.sling.hc.api.Result;
+import org.apache.sling.hc.util.FormattingResultLog;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mock;
+import org.mockito.Spy;
+import org.osgi.service.component.ComponentContext;
+
+import org.apache.sling.hc.support.impl.ThreadUsageHealthCheck.ThreadTimeInfo;
+
+public class ThreadUsageHealthCheckTest {
+
+    @Spy
+    ThreadUsageHealthCheck threadsHealthCheck;
+
+    @Mock
+    ComponentContext componentContext;
+
+    @Mock
+    Dictionary<String, Object> componentConfig;
+
+    @Before
+    public void setup() {
+        initMocks(this);
+
+        doReturn(componentConfig).when(componentContext).getProperties();
+        doNothing().when(threadsHealthCheck).checkForDeadlock(any(FormattingResultLog.class), any(ThreadMXBean.class));
+
+    }
+
+    @Test
+    public void testThreadActivityWithinThreshold() {
+
+        long samplePeriod = 200L;
+        int processorsAvailable = 4;
+        int busyThreads = 3;
+
+        List<ThreadTimeInfo> resultListOverloaded = getFullLoadThreadInfos(samplePeriod, busyThreads);
+
+        setupExpectations(samplePeriod, processorsAvailable, resultListOverloaded);
+
+        Result result = threadsHealthCheck.execute();
+
+        assertEquals(Result.Status.OK, result.getStatus());
+
+    }
+
+    @Test
+    public void testThreadActivityOverloaded() {
+
+        long samplePeriod = 200L;
+        int processorsAvailable = 4;
+        int busyThreads = 5;
+
+        List<ThreadTimeInfo> resultListOverloaded = getFullLoadThreadInfos(samplePeriod, busyThreads);
+
+        setupExpectations(samplePeriod, processorsAvailable, resultListOverloaded);
+
+        Result result = threadsHealthCheck.execute();
+
+        assertEquals(Result.Status.WARN, result.getStatus());
+
+    }
+
+    private List<ThreadTimeInfo> getFullLoadThreadInfos(long samplePeriod, int count) {
+        List<ThreadTimeInfo> resultList = new ArrayList<ThreadTimeInfo>();
+        for (int i = 0; i < count; i++) {
+            resultList.add(getThreadTimeInfo((long) (samplePeriod * .95)));
+        }
+        return resultList;
+    }
+
+    private void setupExpectations(long samplePeriod, int processorsAvailable, List<ThreadTimeInfo> resultListOk) {
+        doReturn(processorsAvailable * 90).when(componentConfig).get(ThreadUsageHealthCheck.PROP_CPU_TIME_THRESHOLD_WARN);
+        doReturn(samplePeriod).when(componentConfig).get(ThreadUsageHealthCheck.PROP_SAMPLE_PERIOD_IN_MS);
+        threadsHealthCheck.activate(componentContext);
+        doReturn(resultListOk).when(threadsHealthCheck).collectThreadTimeInfos(any(FormattingResultLog.class), any(ThreadMXBean.class));
+    }
+
+    private ThreadUsageHealthCheck.ThreadTimeInfo getThreadTimeInfo(long diffInMs) {
+        ThreadUsageHealthCheck.ThreadTimeInfo threadTimeInfo = new ThreadUsageHealthCheck.ThreadTimeInfo();
+        threadTimeInfo.start = new Date().getTime();
+        threadTimeInfo.stop = threadTimeInfo.start + (diffInMs * 1000000);
+        threadTimeInfo.name = "Unit Test Thread";
+        return threadTimeInfo;
+    }
+
+}

-- 
To stop receiving notification emails like this one, please contact
"commits@sling.apache.org" <co...@sling.apache.org>.