You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by ja...@apache.org on 2018/01/31 19:45:46 UTC

[geode] branch develop updated: [GEODE-4371] Add lurker process that captures callstacks. (#1337)

This is an automated email from the ASF dual-hosted git repository.

jasonhuynh pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/geode.git


The following commit(s) were added to refs/heads/develop by this push:
     new adfd511  [GEODE-4371] Add lurker process that captures callstacks. (#1337)
adfd511 is described below

commit adfd511b5e36f949c80e45925427e104e4f925f2
Author: Sean Goller <se...@goller.net>
AuthorDate: Wed Jan 31 11:45:42 2018 -0800

    [GEODE-4371] Add lurker process that captures callstacks. (#1337)
    
    * Add callstacks capture script that waits until an hour before the job
     times out then captures all callstacks and writes them out to the output
     container.
    
    Signed-off-by: Owen Nichols <on...@pivotal.io>
---
 ci/pipelines/develop.yml          | 11 ++++---
 ci/scripts/capture-call-stacks.sh | 61 +++++++++++++++++++++++++++++++++++++++
 ci/scripts/test-run.sh            |  3 ++
 3 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/ci/pipelines/develop.yml b/ci/pipelines/develop.yml
index b0ac22c..fb9593b 100644
--- a/ci/pipelines/develop.yml
+++ b/ci/pipelines/develop.yml
@@ -142,7 +142,7 @@ jobs:
     - task: runtests
       tags: [large]
       privileged: true
-      timeout: 7h
+      timeout: 8h
       config:
         inputs:
           - name: geode
@@ -159,6 +159,7 @@ jobs:
           PUBLIC_BUCKET: ((!public-bucket))
           PARALLEL_DUNIT: true
           DUNIT_PARALLEL_FORKS: 7
+          CALL_STACK_TIMEOUT: 25200
         run:
           args:
           - distributedTest
@@ -207,7 +208,7 @@ jobs:
           rootfs: true
     - task: runtests
       privileged: true
-      timeout: 7h
+      timeout: 8h
       config:
         inputs:
           - name: geode
@@ -224,6 +225,7 @@ jobs:
           PUBLIC_BUCKET: ((!public-bucket))
           PARALLEL_DUNIT: true
           DUNIT_PARALLEL_FORKS: 1
+          CALL_STACK_TIMEOUT: 25200
         run:
           args:
           - :geode-assembly:acceptanceTest
@@ -272,7 +274,7 @@ jobs:
           rootfs: true
     - task: runtests
       privileged: true
-      timeout: 7h
+      timeout: 8h
       config:
         inputs:
           - name: geode
@@ -289,6 +291,7 @@ jobs:
           PUBLIC_BUCKET: ((!public-bucket))
           PARALLEL_DUNIT: true
           DUNIT_PARALLEL_FORKS: 1
+          CALL_STACK_TIMEOUT: 25200
         run:
           args:
           - integrationTest
@@ -337,7 +340,7 @@ jobs:
           rootfs: true
     - task: runtests
       privileged: true
-      timeout: 7h
+      timeout: 8h
       config:
         inputs:
           - name: geode
diff --git a/ci/scripts/capture-call-stacks.sh b/ci/scripts/capture-call-stacks.sh
new file mode 100755
index 0000000..a7d1dcf
--- /dev/null
+++ b/ci/scripts/capture-call-stacks.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Usage: capture-call-stacks.sh [SLEEP_TIME]. Sleeps, then COUNT times runs jstack on each jps entry
+# matching ChildVM in every running docker container; finally runs dunit-progress on progress files.
+export TERM=${TERM:-dumb}
+export PAGER=cat
+export BUILDROOT="$(pwd)"
+export DEST_DIR="${BUILDROOT}/built-geode"
+export GEODE_BUILD="${DEST_DIR}/test"
+export CALLSTACKS_DIR="${GEODE_BUILD}/callstacks"
+
+# SLEEP_TIME is in seconds (first argument); defaults to 0 so `sleep` never receives an empty operand.
+SLEEP_TIME=${1:-0}
+COUNT=3           # number of capture passes
+STACK_INTERVAL=5  # seconds to pause after each pass
+
+# All expansions below are quoted so paths and names containing whitespace are not word-split.
+mkdir -p "${CALLSTACKS_DIR}"
+
+sleep "${SLEEP_TIME}"
+
+echo "Capturing call stacks"
+for (( h=0; h<COUNT; h++ )); do
+    today=$(date +%Y-%m-%d-%H-%M-%S)
+    logfile="${CALLSTACKS_DIR}/callstacks-${today}.txt"
+    mapfile -t containers < <(docker ps --format '{{.Names}}')
+
+    for container in "${containers[@]}"; do
+        echo "Container: ${container}" | tee -a "${logfile}"
+        mapfile -t processes < <(docker exec "${container}" jps | grep ChildVM | cut -d ' ' -f 1)
+        echo "Got past processes."
+        for pid in "${processes[@]}"; do
+            echo "********* Dumping stack for process ${pid}:" | tee -a "${logfile}"
+            docker exec "${container}" jstack -l "${pid}" >> "${logfile}"
+        done
+    done
+    sleep "${STACK_INTERVAL}"
+done
+
+echo "Checking progress files:"
+mapfile -t progressfiles < <(find /tmp/gemfire-build -name test-progress.txt)
+for progressfile in "${progressfiles[@]}"; do
+    echo "Checking progress file: ${progressfile}"
+    /usr/local/bin/dunit-progress hang "${progressfile}" | tee -a "${CALLSTACKS_DIR}/dunit-hangs.txt"
+done
diff --git a/ci/scripts/test-run.sh b/ci/scripts/test-run.sh
index 447fcc6..774ed6c 100755
--- a/ci/scripts/test-run.sh
+++ b/ci/scripts/test-run.sh
@@ -104,6 +104,9 @@ fi
 DEFAULT_GRADLE_TASK_OPTIONS="--no-daemon -x javadoc -x spotlessCheck"
 
 mkdir -p ${GEODE_BUILD}
+if [ -v CALL_STACK_TIMEOUT ]; then
+  geode-ci/ci/scripts/capture-call-stacks.sh  ${CALL_STACK_TIMEOUT} &
+fi
 
 pushd geode
   tar cf - * | (cd ${GEODE_BUILD}; tar xpf -)

-- 
To stop receiving notification emails like this one, please contact
jasonhuynh@apache.org.