You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by ja...@apache.org on 2018/01/31 19:45:46 UTC
[geode] branch develop updated: [GEODE-4371] Add lurker process that captures callstacks. (#1337)
This is an automated email from the ASF dual-hosted git repository.
jasonhuynh pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/geode.git
The following commit(s) were added to refs/heads/develop by this push:
new adfd511 [GEODE-4371] Add lurker process that captures callstacks. (#1337)
adfd511 is described below
commit adfd511b5e36f949c80e45925427e104e4f925f2
Author: Sean Goller <se...@goller.net>
AuthorDate: Wed Jan 31 11:45:42 2018 -0800
[GEODE-4371] Add lurker process that captures callstacks. (#1337)
* Add callstacks capture script that waits until an hour before the job
times out then captures all callstacks and writes them out to the output
container.
Signed-off-by: Owen Nichols <on...@pivotal.io>
---
ci/pipelines/develop.yml | 11 ++++---
ci/scripts/capture-call-stacks.sh | 61 +++++++++++++++++++++++++++++++++++++++
ci/scripts/test-run.sh | 3 ++
3 files changed, 71 insertions(+), 4 deletions(-)
diff --git a/ci/pipelines/develop.yml b/ci/pipelines/develop.yml
index b0ac22c..fb9593b 100644
--- a/ci/pipelines/develop.yml
+++ b/ci/pipelines/develop.yml
@@ -142,7 +142,7 @@ jobs:
- task: runtests
tags: [large]
privileged: true
- timeout: 7h
+ timeout: 8h
config:
inputs:
- name: geode
@@ -159,6 +159,7 @@ jobs:
PUBLIC_BUCKET: ((!public-bucket))
PARALLEL_DUNIT: true
DUNIT_PARALLEL_FORKS: 7
+ CALL_STACK_TIMEOUT: 25200
run:
args:
- distributedTest
@@ -207,7 +208,7 @@ jobs:
rootfs: true
- task: runtests
privileged: true
- timeout: 7h
+ timeout: 8h
config:
inputs:
- name: geode
@@ -224,6 +225,7 @@ jobs:
PUBLIC_BUCKET: ((!public-bucket))
PARALLEL_DUNIT: true
DUNIT_PARALLEL_FORKS: 1
+ CALL_STACK_TIMEOUT: 25200
run:
args:
- :geode-assembly:acceptanceTest
@@ -272,7 +274,7 @@ jobs:
rootfs: true
- task: runtests
privileged: true
- timeout: 7h
+ timeout: 8h
config:
inputs:
- name: geode
@@ -289,6 +291,7 @@ jobs:
PUBLIC_BUCKET: ((!public-bucket))
PARALLEL_DUNIT: true
DUNIT_PARALLEL_FORKS: 1
+ CALL_STACK_TIMEOUT: 25200
run:
args:
- integrationTest
@@ -337,7 +340,7 @@ jobs:
rootfs: true
- task: runtests
privileged: true
- timeout: 7h
+ timeout: 8h
config:
inputs:
- name: geode
diff --git a/ci/scripts/capture-call-stacks.sh b/ci/scripts/capture-call-stacks.sh
new file mode 100755
index 0000000..a7d1dcf
--- /dev/null
+++ b/ci/scripts/capture-call-stacks.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+export TERM=${TERM:-dumb}  # fall back to "dumb" when TERM is unset or empty
+export PAGER=cat  # tools that honor PAGER will use cat rather than an interactive pager
+export BUILDROOT=$(pwd)  # directory the script was launched from
+export DEST_DIR=${BUILDROOT}/built-geode
+export GEODE_BUILD=${DEST_DIR}/test
+export CALLSTACKS_DIR=${GEODE_BUILD}/callstacks  # every file this script writes goes here
+
+#SLEEP_TIME is in seconds
+SLEEP_TIME=${1}  # first positional argument: delay before the first capture round
+COUNT=3  # number of capture rounds
+STACK_INTERVAL=5  # pause after each round, handed to sleep (seconds)
+
+
+mkdir -p ${CALLSTACKS_DIR}
+
+sleep ${SLEEP_TIME}  # stay idle until it is time to capture
+
+echo "Capturing call stacks"
+for (( h=0; h<${COUNT}; h++)); do  # each round writes its own timestamped log file
+ today=`date +%Y-%m-%d-%H-%M-%S`
+ logfile=${CALLSTACKS_DIR}/callstacks-${today}.txt
+ mapfile -t containers < <(docker ps --format '{{.Names}}')  # one array entry per container name printed by docker ps
+
+ for (( i=0; i<${#containers[@]}; i++ )); do
+ echo "Container: ${containers[i]}" | tee -a ${logfile};
+ mapfile -t processes < <(docker exec ${containers[i]} jps | grep ChildVM | cut -d ' ' -f 1)  # first space-separated field of each jps line containing ChildVM (the process id)
+ echo "Got past processes."
+ for ((j=0; j<${#processes[@]}; j++ )); do
+ echo "********* Dumping stack for process ${processes[j]}:" | tee -a ${logfile}
+ docker exec ${containers[i]} jstack -l ${processes[j]} >> ${logfile}  # -l requests jstack's long listing; output is appended to this round's log
+ done
+ done
+ sleep ${STACK_INTERVAL}
+done
+
+echo "Checking progress files:"
+mapfile -t progressfiles < <(find /tmp/gemfire-build -name test-progress.txt)  # every test-progress.txt below /tmp/gemfire-build
+for (( i=0; i<${#progressfiles[@]}; i++)); do
+ echo "Checking progress file: ${progressfiles[i]}"
+ /usr/local/bin/dunit-progress hang ${progressfiles[i]} | tee -a ${CALLSTACKS_DIR}/dunit-hangs.txt  # NOTE(review): dunit-progress is not defined here; presumably "hang" reports tests that never finished - confirm
+done
diff --git a/ci/scripts/test-run.sh b/ci/scripts/test-run.sh
index 447fcc6..774ed6c 100755
--- a/ci/scripts/test-run.sh
+++ b/ci/scripts/test-run.sh
@@ -104,6 +104,9 @@ fi
DEFAULT_GRADLE_TASK_OPTIONS="--no-daemon -x javadoc -x spotlessCheck"
mkdir -p ${GEODE_BUILD}
+if [ -v CALL_STACK_TIMEOUT ]; then
+ geode-ci/ci/scripts/capture-call-stacks.sh ${CALL_STACK_TIMEOUT} &
+fi
pushd geode
tar cf - * | (cd ${GEODE_BUILD}; tar xpf -)
--
To stop receiving notification emails like this one, please contact
jasonhuynh@apache.org.