You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by bu...@apache.org on 2018/08/17 05:05:38 UTC

[1/4] hbase git commit: HBASE-20387 ADDENDUM backport findHangingTests.py changes from master to branches-1.

Repository: hbase
Updated Branches:
  refs/heads/branch-1 18840e951 -> 8716ac256
  refs/heads/branch-1.2 9037405d7 -> 2676d498f
  refs/heads/branch-1.3 0c0c723e2 -> d95e66424
  refs/heads/branch-1.4 a71231651 -> 9f78a1dd6


HBASE-20387 ADDENDUM backport findHangingTests.py changes from master to branches-1.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/8716ac25
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/8716ac25
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/8716ac25

Branch: refs/heads/branch-1
Commit: 8716ac2568966c1a193dcc98b567008f2292d537
Parents: 18840e9
Author: Sean Busbey <bu...@apache.org>
Authored: Thu Aug 16 23:55:28 2018 -0500
Committer: Sean Busbey <bu...@apache.org>
Committed: Thu Aug 16 23:55:28 2018 -0500

----------------------------------------------------------------------
 dev-support/flaky-tests/findHangingTests.py | 159 ++++++++++++++---------
 1 file changed, 96 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/8716ac25/dev-support/flaky-tests/findHangingTests.py
----------------------------------------------------------------------
diff --git a/dev-support/flaky-tests/findHangingTests.py b/dev-support/flaky-tests/findHangingTests.py
old mode 100644
new mode 100755
index deccc8b..328516e
--- a/dev-support/flaky-tests/findHangingTests.py
+++ b/dev-support/flaky-tests/findHangingTests.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 ##
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -15,68 +15,101 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-##
-# script to find hanging test from Jenkins build output
+
+# pylint: disable=invalid-name
+# To disable 'invalid constant name' warnings.
+
+"""
+# Script to find hanging test from Jenkins build output
 # usage: ./findHangingTests.py <url of Jenkins build console>
-#
-import urllib2
+"""
+
+import re
 import sys
-import string
-if len(sys.argv) != 2 :
-  print "ERROR : Provide the jenkins job console URL as the only argument."
-  exit(1)
-print "Fetching " + sys.argv[1]
-response = urllib2.urlopen(sys.argv[1])
-i = 0;
-tests = {}
-failed_tests = {}
-summary = 0
-host = False
-patch = False
-branch = False
-while True:
-  n = response.readline()
-  if n == "" :
-    break
-  if not host and n.find("Building remotely on") >= 0:
-    host = True
-    print n.strip()    
-    continue
-  if not patch and n.find("Testing patch for ") >= 0:
-    patch = True
-    print n.strip()    
-    continue
-  if not branch and n.find("Testing patch on branch ") >= 0:
-    branch = True
-    print n.strip()    
-    continue
-  if n.find("PATCH APPLICATION FAILED") >= 0:
-    print "PATCH APPLICATION FAILED"
-    sys.exit(1) 
-  if summary == 0 and n.find("Running tests.") >= 0:
-    summary = summary + 1
-    continue
-  if summary == 1 and n.find("[INFO] Reactor Summary:") >= 0:
-    summary = summary + 1
-    continue
-  if summary == 2 and n.find("[INFO] Apache HBase ") >= 0:
-    sys.stdout.write(n)
-    continue
-  if n.find("org.apache.hadoop.hbase") < 0:
-    continue 
-  test_name = string.strip(n[n.find("org.apache.hadoop.hbase"):len(n)])
-  if n.find("Running org.apache.hadoop.hbase") > -1 :
-    tests[test_name] = False
-  if n.find("Tests run:") > -1 :
-    if n.find("FAILURE") > -1 or n.find("ERROR") > -1:
-      failed_tests[test_name] = True
-    tests[test_name] = True
-response.close()
+import requests
+
+# If any of these strings appear in the console output, it's a build one should probably ignore
+# for analyzing failed/hanging tests.
+BAD_RUN_STRINGS = [
+    "Slave went offline during the build",  # Machine went down, can't do anything about it.
+    "The forked VM terminated without properly saying goodbye",  # JVM crashed.
+]
+
+
+def get_bad_tests(console_url):
+    """
+    Returns [[all tests], [failed tests], [timeout tests], [hanging tests]] if successfully gets
+    the build information.
+    If there is error getting console text or if there are blacklisted strings in console text,
+    then returns None.
+    """
+    response = requests.get(console_url)
+    if response.status_code != 200:
+        print "Error getting consoleText. Response = {} {}".format(
+            response.status_code, response.reason)
+        return
+
+    # All tests: All testcases which were run.
+    # Hanging test: A testcase which started but never finished.
+    # Failed test: Testcase which encountered any kind of failure. It can be failing atomic tests,
+    #   timed out tests, etc
+    # Timeout test: A Testcase which encountered timeout. Naturally, all timeout tests will be
+    #   included in failed tests.
+    all_tests_set = set()
+    hanging_tests_set = set()
+    failed_tests_set = set()
+    timeout_tests_set = set()
+    for line in response.content.splitlines():
+        result1 = re.findall("Running org.apache.hadoop.hbase.(.*)", line)
+        if len(result1) == 1:
+            test_case = result1[0]
+            if test_case in all_tests_set:
+                print ("ERROR! Multiple tests with same name '{}'. Might get wrong results "
+                       "for this test.".format(test_case))
+            else:
+                hanging_tests_set.add(test_case)
+                all_tests_set.add(test_case)
+        result2 = re.findall("Tests run:.*?- in org.apache.hadoop.hbase.(.*)", line)
+        if len(result2) == 1:
+            test_case = result2[0]
+            if "FAILURE!" in line:
+                failed_tests_set.add(test_case)
+            if test_case not in hanging_tests_set:
+                print ("ERROR! No test '{}' found in hanging_tests. Might get wrong results "
+                       "for this test. This may also happen if maven is set to retry failing "
+                       "tests.".format(test_case))
+            else:
+                hanging_tests_set.remove(test_case)
+        result3 = re.match("^\\s+(\\w*).*\\sTestTimedOut", line)
+        if result3:
+            test_case = result3.group(1)
+            timeout_tests_set.add(test_case)
+        for bad_string in BAD_RUN_STRINGS:
+            if re.match(".*" + bad_string + ".*", line):
+                print "Bad string found in build:\n > {}".format(line)
+    print "Result > total tests: {:4}   failed : {:4}  timedout : {:4}  hanging : {:4}".format(
+        len(all_tests_set), len(failed_tests_set), len(timeout_tests_set), len(hanging_tests_set))
+    return [all_tests_set, failed_tests_set, timeout_tests_set, hanging_tests_set]
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print "ERROR : Provide the jenkins job console URL as the only argument."
+        sys.exit(1)
+
+    print "Fetching {}".format(sys.argv[1])
+    result = get_bad_tests(sys.argv[1])
+    if not result:
+        sys.exit(1)
+    [all_tests, failed_tests, timedout_tests, hanging_tests] = result
+
+    print "Found {} hanging tests:".format(len(hanging_tests))
+    for test in hanging_tests:
+        print test
+    print "\n"
+    print "Found {} failed tests of which {} timed out:".format(
+        len(failed_tests), len(timedout_tests))
+    for test in failed_tests:
+        print "{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else ""))
 
-print "Printing hanging tests"
-for key, value in tests.iteritems():
-  if value == False:
-    print "Hanging test : " + key
-print "Printing Failing tests"
-for key, value in failed_tests.iteritems():
-  print "Failing test : " + key
+    print ("\nA test may have had 0 or more atomic test failures before it timed out. So a "
+           "'Timed Out' test may have other errors too.")


[3/4] hbase git commit: HBASE-20387 ADDENDUM backport findHangingTests.py changes from master to branches-1.

Posted by bu...@apache.org.
HBASE-20387 ADDENDUM backport findHangingTests.py changes from master to branches-1.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/d95e6642
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/d95e6642
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/d95e6642

Branch: refs/heads/branch-1.3
Commit: d95e664245b886da97f8ea3d0fbd080c37ef9db1
Parents: 0c0c723
Author: Sean Busbey <bu...@apache.org>
Authored: Thu Aug 16 23:55:28 2018 -0500
Committer: Sean Busbey <bu...@apache.org>
Committed: Fri Aug 17 00:02:29 2018 -0500

----------------------------------------------------------------------
 dev-support/flaky-tests/findHangingTests.py | 159 ++++++++++++++---------
 1 file changed, 96 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/d95e6642/dev-support/flaky-tests/findHangingTests.py
----------------------------------------------------------------------
diff --git a/dev-support/flaky-tests/findHangingTests.py b/dev-support/flaky-tests/findHangingTests.py
old mode 100644
new mode 100755
index deccc8b..328516e
--- a/dev-support/flaky-tests/findHangingTests.py
+++ b/dev-support/flaky-tests/findHangingTests.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 ##
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -15,68 +15,101 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-##
-# script to find hanging test from Jenkins build output
+
+# pylint: disable=invalid-name
+# To disable 'invalid constant name' warnings.
+
+"""
+# Script to find hanging test from Jenkins build output
 # usage: ./findHangingTests.py <url of Jenkins build console>
-#
-import urllib2
+"""
+
+import re
 import sys
-import string
-if len(sys.argv) != 2 :
-  print "ERROR : Provide the jenkins job console URL as the only argument."
-  exit(1)
-print "Fetching " + sys.argv[1]
-response = urllib2.urlopen(sys.argv[1])
-i = 0;
-tests = {}
-failed_tests = {}
-summary = 0
-host = False
-patch = False
-branch = False
-while True:
-  n = response.readline()
-  if n == "" :
-    break
-  if not host and n.find("Building remotely on") >= 0:
-    host = True
-    print n.strip()    
-    continue
-  if not patch and n.find("Testing patch for ") >= 0:
-    patch = True
-    print n.strip()    
-    continue
-  if not branch and n.find("Testing patch on branch ") >= 0:
-    branch = True
-    print n.strip()    
-    continue
-  if n.find("PATCH APPLICATION FAILED") >= 0:
-    print "PATCH APPLICATION FAILED"
-    sys.exit(1) 
-  if summary == 0 and n.find("Running tests.") >= 0:
-    summary = summary + 1
-    continue
-  if summary == 1 and n.find("[INFO] Reactor Summary:") >= 0:
-    summary = summary + 1
-    continue
-  if summary == 2 and n.find("[INFO] Apache HBase ") >= 0:
-    sys.stdout.write(n)
-    continue
-  if n.find("org.apache.hadoop.hbase") < 0:
-    continue 
-  test_name = string.strip(n[n.find("org.apache.hadoop.hbase"):len(n)])
-  if n.find("Running org.apache.hadoop.hbase") > -1 :
-    tests[test_name] = False
-  if n.find("Tests run:") > -1 :
-    if n.find("FAILURE") > -1 or n.find("ERROR") > -1:
-      failed_tests[test_name] = True
-    tests[test_name] = True
-response.close()
+import requests
+
+# If any of these strings appear in the console output, it's a build one should probably ignore
+# for analyzing failed/hanging tests.
+BAD_RUN_STRINGS = [
+    "Slave went offline during the build",  # Machine went down, can't do anything about it.
+    "The forked VM terminated without properly saying goodbye",  # JVM crashed.
+]
+
+
+def get_bad_tests(console_url):
+    """
+    Returns [[all tests], [failed tests], [timeout tests], [hanging tests]] if successfully gets
+    the build information.
+    If there is error getting console text or if there are blacklisted strings in console text,
+    then returns None.
+    """
+    response = requests.get(console_url)
+    if response.status_code != 200:
+        print "Error getting consoleText. Response = {} {}".format(
+            response.status_code, response.reason)
+        return
+
+    # All tests: All testcases which were run.
+    # Hanging test: A testcase which started but never finished.
+    # Failed test: Testcase which encountered any kind of failure. It can be failing atomic tests,
+    #   timed out tests, etc
+    # Timeout test: A Testcase which encountered timeout. Naturally, all timeout tests will be
+    #   included in failed tests.
+    all_tests_set = set()
+    hanging_tests_set = set()
+    failed_tests_set = set()
+    timeout_tests_set = set()
+    for line in response.content.splitlines():
+        result1 = re.findall("Running org.apache.hadoop.hbase.(.*)", line)
+        if len(result1) == 1:
+            test_case = result1[0]
+            if test_case in all_tests_set:
+                print ("ERROR! Multiple tests with same name '{}'. Might get wrong results "
+                       "for this test.".format(test_case))
+            else:
+                hanging_tests_set.add(test_case)
+                all_tests_set.add(test_case)
+        result2 = re.findall("Tests run:.*?- in org.apache.hadoop.hbase.(.*)", line)
+        if len(result2) == 1:
+            test_case = result2[0]
+            if "FAILURE!" in line:
+                failed_tests_set.add(test_case)
+            if test_case not in hanging_tests_set:
+                print ("ERROR! No test '{}' found in hanging_tests. Might get wrong results "
+                       "for this test. This may also happen if maven is set to retry failing "
+                       "tests.".format(test_case))
+            else:
+                hanging_tests_set.remove(test_case)
+        result3 = re.match("^\\s+(\\w*).*\\sTestTimedOut", line)
+        if result3:
+            test_case = result3.group(1)
+            timeout_tests_set.add(test_case)
+        for bad_string in BAD_RUN_STRINGS:
+            if re.match(".*" + bad_string + ".*", line):
+                print "Bad string found in build:\n > {}".format(line)
+    print "Result > total tests: {:4}   failed : {:4}  timedout : {:4}  hanging : {:4}".format(
+        len(all_tests_set), len(failed_tests_set), len(timeout_tests_set), len(hanging_tests_set))
+    return [all_tests_set, failed_tests_set, timeout_tests_set, hanging_tests_set]
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print "ERROR : Provide the jenkins job console URL as the only argument."
+        sys.exit(1)
+
+    print "Fetching {}".format(sys.argv[1])
+    result = get_bad_tests(sys.argv[1])
+    if not result:
+        sys.exit(1)
+    [all_tests, failed_tests, timedout_tests, hanging_tests] = result
+
+    print "Found {} hanging tests:".format(len(hanging_tests))
+    for test in hanging_tests:
+        print test
+    print "\n"
+    print "Found {} failed tests of which {} timed out:".format(
+        len(failed_tests), len(timedout_tests))
+    for test in failed_tests:
+        print "{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else ""))
 
-print "Printing hanging tests"
-for key, value in tests.iteritems():
-  if value == False:
-    print "Hanging test : " + key
-print "Printing Failing tests"
-for key, value in failed_tests.iteritems():
-  print "Failing test : " + key
+    print ("\nA test may have had 0 or more atomic test failures before it timed out. So a "
+           "'Timed Out' test may have other errors too.")


[4/4] hbase git commit: HBASE-20387 ADDENDUM backport findHangingTests.py changes from master to branches-1.

Posted by bu...@apache.org.
HBASE-20387 ADDENDUM backport findHangingTests.py changes from master to branches-1.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/2676d498
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/2676d498
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/2676d498

Branch: refs/heads/branch-1.2
Commit: 2676d498f5b515dfb7cb6b08d3458480b1b6bfbc
Parents: 9037405
Author: Sean Busbey <bu...@apache.org>
Authored: Thu Aug 16 23:55:28 2018 -0500
Committer: Sean Busbey <bu...@apache.org>
Committed: Fri Aug 17 00:04:05 2018 -0500

----------------------------------------------------------------------
 dev-support/flaky-tests/findHangingTests.py | 159 ++++++++++++++---------
 1 file changed, 96 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/2676d498/dev-support/flaky-tests/findHangingTests.py
----------------------------------------------------------------------
diff --git a/dev-support/flaky-tests/findHangingTests.py b/dev-support/flaky-tests/findHangingTests.py
old mode 100644
new mode 100755
index deccc8b..328516e
--- a/dev-support/flaky-tests/findHangingTests.py
+++ b/dev-support/flaky-tests/findHangingTests.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 ##
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -15,68 +15,101 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-##
-# script to find hanging test from Jenkins build output
+
+# pylint: disable=invalid-name
+# To disable 'invalid constant name' warnings.
+
+"""
+# Script to find hanging test from Jenkins build output
 # usage: ./findHangingTests.py <url of Jenkins build console>
-#
-import urllib2
+"""
+
+import re
 import sys
-import string
-if len(sys.argv) != 2 :
-  print "ERROR : Provide the jenkins job console URL as the only argument."
-  exit(1)
-print "Fetching " + sys.argv[1]
-response = urllib2.urlopen(sys.argv[1])
-i = 0;
-tests = {}
-failed_tests = {}
-summary = 0
-host = False
-patch = False
-branch = False
-while True:
-  n = response.readline()
-  if n == "" :
-    break
-  if not host and n.find("Building remotely on") >= 0:
-    host = True
-    print n.strip()    
-    continue
-  if not patch and n.find("Testing patch for ") >= 0:
-    patch = True
-    print n.strip()    
-    continue
-  if not branch and n.find("Testing patch on branch ") >= 0:
-    branch = True
-    print n.strip()    
-    continue
-  if n.find("PATCH APPLICATION FAILED") >= 0:
-    print "PATCH APPLICATION FAILED"
-    sys.exit(1) 
-  if summary == 0 and n.find("Running tests.") >= 0:
-    summary = summary + 1
-    continue
-  if summary == 1 and n.find("[INFO] Reactor Summary:") >= 0:
-    summary = summary + 1
-    continue
-  if summary == 2 and n.find("[INFO] Apache HBase ") >= 0:
-    sys.stdout.write(n)
-    continue
-  if n.find("org.apache.hadoop.hbase") < 0:
-    continue 
-  test_name = string.strip(n[n.find("org.apache.hadoop.hbase"):len(n)])
-  if n.find("Running org.apache.hadoop.hbase") > -1 :
-    tests[test_name] = False
-  if n.find("Tests run:") > -1 :
-    if n.find("FAILURE") > -1 or n.find("ERROR") > -1:
-      failed_tests[test_name] = True
-    tests[test_name] = True
-response.close()
+import requests
+
+# If any of these strings appear in the console output, it's a build one should probably ignore
+# for analyzing failed/hanging tests.
+BAD_RUN_STRINGS = [
+    "Slave went offline during the build",  # Machine went down, can't do anything about it.
+    "The forked VM terminated without properly saying goodbye",  # JVM crashed.
+]
+
+
+def get_bad_tests(console_url):
+    """
+    Returns [[all tests], [failed tests], [timeout tests], [hanging tests]] if successfully gets
+    the build information.
+    If there is error getting console text or if there are blacklisted strings in console text,
+    then returns None.
+    """
+    response = requests.get(console_url)
+    if response.status_code != 200:
+        print "Error getting consoleText. Response = {} {}".format(
+            response.status_code, response.reason)
+        return
+
+    # All tests: All testcases which were run.
+    # Hanging test: A testcase which started but never finished.
+    # Failed test: Testcase which encountered any kind of failure. It can be failing atomic tests,
+    #   timed out tests, etc
+    # Timeout test: A Testcase which encountered timeout. Naturally, all timeout tests will be
+    #   included in failed tests.
+    all_tests_set = set()
+    hanging_tests_set = set()
+    failed_tests_set = set()
+    timeout_tests_set = set()
+    for line in response.content.splitlines():
+        result1 = re.findall("Running org.apache.hadoop.hbase.(.*)", line)
+        if len(result1) == 1:
+            test_case = result1[0]
+            if test_case in all_tests_set:
+                print ("ERROR! Multiple tests with same name '{}'. Might get wrong results "
+                       "for this test.".format(test_case))
+            else:
+                hanging_tests_set.add(test_case)
+                all_tests_set.add(test_case)
+        result2 = re.findall("Tests run:.*?- in org.apache.hadoop.hbase.(.*)", line)
+        if len(result2) == 1:
+            test_case = result2[0]
+            if "FAILURE!" in line:
+                failed_tests_set.add(test_case)
+            if test_case not in hanging_tests_set:
+                print ("ERROR! No test '{}' found in hanging_tests. Might get wrong results "
+                       "for this test. This may also happen if maven is set to retry failing "
+                       "tests.".format(test_case))
+            else:
+                hanging_tests_set.remove(test_case)
+        result3 = re.match("^\\s+(\\w*).*\\sTestTimedOut", line)
+        if result3:
+            test_case = result3.group(1)
+            timeout_tests_set.add(test_case)
+        for bad_string in BAD_RUN_STRINGS:
+            if re.match(".*" + bad_string + ".*", line):
+                print "Bad string found in build:\n > {}".format(line)
+    print "Result > total tests: {:4}   failed : {:4}  timedout : {:4}  hanging : {:4}".format(
+        len(all_tests_set), len(failed_tests_set), len(timeout_tests_set), len(hanging_tests_set))
+    return [all_tests_set, failed_tests_set, timeout_tests_set, hanging_tests_set]
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print "ERROR : Provide the jenkins job console URL as the only argument."
+        sys.exit(1)
+
+    print "Fetching {}".format(sys.argv[1])
+    result = get_bad_tests(sys.argv[1])
+    if not result:
+        sys.exit(1)
+    [all_tests, failed_tests, timedout_tests, hanging_tests] = result
+
+    print "Found {} hanging tests:".format(len(hanging_tests))
+    for test in hanging_tests:
+        print test
+    print "\n"
+    print "Found {} failed tests of which {} timed out:".format(
+        len(failed_tests), len(timedout_tests))
+    for test in failed_tests:
+        print "{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else ""))
 
-print "Printing hanging tests"
-for key, value in tests.iteritems():
-  if value == False:
-    print "Hanging test : " + key
-print "Printing Failing tests"
-for key, value in failed_tests.iteritems():
-  print "Failing test : " + key
+    print ("\nA test may have had 0 or more atomic test failures before it timed out. So a "
+           "'Timed Out' test may have other errors too.")


[2/4] hbase git commit: HBASE-20387 ADDENDUM backport findHangingTests.py changes from master to branches-1.

Posted by bu...@apache.org.
HBASE-20387 ADDENDUM backport findHangingTests.py changes from master to branches-1.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/9f78a1dd
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/9f78a1dd
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/9f78a1dd

Branch: refs/heads/branch-1.4
Commit: 9f78a1dd638069c62dce77a5a6dee1d977bbe4d9
Parents: a712316
Author: Sean Busbey <bu...@apache.org>
Authored: Thu Aug 16 23:55:28 2018 -0500
Committer: Sean Busbey <bu...@apache.org>
Committed: Fri Aug 17 00:00:17 2018 -0500

----------------------------------------------------------------------
 dev-support/flaky-tests/findHangingTests.py | 159 ++++++++++++++---------
 1 file changed, 96 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/9f78a1dd/dev-support/flaky-tests/findHangingTests.py
----------------------------------------------------------------------
diff --git a/dev-support/flaky-tests/findHangingTests.py b/dev-support/flaky-tests/findHangingTests.py
old mode 100644
new mode 100755
index deccc8b..328516e
--- a/dev-support/flaky-tests/findHangingTests.py
+++ b/dev-support/flaky-tests/findHangingTests.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 ##
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -15,68 +15,101 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-##
-# script to find hanging test from Jenkins build output
+
+# pylint: disable=invalid-name
+# To disable 'invalid constant name' warnings.
+
+"""
+# Script to find hanging test from Jenkins build output
 # usage: ./findHangingTests.py <url of Jenkins build console>
-#
-import urllib2
+"""
+
+import re
 import sys
-import string
-if len(sys.argv) != 2 :
-  print "ERROR : Provide the jenkins job console URL as the only argument."
-  exit(1)
-print "Fetching " + sys.argv[1]
-response = urllib2.urlopen(sys.argv[1])
-i = 0;
-tests = {}
-failed_tests = {}
-summary = 0
-host = False
-patch = False
-branch = False
-while True:
-  n = response.readline()
-  if n == "" :
-    break
-  if not host and n.find("Building remotely on") >= 0:
-    host = True
-    print n.strip()    
-    continue
-  if not patch and n.find("Testing patch for ") >= 0:
-    patch = True
-    print n.strip()    
-    continue
-  if not branch and n.find("Testing patch on branch ") >= 0:
-    branch = True
-    print n.strip()    
-    continue
-  if n.find("PATCH APPLICATION FAILED") >= 0:
-    print "PATCH APPLICATION FAILED"
-    sys.exit(1) 
-  if summary == 0 and n.find("Running tests.") >= 0:
-    summary = summary + 1
-    continue
-  if summary == 1 and n.find("[INFO] Reactor Summary:") >= 0:
-    summary = summary + 1
-    continue
-  if summary == 2 and n.find("[INFO] Apache HBase ") >= 0:
-    sys.stdout.write(n)
-    continue
-  if n.find("org.apache.hadoop.hbase") < 0:
-    continue 
-  test_name = string.strip(n[n.find("org.apache.hadoop.hbase"):len(n)])
-  if n.find("Running org.apache.hadoop.hbase") > -1 :
-    tests[test_name] = False
-  if n.find("Tests run:") > -1 :
-    if n.find("FAILURE") > -1 or n.find("ERROR") > -1:
-      failed_tests[test_name] = True
-    tests[test_name] = True
-response.close()
+import requests
+
+# If any of these strings appear in the console output, it's a build one should probably ignore
+# for analyzing failed/hanging tests.
+BAD_RUN_STRINGS = [
+    "Slave went offline during the build",  # Machine went down, can't do anything about it.
+    "The forked VM terminated without properly saying goodbye",  # JVM crashed.
+]
+
+
+def get_bad_tests(console_url):
+    """
+    Returns [[all tests], [failed tests], [timeout tests], [hanging tests]] if successfully gets
+    the build information.
+    If there is error getting console text or if there are blacklisted strings in console text,
+    then returns None.
+    """
+    response = requests.get(console_url)
+    if response.status_code != 200:
+        print "Error getting consoleText. Response = {} {}".format(
+            response.status_code, response.reason)
+        return
+
+    # All tests: All testcases which were run.
+    # Hanging test: A testcase which started but never finished.
+    # Failed test: Testcase which encountered any kind of failure. It can be failing atomic tests,
+    #   timed out tests, etc
+    # Timeout test: A Testcase which encountered timeout. Naturally, all timeout tests will be
+    #   included in failed tests.
+    all_tests_set = set()
+    hanging_tests_set = set()
+    failed_tests_set = set()
+    timeout_tests_set = set()
+    for line in response.content.splitlines():
+        result1 = re.findall("Running org.apache.hadoop.hbase.(.*)", line)
+        if len(result1) == 1:
+            test_case = result1[0]
+            if test_case in all_tests_set:
+                print ("ERROR! Multiple tests with same name '{}'. Might get wrong results "
+                       "for this test.".format(test_case))
+            else:
+                hanging_tests_set.add(test_case)
+                all_tests_set.add(test_case)
+        result2 = re.findall("Tests run:.*?- in org.apache.hadoop.hbase.(.*)", line)
+        if len(result2) == 1:
+            test_case = result2[0]
+            if "FAILURE!" in line:
+                failed_tests_set.add(test_case)
+            if test_case not in hanging_tests_set:
+                print ("ERROR! No test '{}' found in hanging_tests. Might get wrong results "
+                       "for this test. This may also happen if maven is set to retry failing "
+                       "tests.".format(test_case))
+            else:
+                hanging_tests_set.remove(test_case)
+        result3 = re.match("^\\s+(\\w*).*\\sTestTimedOut", line)
+        if result3:
+            test_case = result3.group(1)
+            timeout_tests_set.add(test_case)
+        for bad_string in BAD_RUN_STRINGS:
+            if re.match(".*" + bad_string + ".*", line):
+                print "Bad string found in build:\n > {}".format(line)
+    print "Result > total tests: {:4}   failed : {:4}  timedout : {:4}  hanging : {:4}".format(
+        len(all_tests_set), len(failed_tests_set), len(timeout_tests_set), len(hanging_tests_set))
+    return [all_tests_set, failed_tests_set, timeout_tests_set, hanging_tests_set]
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print "ERROR : Provide the jenkins job console URL as the only argument."
+        sys.exit(1)
+
+    print "Fetching {}".format(sys.argv[1])
+    result = get_bad_tests(sys.argv[1])
+    if not result:
+        sys.exit(1)
+    [all_tests, failed_tests, timedout_tests, hanging_tests] = result
+
+    print "Found {} hanging tests:".format(len(hanging_tests))
+    for test in hanging_tests:
+        print test
+    print "\n"
+    print "Found {} failed tests of which {} timed out:".format(
+        len(failed_tests), len(timedout_tests))
+    for test in failed_tests:
+        print "{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else ""))
 
-print "Printing hanging tests"
-for key, value in tests.iteritems():
-  if value == False:
-    print "Hanging test : " + key
-print "Printing Failing tests"
-for key, value in failed_tests.iteritems():
-  print "Failing test : " + key
+    print ("\nA test may have had 0 or more atomic test failures before it timed out. So a "
+           "'Timed Out' test may have other errors too.")