Posted to commits@cassandra.apache.org by "Josh McKenzie (Jira)" <ji...@apache.org> on 2022/01/26 18:07:00 UTC

[jira] [Created] (CASSANDRA-17305) Test Failure: dtest-upgrade.upgrade_tests.upgrade_through_versions_test.TestProtoV3Upgrade_AllVersions_RandomPartitioner_EndsAt_3_11_X_HEAD.test_parallel_upgrade

Josh McKenzie created CASSANDRA-17305:
-----------------------------------------

             Summary: Test Failure: dtest-upgrade.upgrade_tests.upgrade_through_versions_test.TestProtoV3Upgrade_AllVersions_RandomPartitioner_EndsAt_3_11_X_HEAD.test_parallel_upgrade
                 Key: CASSANDRA-17305
                 URL: https://issues.apache.org/jira/browse/CASSANDRA-17305
             Project: Cassandra
          Issue Type: Bug
          Components: Test/dtest/python
            Reporter: Josh McKenzie


Failed 2 times in the last 29 runs. Flakiness: 7%, Stability: 93%
Error Message
Failed: Timeout >900.0s

Stacktrace
{code}
self = <upgrade_tests.upgrade_through_versions_test.TestProtoV3Upgrade_AllVersions_RandomPartitioner_EndsAt_3_11_X_HEAD object at 0x7f19858d59d0>

    def test_parallel_upgrade(self):
        """
            Test upgrading cluster all at once (requires cluster downtime).
            """
>       self.upgrade_scenario()

upgrade_tests/upgrade_through_versions_test.py:313: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
upgrade_tests/upgrade_through_versions_test.py:435: in upgrade_scenario
    cluster.stop()
../venv/lib/python3.8/site-packages/ccmlib/cluster.py:576: in stop
    if not node.stop(wait=wait, signal_event=signal_event, **kwargs):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <ccmlib.node.Node object at 0x7f1985bba040>, wait = True
wait_other_notice = False, signal_event = <Signals.SIGTERM: 15>, kwargs = {}
still_running = True, wait_time_sec = 16, i = 4

    def stop(self, wait=True, wait_other_notice=False, signal_event=signal.SIGTERM, **kwargs):
        """
            Stop the node.
              - wait: if True (the default), wait for the Cassandra process to be
                really dead. Otherwise return after having sent the kill signal.
              - wait_other_notice: return only when the other live nodes of the
                cluster have marked this node has dead.
              - signal_event: Signal event to send to Cassandra; default is to
                let Cassandra clean up and shut down properly (SIGTERM [15])
              - Optional:
                 + gently: Let Cassandra clean up and shut down properly; unless
                           false perform a 'kill -9' which shuts down faster.
            """
        if self.is_running():
            if wait_other_notice:
                marks = [(node, node.mark_log()) for node in list(self.cluster.nodes.values()) if node.is_live() and node is not self]
    
            if common.is_win():
                # Just taskkill the instance, don't bother trying to shut it down gracefully.
                # Node recovery should prevent data loss from hard shutdown.
                # We have recurring issues with nodes not stopping / releasing files in the CI
                # environment so it makes more sense just to murder it hard since there's
                # really little downside.
    
                # We want the node to flush its data before shutdown as some tests rely on small writes being present.
                # The default Periodic sync at 10 ms may not have flushed data yet, causing tests to fail.
                # This is not a hard requirement, however, so we swallow any exceptions this may throw and kill anyway.
                if signal_event is signal.SIGTERM:
                    try:
                        self.flush()
                    except:
                        common.warning("Failed to flush node: {0} on shutdown.".format(self.name))
                        pass
    
                os.system("taskkill /F /PID " + str(self.pid))
                if self._find_pid_on_windows():
                    common.warning("Failed to terminate node: {0} with pid: {1}".format(self.name, self.pid))
            else:
                # Determine if the signal event should be updated to keep API compatibility
                if 'gently' in kwargs and kwargs['gently'] is False:
                    signal_event = signal.SIGKILL
    
                os.kill(self.pid, signal_event)
    
            if wait_other_notice:
                for node, mark in marks:
                    node.watch_log_for_death(self, from_mark=mark)
            else:
                time.sleep(.1)
    
            still_running = self.is_running()
            if still_running and wait:
                wait_time_sec = 1
                for i in xrange(0, 7):
                    # we'll double the wait time each try and cassandra should
                    # not take more than 1 minute to shutdown
>                   time.sleep(wait_time_sec)
E                   Failed: Timeout >900.0s

../venv/lib/python3.8/site-packages/ccmlib/node.py:970: Failed
{code}
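For context on where the time goes: the locals in the innermost frame (wait_time_sec = 16, i = 4) show the shutdown wait loop on its fifth retry, and the comment above says the wait doubles each try. Below is a minimal sketch of that backoff; the loop bound and starting value are taken from the trace, the doubling step is assumed from the comment (the quoted frame cuts off before it), and the function and parameter names are illustrative, not ccmlib's API.

{code}
# Minimal sketch of the shutdown wait loop in ccmlib.node.Node.stop().
# The doubling step is assumed from the "we'll double the wait time each try"
# comment -- the quoted frame ends before that line. Names are illustrative.
import time

def wait_for_shutdown(is_running, max_tries=7):
    wait_time_sec = 1
    for i in range(0, max_tries):
        time.sleep(wait_time_sec)
        if not is_running():
            return True
        wait_time_sec *= 2  # 1 + 2 + 4 + ... + 64 = 127 s worst case
    return False  # still running after roughly two minutes
{code}

Worst case that is about 127 s for a single node, so one stop() call should not reach the 900 s pytest timeout on its own; cluster.stop() (the calling frame above) stops every node in turn, though, so several slow or hung shutdowns in sequence can add up toward it.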


