You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by "Ekaterina Dimitrova (Jira)" <ji...@apache.org> on 2022/09/14 14:59:00 UTC

[jira] [Updated] (CASSANDRA-17891) Fix hintedhandoff_test.py::TestHintedHandoff::test_hintedhandoff_window

     [ https://issues.apache.org/jira/browse/CASSANDRA-17891?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Ekaterina Dimitrova updated CASSANDRA-17891:
--------------------------------------------
    Description: 
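hintedhandoff_test.py::TestHintedHandoff::test_hintedhandoff_window is flaky: it intermittently fails its final check that the second and third rounds of inserts (rows 100-299) are absent on node2, because a row that should not have been hinted is found there. See the traceback below.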

 
{code:python}
self = <hintedhandoff_test.TestHintedHandoff object at 0x7f2d903b8630>

    @since('4.1')
    def test_hintedhandoff_window(self):
        """
            Test that we only store at a maximum the hint window worth of hints.
            Prior to CASSANDRA-14309 we would store another window worth of hints
            if the down node was brought up and then taken back down immediately.
            We would also store another window of hints on a live node if the live
            node was restarted.
            @jira_ticket CASSANDRA-14309
            """
    
        # hint_window_persistent_enabled is set to true by default
        self.cluster.set_configuration_options({'max_hint_window_in_ms': 10000,
                                                'hinted_handoff_enabled': True,
                                                'max_hints_delivery_threads': 1,
                                                'hints_flush_period_in_ms': 100, })
        self.cluster.populate(2).start()
        node1, node2 = self.cluster.nodelist()
        session = self.patient_cql_connection(node1)
        create_ks(session, 'ks', 2)
        create_c1c2_table(self, session)
    
        # Stop handoff until very end and take node2 down for first round of hints
        node1.nodetool('pausehandoff')
    
        node2.nodetool('disablebinary')
        node2.watch_log_for(["Stop listening for CQL clients"], timeout=120)
    
        node2.nodetool('disablegossip')
        node2.watch_log_for(["Announcing shutdown", "state jump to shutdown"], timeout=120)
        node1.watch_log_for(["state jump to shutdown"], timeout=120)
    
        log_mark_node_1 = node1.mark_log()
        log_mark_node_2 = node2.mark_log()
    
        # First round of hints. We expect these to be replayed and the only
        # hints within the window
        insert_c1c2(session, n=(0, 100), consistency=ConsistencyLevel.ONE)
    
        # Let hint window pass
        time.sleep(15)
    
        # Re-enable and disable the node. Prior to CASSANDRA-14215 this should make the hint window on node1 reset.
        node2.nodetool('enablegossip')
        node2.watch_log_for(["state jump to NORMAL"], timeout=120, from_mark=log_mark_node_2)
        node1.watch_log_for(["state jump to NORMAL"], timeout=120, from_mark=log_mark_node_1)
    
        log_mark_node_1 = node1.mark_log()
        log_mark_node_2 = node2.mark_log()
    
        node2.nodetool('disablegossip')
    
        node2.watch_log_for(["Announcing shutdown", "state jump to shutdown"], timeout=120, from_mark=log_mark_node_2)
        node1.watch_log_for(["state jump to shutdown"], timeout=120, from_mark=log_mark_node_1)
    
        log_mark_node_1 = node1.mark_log()
        log_mark_node_2 = node2.mark_log()
    
        def endpoint_downtime(node_to_query, node):
            mbean = make_mbean('net', type='Gossiper')
            with JolokiaAgent(node_to_query) as jmx:
                return jmx.execute_method(mbean, 'getEndpointDowntime(java.lang.String)', [node])
    
        while endpoint_downtime(node1, "127.0.0.2") <= 5000:
            time.sleep(1)
    
        # Second round of inserts. We do not expect hints to be stored.
        insert_c1c2(session, n=(100, 200), consistency=ConsistencyLevel.ONE)
    
        # Restart node1. Prior to CASSANDRA-14215 this would reset node1's hint window.
        node1.stop()
        node1.start(wait_for_binary_proto=True, wait_other_notice=False)
        session = self.patient_exclusive_cql_connection(node1)
        session.execute('USE ks')
        # Third round of inserts. We do not expect hints to be stored.
        insert_c1c2(session, n=(200, 300), consistency=ConsistencyLevel.ONE)
    
        # Enable node2 and wait for hints to be replayed
        node2.nodetool('enablegossip')
        node2.watch_log_for(["state jump to NORMAL"], timeout=120, from_mark=log_mark_node_2)
    
        node2.nodetool('enablebinary')
        node2.watch_log_for(["Starting listening for CQL clients"], timeout=120, from_mark=log_mark_node_2)
        node1.nodetool('resumehandoff')
        node1.watch_log_for('Finished hinted handoff')
        # Stop node1 so that we only query node2
        node1.stop()
    
        session = self.patient_exclusive_cql_connection(node2)
        session.execute('USE ks')
        # Ensure first dataset is present (through replayed hints)
        for x in range(0, 100):
            query_c1c2(session, x, ConsistencyLevel.ONE)
    
        # Ensure second and third datasets are not present
        for x in range(100, 300):
>           query_c1c2(session, x, ConsistencyLevel.ONE, tolerate_missing=True, must_be_missing=True)

hintedhandoff_test.py:303: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
tools/data.py:47: in query_c1c2
    assertions.assert_length_equal(rows, 0)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

object_with_length = [Row(c1='value1', c2='value2')], expected_length = 0

    def assert_length_equal(object_with_length, expected_length):
        """
        Assert an object has a specific length.
        @param object_with_length The object whose length will be checked
        @param expected_length The expected length of the object
    
        Examples:
        assert_length_equal(res, nb_counter)
        """
        assert len(object_with_length) == expected_length, \
            "Expected {} to have length {}, but instead is of length {}"\
>           .format(object_with_length, expected_length, len(object_with_length))
E       AssertionError: Expected [Row(c1='value1', c2='value2')] to have length 0, but instead is of length 1

tools/assertions.py:269: AssertionError
{code}


 

  was:hintedhandoff_test.py::TestHintedHandoff::test_hintedhandoff_window is flaky


> Fix hintedhandoff_test.py::TestHintedHandoff::test_hintedhandoff_window
> -----------------------------------------------------------------------
>
>                 Key: CASSANDRA-17891
>                 URL: https://issues.apache.org/jira/browse/CASSANDRA-17891
>             Project: Cassandra
>          Issue Type: Bug
>            Reporter: Ekaterina Dimitrova
>            Priority: Normal
>
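> hintedhandoff_test.py::TestHintedHandoff::test_hintedhandoff_window is flaky: it intermittently fails its final check that the second and third rounds of inserts (rows 100-299) are absent on node2, because a row that should not have been hinted is found there. See the traceback below.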
>  
> {code:python}
> self = <hintedhandoff_test.TestHintedHandoff object at 0x7f2d903b8630>
>     @since('4.1')
>     def test_hintedhandoff_window(self):
>         """
>             Test that we only store at a maximum the hint window worth of hints.
>             Prior to CASSANDRA-14309 we would store another window worth of hints
>             if the down node was brought up and then taken back down immediately.
>             We would also store another window of hints on a live node if the live
>             node was restarted.
>             @jira_ticket CASSANDRA-14309
>             """
>     
>         # hint_window_persistent_enabled is set to true by default
>         self.cluster.set_configuration_options({'max_hint_window_in_ms': 10000,
>                                                 'hinted_handoff_enabled': True,
>                                                 'max_hints_delivery_threads': 1,
>                                                 'hints_flush_period_in_ms': 100, })
>         self.cluster.populate(2).start()
>         node1, node2 = self.cluster.nodelist()
>         session = self.patient_cql_connection(node1)
>         create_ks(session, 'ks', 2)
>         create_c1c2_table(self, session)
>     
>         # Stop handoff until very end and take node2 down for first round of hints
>         node1.nodetool('pausehandoff')
>     
>         node2.nodetool('disablebinary')
>         node2.watch_log_for(["Stop listening for CQL clients"], timeout=120)
>     
>         node2.nodetool('disablegossip')
>         node2.watch_log_for(["Announcing shutdown", "state jump to shutdown"], timeout=120)
>         node1.watch_log_for(["state jump to shutdown"], timeout=120)
>     
>         log_mark_node_1 = node1.mark_log()
>         log_mark_node_2 = node2.mark_log()
>     
>         # First round of hints. We expect these to be replayed and the only
>         # hints within the window
>         insert_c1c2(session, n=(0, 100), consistency=ConsistencyLevel.ONE)
>     
>         # Let hint window pass
>         time.sleep(15)
>     
>         # Re-enable and disable the node. Prior to CASSANDRA-14215 this should make the hint window on node1 reset.
>         node2.nodetool('enablegossip')
>         node2.watch_log_for(["state jump to NORMAL"], timeout=120, from_mark=log_mark_node_2)
>         node1.watch_log_for(["state jump to NORMAL"], timeout=120, from_mark=log_mark_node_1)
>     
>         log_mark_node_1 = node1.mark_log()
>         log_mark_node_2 = node2.mark_log()
>     
>         node2.nodetool('disablegossip')
>     
>         node2.watch_log_for(["Announcing shutdown", "state jump to shutdown"], timeout=120, from_mark=log_mark_node_2)
>         node1.watch_log_for(["state jump to shutdown"], timeout=120, from_mark=log_mark_node_1)
>     
>         log_mark_node_1 = node1.mark_log()
>         log_mark_node_2 = node2.mark_log()
>     
>         def endpoint_downtime(node_to_query, node):
>             mbean = make_mbean('net', type='Gossiper')
>             with JolokiaAgent(node_to_query) as jmx:
>                 return jmx.execute_method(mbean, 'getEndpointDowntime(java.lang.String)', [node])
>     
>         while endpoint_downtime(node1, "127.0.0.2") <= 5000:
>             time.sleep(1)
>     
>         # Second round of inserts. We do not expect hints to be stored.
>         insert_c1c2(session, n=(100, 200), consistency=ConsistencyLevel.ONE)
>     
>         # Restart node1. Prior to CASSANDRA-14215 this would reset node1's hint window.
>         node1.stop()
>         node1.start(wait_for_binary_proto=True, wait_other_notice=False)
>         session = self.patient_exclusive_cql_connection(node1)
>         session.execute('USE ks')
>         # Third round of inserts. We do not expect hints to be stored.
>         insert_c1c2(session, n=(200, 300), consistency=ConsistencyLevel.ONE)
>     
>         # Enable node2 and wait for hints to be replayed
>         node2.nodetool('enablegossip')
>         node2.watch_log_for(["state jump to NORMAL"], timeout=120, from_mark=log_mark_node_2)
>     
>         node2.nodetool('enablebinary')
>         node2.watch_log_for(["Starting listening for CQL clients"], timeout=120, from_mark=log_mark_node_2)
>         node1.nodetool('resumehandoff')
>         node1.watch_log_for('Finished hinted handoff')
>         # Stop node1 so that we only query node2
>         node1.stop()
>     
>         session = self.patient_exclusive_cql_connection(node2)
>         session.execute('USE ks')
>         # Ensure first dataset is present (through replayed hints)
>         for x in range(0, 100):
>             query_c1c2(session, x, ConsistencyLevel.ONE)
>     
>         # Ensure second and third datasets are not present
>         for x in range(100, 300):
> >           query_c1c2(session, x, ConsistencyLevel.ONE, tolerate_missing=True, must_be_missing=True)
> hintedhandoff_test.py:303: 
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
> tools/data.py:47: in query_c1c2
>     assertions.assert_length_equal(rows, 0)
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
> object_with_length = [Row(c1='value1', c2='value2')], expected_length = 0
>     def assert_length_equal(object_with_length, expected_length):
>         """
>         Assert an object has a specific length.
>         @param object_with_length The object whose length will be checked
>         @param expected_length The expected length of the object
>     
>         Examples:
>         assert_length_equal(res, nb_counter)
>         """
>         assert len(object_with_length) == expected_length, \
>             "Expected {} to have length {}, but instead is of length {}"\
> >           .format(object_with_length, expected_length, len(object_with_length))
> E       AssertionError: Expected [Row(c1='value1', c2='value2')] to have length 0, but instead is of length 1
> tools/assertions.py:269: AssertionError
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org