You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by "Joseph Lynch (Jira)" <ji...@apache.org> on 2019/09/06 15:33:00 UTC
[jira] [Updated] (CASSANDRA-15307) Fix flakey test_remote_query - cql_test.TestCQLSlowQuery test

     [ https://issues.apache.org/jira/browse/CASSANDRA-15307?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Joseph Lynch updated CASSANDRA-15307:
-------------------------------------
    Fix Version/s: 4.0-alpha

> Fix flakey  test_remote_query - cql_test.TestCQLSlowQuery test
> --------------------------------------------------------------
>
>                 Key: CASSANDRA-15307
>                 URL: https://issues.apache.org/jira/browse/CASSANDRA-15307
>             Project: Cassandra
>          Issue Type: Bug
>          Components: Test/dtest
>            Reporter: Joseph Lynch
>            Priority: Normal
>             Fix For: 4.0-alpha
>
>
> Example failure: [https://circleci.com/gh/jolynch/cassandra/554#tests/containers/61]
>  
> {noformat}
> Your job ran 959 tests with 1 failure
> - test_remote_query cql_test.TestCQLSlowQuerycql_test.py
> ccmlib.node.TimeoutError: 05 Sep 2019 23:05:07 [node2] Missing: ['operations were slow', 'SELECT \\* FROM ks.test2 WHERE id = 1']: DEBUG [BatchlogTasks:1] 2019-09-05 23:04:24,437 Ba..... See debug.log for remainder
> self = <cql_test.TestCQLSlowQuery object at 0x7f4309528898>
>     def test_remote_query(self):
>         """
>             Check that a query running on a node other than the coordinator is reported as slow:
>     
>             - populate the cluster with 2 nodes
>             - start one node without having it join the ring
>             - start the other one node with slow_query_log_timeout_in_ms set to a small value
>               and the read request timeouts set to a large value (to ensure the query is not aborted) and
>               read_iteration_delay set to a value big enough for the query to exceed slow_query_log_timeout_in_ms
>               (this will cause read queries to take longer than the slow query timeout)
>             - CREATE a table
>             - INSERT 5000 rows on a session on the node that is not a member of the ring
>             - run SELECT statements and check that the slow query messages are present in the debug logs
>               (we cannot check the logs at info level because the no spam logger has unpredictable results)
>     
>             @jira_ticket CASSANDRA-12403
>             """
>         cluster = self.cluster
>         cluster.set_configuration_options(values={'slow_query_log_timeout_in_ms': 10,
>                                                   'request_timeout_in_ms': 120000,
>                                                   'read_request_timeout_in_ms': 120000,
>                                                   'range_request_timeout_in_ms': 120000})
>     
>         cluster.populate(2)
>         node1, node2 = cluster.nodelist()
>     
>         node1.start(wait_for_binary_proto=True, join_ring=False)  # ensure other node executes queries
>         node2.start(wait_for_binary_proto=True,
>                     jvm_args=["-Dcassandra.monitoring_report_interval_ms=10",
>                               "-Dcassandra.test.read_iteration_delay_ms=1"])  # see above for explanation
>     
>         session = self.patient_exclusive_cql_connection(node1)
>     
>         create_ks(session, 'ks', 1)
>         session.execute("""
>                 CREATE TABLE test2 (
>                     id int,
>                     col int,
>                     val text,
>                     PRIMARY KEY(id, col)
>                 );
>             """)
>     
>         for i, j in itertools.product(list(range(100)), list(range(10))):
>             session.execute("INSERT INTO test2 (id, col, val) VALUES ({}, {}, 'foo')".format(i, j))
>     
>         # only check debug logs because at INFO level the no-spam logger has unpredictable results
>         mark = node2.mark_log(filename='debug.log')
>         session.execute(SimpleStatement("SELECT * from test2",
>                                         consistency_level=ConsistencyLevel.ONE,
>                                         retry_policy=FallthroughRetryPolicy()))
>         node2.watch_log_for(["operations were slow", "SELECT \* FROM ks.test2"],
>                             from_mark=mark, filename='debug.log', timeout=60)
>     
>     
>         mark = node2.mark_log(filename='debug.log')
>         session.execute(SimpleStatement("SELECT * from test2 where id = 1",
>                                         consistency_level=ConsistencyLevel.ONE,
>                                         retry_policy=FallthroughRetryPolicy()))
>         node2.watch_log_for(["operations were slow", "SELECT \* FROM ks.test2 WHERE id = 1"],
> >                           from_mark=mark, filename='debug.log', timeout=60)
> cql_test.py:1150: 
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
> self = <ccmlib.node.Node object at 0x7f430b0cfcc0>
> exprs = ['operations were slow', 'SELECT \\* FROM ks.test2 WHERE id = 1']
> from_mark = 166214, timeout = 60, process = None, verbose = False
> filename = 'debug.log'
>     def watch_log_for(self, exprs, from_mark=None, timeout=600, process=None, verbose=False, filename='system.log'):
>         """
>             Watch the log until one or more (regular) expression are found.
>             This methods when all the expressions have been found or the method
>             timeouts (a TimeoutError is then raised). On successful completion,
>             a list of pair (line matched, match object) is returned.
>             """
>         start = time.time()
>         tofind = [exprs] if isinstance(exprs, string_types) else exprs
>         tofind = [re.compile(e) for e in tofind]
>         matchings = []
>         reads = ""
>         if len(tofind) == 0:
>             return None
>     
>         log_file = os.path.join(self.get_path(), 'logs', filename)
>         output_read = False
>         while not os.path.exists(log_file):
>             time.sleep(.5)
>             if start + timeout < time.time():
>                 raise TimeoutError(time.strftime("%d %b %Y %H:%M:%S", time.gmtime()) + " [" + self.name + "] Timed out waiting for {} to be created.".format(log_file))
>             if process and not output_read:
>                 process.poll()
>                 if process.returncode is not None:
>                     self.print_process_output(self.name, process, verbose)
>                     output_read = True
>                     if process.returncode != 0:
>                         raise RuntimeError()  # Shouldn't reuse RuntimeError but I'm lazy
>     
>         with open(log_file) as f:
>             if from_mark:
>                 f.seek(from_mark)
>     
>             while True:
>                 # First, if we have a process to check, then check it.
>                 # Skip on Windows - stdout/stderr is cassandra.bat
>                 if not common.is_win() and not output_read:
>                     if process:
>                         process.poll()
>                         if process.returncode is not None:
>                             self.print_process_output(self.name, process, verbose)
>                             output_read = True
>                             if process.returncode != 0:
>                                 raise RuntimeError()  # Shouldn't reuse RuntimeError but I'm lazy
>     
>                 line = f.readline()
>                 if line:
>                     reads = reads + line
>                     for e in tofind:
>                         m = e.search(line)
>                         if m:
>                             matchings.append((line, m))
>                             tofind.remove(e)
>                             if len(tofind) == 0:
>                                 return matchings[0] if isinstance(exprs, string_types) else matchings
>                 else:
>                     # yep, it's ugly
>                     time.sleep(1)
>                     if start + timeout < time.time():
> >                       raise TimeoutError(time.strftime("%d %b %Y %H:%M:%S", time.gmtime()) + " [" + self.name + "] Missing: " + str([e.pattern for e in tofind]) + ":\n" + reads[:50] + ".....\nSee {} for remainder".format(filename))
> E                       ccmlib.node.TimeoutError: 05 Sep 2019 23:05:07 [node2] Missing: ['operations were slow', 'SELECT \\* FROM ks.test2 WHERE id = 1']:
> E                       DEBUG [BatchlogTasks:1] 2019-09-05 23:04:24,437 Ba.....
> E                       See debug.log for remainder
> ../env/src/ccm/ccmlib/node.py:536: TimeoutError {noformat}



--
This message was sent by Atlassian Jira
(v8.3.2#803003)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org