You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by "Joseph Lynch (Jira)" <ji...@apache.org> on 2019/09/06 15:33:00 UTC
[jira] [Updated] (CASSANDRA-15307) Fix flakey test_remote_query -
cql_test.TestCQLSlowQuery test
[ https://issues.apache.org/jira/browse/CASSANDRA-15307?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Joseph Lynch updated CASSANDRA-15307:
-------------------------------------
Fix Version/s: 4.0-alpha
> Fix flakey test_remote_query - cql_test.TestCQLSlowQuery test
> --------------------------------------------------------------
>
> Key: CASSANDRA-15307
> URL: https://issues.apache.org/jira/browse/CASSANDRA-15307
> Project: Cassandra
> Issue Type: Bug
> Components: Test/dtest
> Reporter: Joseph Lynch
> Priority: Normal
> Fix For: 4.0-alpha
>
>
> Example failure: [https://circleci.com/gh/jolynch/cassandra/554#tests/containers/61]
>
> {noformat}
> Your job ran 959 tests with 1 failure
> - test_remote_query cql_test.TestCQLSlowQuerycql_test.py
> ccmlib.node.TimeoutError: 05 Sep 2019 23:05:07 [node2] Missing: ['operations were slow', 'SELECT \\* FROM ks.test2 WHERE id = 1']: DEBUG [BatchlogTasks:1] 2019-09-05 23:04:24,437 Ba..... See debug.log for remainder
> self = <cql_test.TestCQLSlowQuery object at 0x7f4309528898>
> def test_remote_query(self):
> """
> Check that a query running on a node other than the coordinator is reported as slow:
>
> - populate the cluster with 2 nodes
> - start one node without having it join the ring
> - start the other one node with slow_query_log_timeout_in_ms set to a small value
> and the read request timeouts set to a large value (to ensure the query is not aborted) and
> read_iteration_delay set to a value big enough for the query to exceed slow_query_log_timeout_in_ms
> (this will cause read queries to take longer than the slow query timeout)
> - CREATE a table
> - INSERT 5000 rows on a session on the node that is not a member of the ring
> - run SELECT statements and check that the slow query messages are present in the debug logs
> (we cannot check the logs at info level because the no spam logger has unpredictable results)
>
> @jira_ticket CASSANDRA-12403
> """
> cluster = self.cluster
> cluster.set_configuration_options(values={'slow_query_log_timeout_in_ms': 10,
> 'request_timeout_in_ms': 120000,
> 'read_request_timeout_in_ms': 120000,
> 'range_request_timeout_in_ms': 120000})
>
> cluster.populate(2)
> node1, node2 = cluster.nodelist()
>
> node1.start(wait_for_binary_proto=True, join_ring=False) # ensure other node executes queries
> node2.start(wait_for_binary_proto=True,
> jvm_args=["-Dcassandra.monitoring_report_interval_ms=10",
> "-Dcassandra.test.read_iteration_delay_ms=1"]) # see above for explanation
>
> session = self.patient_exclusive_cql_connection(node1)
>
> create_ks(session, 'ks', 1)
> session.execute("""
> CREATE TABLE test2 (
> id int,
> col int,
> val text,
> PRIMARY KEY(id, col)
> );
> """)
>
> for i, j in itertools.product(list(range(100)), list(range(10))):
> session.execute("INSERT INTO test2 (id, col, val) VALUES ({}, {}, 'foo')".format(i, j))
>
> # only check debug logs because at INFO level the no-spam logger has unpredictable results
> mark = node2.mark_log(filename='debug.log')
> session.execute(SimpleStatement("SELECT * from test2",
> consistency_level=ConsistencyLevel.ONE,
> retry_policy=FallthroughRetryPolicy()))
> node2.watch_log_for(["operations were slow", "SELECT \* FROM ks.test2"],
> from_mark=mark, filename='debug.log', timeout=60)
>
>
> mark = node2.mark_log(filename='debug.log')
> session.execute(SimpleStatement("SELECT * from test2 where id = 1",
> consistency_level=ConsistencyLevel.ONE,
> retry_policy=FallthroughRetryPolicy()))
> node2.watch_log_for(["operations were slow", "SELECT \* FROM ks.test2 WHERE id = 1"],
> > from_mark=mark, filename='debug.log', timeout=60)
> cql_test.py:1150:
> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
> self = <ccmlib.node.Node object at 0x7f430b0cfcc0>
> exprs = ['operations were slow', 'SELECT \\* FROM ks.test2 WHERE id = 1']
> from_mark = 166214, timeout = 60, process = None, verbose = False
> filename = 'debug.log'
> def watch_log_for(self, exprs, from_mark=None, timeout=600, process=None, verbose=False, filename='system.log'):
> """
> Watch the log until one or more (regular) expression are found.
> This methods when all the expressions have been found or the method
> timeouts (a TimeoutError is then raised). On successful completion,
> a list of pair (line matched, match object) is returned.
> """
> start = time.time()
> tofind = [exprs] if isinstance(exprs, string_types) else exprs
> tofind = [re.compile(e) for e in tofind]
> matchings = []
> reads = ""
> if len(tofind) == 0:
> return None
>
> log_file = os.path.join(self.get_path(), 'logs', filename)
> output_read = False
> while not os.path.exists(log_file):
> time.sleep(.5)
> if start + timeout < time.time():
> raise TimeoutError(time.strftime("%d %b %Y %H:%M:%S", time.gmtime()) + " [" + self.name + "] Timed out waiting for {} to be created.".format(log_file))
> if process and not output_read:
> process.poll()
> if process.returncode is not None:
> self.print_process_output(self.name, process, verbose)
> output_read = True
> if process.returncode != 0:
> raise RuntimeError() # Shouldn't reuse RuntimeError but I'm lazy
>
> with open(log_file) as f:
> if from_mark:
> f.seek(from_mark)
>
> while True:
> # First, if we have a process to check, then check it.
> # Skip on Windows - stdout/stderr is cassandra.bat
> if not common.is_win() and not output_read:
> if process:
> process.poll()
> if process.returncode is not None:
> self.print_process_output(self.name, process, verbose)
> output_read = True
> if process.returncode != 0:
> raise RuntimeError() # Shouldn't reuse RuntimeError but I'm lazy
>
> line = f.readline()
> if line:
> reads = reads + line
> for e in tofind:
> m = e.search(line)
> if m:
> matchings.append((line, m))
> tofind.remove(e)
> if len(tofind) == 0:
> return matchings[0] if isinstance(exprs, string_types) else matchings
> else:
> # yep, it's ugly
> time.sleep(1)
> if start + timeout < time.time():
> > raise TimeoutError(time.strftime("%d %b %Y %H:%M:%S", time.gmtime()) + " [" + self.name + "] Missing: " + str([e.pattern for e in tofind]) + ":\n" + reads[:50] + ".....\nSee {} for remainder".format(filename))
> E ccmlib.node.TimeoutError: 05 Sep 2019 23:05:07 [node2] Missing: ['operations were slow', 'SELECT \\* FROM ks.test2 WHERE id = 1']:
> E DEBUG [BatchlogTasks:1] 2019-09-05 23:04:24,437 Ba.....
> E See debug.log for remainder
> ../env/src/ccm/ccmlib/node.py:536: TimeoutError {noformat}
--
This message was sent by Atlassian Jira
(v8.3.2#803003)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org