You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ja...@apache.org on 2020/04/26 22:22:53 UTC
[couchdb] 01/01: Improve robustness of couch expiring cache test
This is an automated email from the ASF dual-hosted git repository.
jaydoane pushed a commit to branch improve-expiring-cache-test-robustness
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 042e401cc92fd74c3e7e60521ff392d136fa782f
Author: Jay Doane <ja...@apache.org>
AuthorDate: Sun Apr 26 15:22:04 2020 -0700
Improve robustness of couch expiring cache test
In its current incarnation, the so-called "simple lifecycle" test is
prone to numerous failures in the CI system [1], doubtless because it's
riddled with race conditions. The original author makes many assumptions
about how quickly an (actual, unmocked) FDB instance will respond to a
request.
The primary goal is to stop failing CI builds, while other
considerations include: keeping the run time of the test as low as
possible, keeping the code coverage high, and documenting the known
races.
Specifically:
- Increase the `stale` and `expired` times by a factor of 5 to decrease
sensitivity to poor FDB performance.
- Change default timer from `erlang:system_time/1` to `os:timestamp` on
the assumption that the latter is less prone to warping [2].
- Decrease the period of the cache server reaper by half to increase
accuracy of eviction time.
- Inline and modify the `test_util:wait` code to make the timer
explicit, and emphasize that `timer:delay/1` only works with millisecond
resolution.
- Don't fail the test if it can't get a fresh lookup immediately after
insertion, but let it continue on to the next race, at least to the
point of expiration and deletion, which continue to be asserted.
- Factor `Timeout` and `Interval` to allow declarations near the other
hard-coded parameters.
- Move cache server `Opts` into `setup/0` and eliminate `start_link/0`.
- Double the overall test timeout to 20 seconds.
This has soaked for hundreds of runs on a 5 year old laptop, but the
real test is the CI system.
Should this test continue to fail CI builds, additional improvements
could include mocking the timer and/or FDB layer to eliminate the
variability of an integrated system.
[1] https://ci-couchdb.apache.org/blue/organizations/jenkins/jenkins-cm1%2FPullRequests/detail/PR-2813/10/pipeline
[2] http://erlang.org/doc/apps/erts/time_correction.html#terminology
---
.../src/couch_expiring_cache_server.erl | 10 +-
.../test/couch_expiring_cache_tests.erl | 101 ++++++++++++++-------
2 files changed, 77 insertions(+), 34 deletions(-)
diff --git a/src/couch_expiring_cache/src/couch_expiring_cache_server.erl b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl
index eb74e6e..74c432e 100644
--- a/src/couch_expiring_cache/src/couch_expiring_cache_server.erl
+++ b/src/couch_expiring_cache/src/couch_expiring_cache_server.erl
@@ -17,6 +17,7 @@
-callback start_link() -> {ok, pid()} | ignore | {error, term()}.
-export([
+ now_ts/0,
start_link/2
]).
@@ -80,10 +81,10 @@ handle_info(remove_expired, St) ->
largest_elapsed := LargestElapsed
} = St,
- NowTS = erlang:system_time(?TIME_UNIT),
+ NowTS = now_ts(),
OldestTS = max(OldestTS0,
couch_expiring_cache_fdb:clear_range_to(Name, NowTS, BatchSize)),
- Elapsed = erlang:system_time(?TIME_UNIT) - NowTS,
+ Elapsed = now_ts() - NowTS,
{noreply, St#{
timer_ref := schedule_remove_expired(Period, MaxJitter),
@@ -108,6 +109,11 @@ code_change(_OldVsn, St, _Extra) ->
{ok, St}.
+now_ts() ->
+ {Mega, Sec, Micro} = os:timestamp(),
+ ((Mega * 1000000) + Sec) * 1000 + Micro div 1000.
+
+
%% Private
diff --git a/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl b/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl
index 2e06fcc..0780b88 100644
--- a/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl
+++ b/src/couch_expiring_cache/test/couch_expiring_cache_tests.erl
@@ -18,16 +18,9 @@
-include_lib("couch_expiring_cache/include/couch_expiring_cache.hrl").
--define(CACHE_NAME, <<"test">>).
+-define(CACHE_NAME, atom_to_binary(?MODULE, utf8)).
-
-start_link() ->
- Opts = #{
- cache_name => ?CACHE_NAME,
- period => 20,
- max_jitter => 0
- },
- couch_expiring_cache_server:start_link(?MODULE, Opts).
+-define(FOREVER, 576460752303423488). % max int 64 bit
couch_expiring_cache_basic_test_() ->
@@ -56,7 +49,11 @@ teardown_couch(Ctx) ->
setup() ->
- {ok, Pid} = start_link(),
+ Opts = #{
+ cache_name => ?CACHE_NAME,
+ period => 10,
+ max_jitter => 0},
+ {ok, Pid} = couch_expiring_cache_server:start_link(?MODULE, Opts),
true = unlink(Pid),
#{pid => Pid}.
@@ -66,10 +63,18 @@ teardown(#{pid := Pid}) ->
simple_lifecycle(_) ->
- {timeout, 10, ?_test(begin
- Now = erlang:system_time(?TIME_UNIT),
- StaleTS = Now + 100,
- ExpiresTS = Now + 200,
+ % The entire test is racing against FDB being faster than timeout seconds
+ {timeout, 20, ?_test(begin
+ Start = couch_expiring_cache_server:now_ts(),
+ % Race Alert!
+ % We're betting on FDB returning a lookup faster than these:
+ Stale = 500,
+ Expires = 1000,
+ Timeout = 5000,
+ Interval = 5,
+
+ StaleTS = Start + Stale,
+ ExpiresTS = Start + Expires,
Name = ?CACHE_NAME,
Key = <<"key">>,
Val = <<"val">>,
@@ -77,34 +82,66 @@ simple_lifecycle(_) ->
?assertEqual(ok, couch_expiring_cache_fdb:clear_all(Name)),
?assertEqual(not_found, couch_expiring_cache:lookup(Name, Key)),
?assertEqual([], entries(Name)),
- ?assertEqual(ok,
- couch_expiring_cache:insert(Name, Key, Val, StaleTS, ExpiresTS)),
- ?assertEqual({fresh, Val}, couch_expiring_cache:lookup(Name, Key)),
- ok = wait_lookup(Name, Key, {stale, Val}),
+ ?assertEqual(ok, couch_expiring_cache:insert(Name, Key, Val,
+ StaleTS, ExpiresTS)),
+ ok = attempt_fresh_and_stale_lookups(Name, Key, Timeout, Interval),
% Refresh the existing key with updated timestamps
- ?assertEqual(ok,
- couch_expiring_cache:insert(Name, Key, Val,
- StaleTS + 100, ExpiresTS + 100)),
- ?assertEqual({fresh, Val}, couch_expiring_cache:lookup(Name, Key)),
+ Refresh = couch_expiring_cache_server:now_ts(),
+ ?assertEqual(ok, couch_expiring_cache:insert(Name, Key, Val,
+ Refresh + Stale, Refresh + Expires)),
+ ok = attempt_fresh_and_stale_lookups(Name, Key, Timeout, Interval),
?assertEqual(1, length(entries(Name))),
- ok = wait_lookup(Name, Key, {stale, Val}),
- ok = wait_lookup(Name, Key, expired),
- ok = wait_lookup(Name, Key, not_found),
- ?assertEqual([], entries(Name)),
- ?assertEqual(not_found, couch_expiring_cache:lookup(Name, Key))
+ % These last 2 are also races, betting on FDB to be reasonably
+ % fast on the home stretch
+ ok = wait_lookup(Name, Key, expired, Timeout, Interval),
+ ok = wait_lookup(Name, Key, not_found, Timeout, Interval),
+ ?assertEqual([], entries(Name))
end)}.
+% In this race we're betting on FDB to take less than `Stale` and then
+% `Expired` milliseconds to respond
+attempt_fresh_and_stale_lookups(Name, Key, Timeout, Interval) ->
+ case couch_expiring_cache:lookup(Name, Key) of
+ {fresh, Val} ->
+ % We won that race, let's bet on another!
+ ok = wait_lookup(Name, Key, {stale, Val}, Timeout, Interval);
+ _ ->
+ % Unlucky! But don't fail the test just yet...
+ ok
+ end.
+
+
entries(Name) ->
- FarFuture = erlang:system_time(?TIME_UNIT) * 2,
- couch_expiring_cache_fdb:get_range_to(Name, FarFuture, _Limit=100).
+ couch_expiring_cache_fdb:get_range_to(Name, ?FOREVER, _Limit=100).
-wait_lookup(Name, Key, Expect) ->
- test_util:wait(fun() ->
+% This lookup races against Timeout
+wait_lookup(Name, Key, Expect, Timeout, Interval) ->
+ wait(fun() ->
case couch_expiring_cache:lookup(Name, Key) of
Expect -> ok;
_ -> wait
end
- end, _Timeout = 1000, _PollingInterval = 10).
+ end, Timeout, Interval).
+
+
+wait(Fun, Timeout, Delay) ->
+ Now = couch_expiring_cache_server:now_ts(),
+ wait(Fun, Timeout, Delay, Now, Now).
+
+
+wait(_Fun, Timeout, _Delay, Started, Prev) when Prev - Started > Timeout ->
+ timeout;
+
+wait(Fun, Timeout, Delay, Started, _Prev) ->
+ case Fun() of
+ wait ->
+ % http://erlang.org/doc/man/timer.html#sleep-1
+ ok = timer:sleep(Delay), % always millisecond
+ wait(Fun, Timeout, Delay, Started,
+ couch_expiring_cache_server:now_ts());
+ Else ->
+ Else
+ end.