You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2017/06/16 21:40:29 UTC

[couchdb] 03/03: Refresh cache entries periodically

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch optimize-ddoc-cache
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit db1bef3b3a7b53ee255e061413c4ab58c554b81b
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Fri Jun 16 16:35:15 2017 -0500

    Refresh cache entries periodically
    
    This patch restores the general runtime correctness of ddoc_cache. The
    refresher is responsible for updating cache entries either when they
    change or when they need to be removed. This clears up the previous
    possible race condition between ddoc_cache_opener and ddoc_cache_lru
    that could have left a stale entry in the cache. This same exact race
    condition existed with ets_lru but the max_age of ets_lru would cover up
    the issue if it ever happened (which would be fairly rare).
    
    This change also finally addresses the major issue in ddoc_cache where
    entries were forcibly evicted after max_age. This lead to situations
    where we would have a thundering herd of clients trying to all insert
    the same cache item just after it expired. In some extreme cases this
    would lead to a backup of the ddoc_cache message queue that was
    unrecoverable. Hopefully this change addresses that behavior.
---
 src/ddoc_cache/src/ddoc_cache.hrl           |   3 +-
 src/ddoc_cache/src/ddoc_cache_lru.erl       |  87 +++++++++++++++++----
 src/ddoc_cache/src/ddoc_cache_refresher.erl | 117 ++++++++++++++++++++++++++++
 3 files changed, 192 insertions(+), 15 deletions(-)

diff --git a/src/ddoc_cache/src/ddoc_cache.hrl b/src/ddoc_cache/src/ddoc_cache.hrl
index 8545914..6986211 100644
--- a/src/ddoc_cache/src/ddoc_cache.hrl
+++ b/src/ddoc_cache/src/ddoc_cache.hrl
@@ -22,7 +22,8 @@
 
 -record(entry, {
     key,
-    val
+    val,
+    pid
 }).
 
 -record(opener, {
diff --git a/src/ddoc_cache/src/ddoc_cache_lru.erl b/src/ddoc_cache/src/ddoc_cache_lru.erl
index ce53d1d..dbcaa28 100644
--- a/src/ddoc_cache/src/ddoc_cache_lru.erl
+++ b/src/ddoc_cache/src/ddoc_cache_lru.erl
@@ -20,6 +20,8 @@
 
     insert/2,
     accessed/1,
+    refresh/2,
+    remove/1,
     evict/2
 ]).
 
@@ -61,6 +63,14 @@ accessed(Key) ->
     gen_server:cast(?MODULE, {accessed, Key}).
 
 
+refresh(Key, Val) ->
+    gen_server:call(?MODULE, {refresh, Key, Val}).
+
+
+remove(Key) ->
+    gen_server:call(?MODULE, {remove, Key}).
+
+
 -spec evict(dbname(), [docid()]) -> ok.
 evict(DbName, DDocIds) ->
     gen_server:cast(?MODULE, {evict, DbName, DDocIds}).
@@ -97,12 +107,51 @@ handle_call({insert, Key, Val}, _From, St) ->
         time = Time
     } = St,
     NewTime = Time + 1,
-    true = ets:insert(?CACHE, #entry{key = Key, val = Val}),
+    Pid = ddoc_cache_refresher:spawn(Key),
+    true = ets:insert(?CACHE, #entry{key = Key, val = Val, pid = Pid}),
     true = ets:insert(?ATIMES, {NewTime, Key}),
     ok = khash:put(Keys, NewTime),
     store_key(Dbs, Key),
     {reply, ok, trim(St#st{time = NewTime})};
 
+handle_call({refresh, Key, Val}, _From, St) ->
+    #st{
+        keys = Keys
+    } = St,
+    case khash:lookup(Keys, Key) of
+        {value, _} ->
+            ets:update_element(?CACHE, Key, {#entry.val, Val}),
+            {reply, ok, St};
+        not_found ->
+            {reply, evicted, St}
+    end;
+
+handle_call({remove, Key}, _From, St) ->
+    #st{
+        keys = TimeKeys,
+        dbs = Dbs
+    } = St,
+    case khash:lookup(TimeKeys, Key) of
+        {value, ATime} ->
+            remove_entry(St, Key, ATime),
+            DbName = ddoc_cache_entry:dbname(Key),
+            DDocId = ddoc_cache_entry:ddocid(Key),
+            {value, DDocIds} = khash:lookup(Dbs, DbName),
+            {value, Keys} = khash:lookup(DDocIds, DDocId),
+            ok = khash:del(Keys, Key),
+            case khash:size(Keys) of
+                0 -> khash:del(DDocIds, DDocId);
+                _ -> ok
+            end,
+            case khash:size(DDocIds) of
+                0 -> khash:del(Dbs, DDocId);
+                _ -> ok
+            end;
+        not_found ->
+            ok
+    end,
+    {reply, ok, St};
+
 handle_call(Msg, _From, St) ->
     {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}.
 
@@ -115,6 +164,8 @@ handle_cast({accessed, Key}, St) ->
     NewTime = Time + 1,
     case khash:lookup(Keys, Key) of
         {value, OldTime} ->
+            [#entry{pid = Pid}] = ets:lookup(?CACHE, Key),
+            true = is_process_alive(Pid),
             true = ets:delete(?ATIMES, OldTime),
             true = ets:insert(?ATIMES, {NewTime, Key}),
             ok = khash:put(Keys, NewTime);
@@ -135,15 +186,15 @@ handle_cast({evict, _, _} = Msg, St) ->
 
 handle_cast({do_evict, DbName}, St) ->
     #st{
-        keys = KeyTimes,
         dbs = Dbs
     } = St,
     case khash:lookup(Dbs, DbName) of
         {value, DDocIds} ->
             khash:fold(DDocIds, fun(_, Keys, _) ->
                 khash:fold(Keys, fun(Key, _, _) ->
-                    {value, Time} = khash:lookup(KeyTimes, Key),
-                    remove(St, Time)
+                    [#entry{pid = Pid}] = ets:lookup(?CACHE, Key),
+                    ddoc_cache_refresher:stop(Pid),
+                    remove_entry(St, Key)
                 end, nil)
             end, nil),
             khash:del(Dbs, DbName);
@@ -154,7 +205,6 @@ handle_cast({do_evict, DbName}, St) ->
 
 handle_cast({do_evict, DbName, DDocIds}, St) ->
     #st{
-        keys = KeyTimes,
         dbs = Dbs
     } = St,
     case khash:lookup(Dbs, DbName) of
@@ -163,8 +213,9 @@ handle_cast({do_evict, DbName, DDocIds}, St) ->
                 case khash:lookup(DDocIds, DDocId) of
                     {value, Keys} ->
                         khash:fold(Keys, fun(Key, _, _) ->
-                            {value, Time} = khash:lookup(KeyTimes, Key),
-                            remove(St, Time)
+                            [#entry{pid = Pid}] = ets:lookup(?CACHE, Key),
+                            ddoc_cache_refresher:stop(Pid),
+                            remove_entry(St, Key)
                         end, nil);
                     not_found ->
                         ok
@@ -239,21 +290,29 @@ trim(St) ->
         true ->
             case ets:first(?ATIMES) of
                 '$end_of_table' ->
-                    St;
+                    ok;
                 ATime ->
-                    trim(remove(St, ATime))
+                    [{ATime, Key}] = ets:lookup(?ATIMES, ATime),
+                    remove_entry(St, Key, ATime),
+                    trim(St)
             end;
         false ->
-            St
+            ok
     end.
 
 
-remove(St, ATime) ->
+remove_entry(St, Key) ->
+    #st{
+        keys = Keys
+    } = St,
+    {value, ATime} = khash:lookup(Keys, Key),
+    remove_entry(St, Key, ATime).
+
+
+remove_entry(St, Key, ATime) ->
     #st{
         keys = Keys
     } = St,
-    {value, Key} = khash:lookup(Keys, ATime),
     true = ets:delete(?CACHE, Key),
     true = ets:delete(?ATIMES, ATime),
-    ok = khash:del(Keys, Key),
-    St.
+    ok = khash:del(Keys, Key).
diff --git a/src/ddoc_cache/src/ddoc_cache_refresher.erl b/src/ddoc_cache/src/ddoc_cache_refresher.erl
new file mode 100644
index 0000000..8e8a6ef
--- /dev/null
+++ b/src/ddoc_cache/src/ddoc_cache_refresher.erl
@@ -0,0 +1,117 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(ddoc_cache_refresher).
+-behaviour(gen_server).
+-vsn(1).
+
+
+-export([
+    spawn/1,
+    stop/1
+]).
+
+-export([
+    init/1,
+    terminate/2,
+    handle_call/3,
+    handle_cast/2,
+    handle_info/2,
+    code_change/3
+]).
+
+
+-include("ddoc_cache.hrl").
+
+
+-record(st, {
+    key
+}).
+
+
+-define(REFRESH_TIMEOUT, 67000).
+
+
+spawn(Key) ->
+    proc_lib:spawn(?MODULE, init, [{self(), Key}]).
+
+
+stop(Pid) ->
+    gen_server:cast(Pid, stop).
+
+
+init({Parent, Key}) ->
+    process_flag(trap_exit, true),
+    erlang:monitor(process, Parent),
+    gen_server:enter_loop(?MODULE, [], #st{key = Key}, ?REFRESH_TIMEOUT).
+
+
+terminate(_Reason, _St) ->
+    ok.
+
+
+handle_call(Msg, _From, St) ->
+    {stop, {invalid_call, Msg}, {invalid_call, Msg}, St}.
+
+
+handle_cast(stop, St) ->
+    {stop, normal, St};
+
+handle_cast(Msg, St) ->
+    {stop, {invalid_cast, Msg}, St}.
+
+
+handle_info(timeout, St) ->
+    ddoc_cache_entry:spawn_link(St#st.key),
+    {noreply, St};
+
+handle_info({'EXIT', _, {open_ok, Key, Resp}}, #st{key = Key} = St) ->
+    Self = self(),
+    case Resp of
+        {ok, Val} ->
+            case ets:lookup(?CACHE, Key) of
+                [] ->
+                    % We were evicted
+                    {stop, normal, St};
+                [#entry{key = Key, val = Val, pid = Self}] ->
+                    % Value hasn't changed, do nothing
+                    {noreply, St};
+                [#entry{key = Key, pid = Self}] ->
+                    % Value changed, update cache
+                    case ddoc_cache_lru:refresh(Key, Val) of
+                        ok ->
+                            {noreply, St};
+                        evicted ->
+                            {stop, normal, St}
+                    end
+            end;
+        _Else ->
+            ddoc_cache_lru:remove(Key),
+            {stop, normal, St}
+    end;
+
+handle_info({'EXIT', _, _}, #st{key = Key} = St) ->
+    % Somethign went wrong trying to refresh the cache
+    % so bail in the interest of safety.
+    ddoc_cache_lru:remove(Key),
+    {stop, normal, St};
+
+handle_info({'DOWN', _, _, _, _}, St) ->
+    % ddoc_cache_lru died, so we will as well
+    {stop, normal, St};
+
+handle_info(Msg, St) ->
+    {stop, {invalid_info, Msg}, St}.
+
+
+code_change(_OldVsn, St, _Extra) ->
+    {ok, St}.

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.