You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ch...@apache.org on 2021/08/24 22:00:41 UTC

[couchdb] 01/03: Add couch_file cache

This is an automated email from the ASF dual-hosted git repository.

chewbranca pushed a commit to branch chewbranca-ioq-experiments
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 2cdfb8f967c0860ea2f04554757925731b78106e
Author: Russell Branca <ch...@apache.org>
AuthorDate: Thu Aug 12 14:32:56 2021 -0700

    Add couch_file cache
---
 src/couch/src/couch_bt_engine.erl |  2 ++
 src/couch/src/couch_db_engine.erl |  7 +++++++
 src/couch/src/couch_file.erl      | 41 +++++++++++++++++++++++++++++++++++++--
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/src/couch/src/couch_bt_engine.erl b/src/couch/src/couch_bt_engine.erl
index 48e751a..60f79c3 100644
--- a/src/couch/src/couch_bt_engine.erl
+++ b/src/couch/src/couch_bt_engine.erl
@@ -846,6 +846,8 @@ copy_props(#st{header = Header} = St, Props) ->
 
 
 open_db_file(FilePath, Options) ->
+    Hash = list_to_atom(integer_to_list(mem3_hash:crc32(FilePath))),
+    erlang:put(couch_file_hash, Hash),
     case couch_file:open(FilePath, Options) of
         {ok, Fd} ->
             {ok, Fd};
diff --git a/src/couch/src/couch_db_engine.erl b/src/couch/src/couch_db_engine.erl
index 918dabc..f1ba81c 100644
--- a/src/couch/src/couch_db_engine.erl
+++ b/src/couch/src/couch_db_engine.erl
@@ -944,6 +944,13 @@ set_update_seq(#db{} = Db, UpdateSeq) ->
 
 
 open_docs(#db{} = Db, DocIds) ->
+    % Key the couch_file cache on this database's own file on every call.
+    % A couch_file_hash left in the process dictionary by an earlier call
+    % may name a different file's table, so it must not be reused as-is.
+    Hash = list_to_atom(integer_to_list(mem3_hash:crc32(Db#db.filepath))),
+    erlang:put(couch_file_hash, Hash),
+    % NOTE(review): assumes #db.filepath is the path couch_file was opened
+    % with, as the original fallback branch already did -- confirm.
     #db{engine = {Engine, EngineState}} = Db,
     Engine:open_docs(EngineState, DocIds).
 
diff --git a/src/couch/src/couch_file.erl b/src/couch/src/couch_file.erl
index b1e3555..b29c54d 100644
--- a/src/couch/src/couch_file.erl
+++ b/src/couch/src/couch_file.erl
@@ -38,7 +38,7 @@
 
 % public API
 -export([open/1, open/2, close/1, bytes/1, sync/1, truncate/2, set_db_pid/2]).
--export([pread_term/2, pread_iolist/2, pread_binary/2]).
+-export([pread_term/2, pread_term/3, pread_iolist/2, pread_binary/2]).
 -export([append_binary/2, append_binary_md5/2]).
 -export([append_raw_chunk/2, assemble_file_chunk/1, assemble_file_chunk/2]).
 -export([append_term/2, append_term/3, append_term_md5/2, append_term_md5/3]).
@@ -155,12 +155,40 @@ assemble_file_chunk(Bin, Md5) ->
 %%  or {error, Reason}.
 %%----------------------------------------------------------------------
 
-
 pread_term(Fd, Pos) ->
+    %% NOTE(review): true is presumably the default when the key is unset.
+    Enabled = config:get_boolean("couchdb", "use_couch_file_cache", true),
+    pread_term(Fd, Pos, Enabled).
+
+
+%% Third argument: true to try the cache, false to read the file directly.
+pread_term(Fd, Pos, true) ->
+    case erlang:get(couch_file_hash) =/= undefined of
+        true -> load_from_cache(Fd, Pos);
+        false -> pread_term(Fd, Pos, false)
+    end;
+pread_term(Fd, Pos, false) ->
     {ok, Bin} = pread_binary(Fd, Pos),
     {ok, couch_compress:decompress(Bin)}.
 
 
+%% Purpose: Read the term at Pos via the ETS cache table named by the
+%%  couch_file_hash process dictionary entry; read the file on a miss.
+load_from_cache(Fd, Pos) ->
+    Hash = erlang:get(couch_file_hash),
+    %% A missing table (badarg) counts as a miss, not as a failed read.
+    case try ets:lookup(Hash, Pos) catch error:badarg -> [] end of
+        [{Pos, {ok, Res}}] ->
+            {ok, Res};
+        [] ->
+            %% The uncached clause of pread_term/3 never re-enters the cache.
+            Val = pread_term(Fd, Pos, false),
+            %% TODO: should probably be inserted directly by the gen_server
+            gen_server:cast(Fd, {cache, Pos, Val}),
+            Val
+    end.
+
+
 %%----------------------------------------------------------------------
 %% Purpose: Reads a binrary from a file that was written with append_binary
 %% Args:    Pos, the offset into the file where the term is serialized.
@@ -407,6 +435,9 @@ init({Filepath, Options, ReturnPid, Ref}) ->
     Limit = get_pread_limit(),
     IsSys = lists:member(sys_db, Options),
     update_read_timestamp(),
+    Tab = list_to_atom(integer_to_list(mem3_hash:crc32(Filepath))),
+    erlang:put(couch_file_cache, Tab),
+    ets:new(Tab, [set, protected, named_table, {read_concurrency, true}]),
     case lists:member(create, Options) of
     true ->
         filelib:ensure_dir(Filepath),
@@ -600,6 +631,12 @@ handle_call({write_header, Bin}, _From, #file{fd = Fd, eof = Pos} = File) ->
 handle_call(find_header, _From, #file{fd = Fd, eof = Pos} = File) ->
     {reply, find_header(Fd, Pos div ?SIZE_BLOCK), File}.
 
+
+handle_cast({cache, Pos, Term}, File) ->
+    %% TODO: should we skip if value exists?
+    %% The table name is read back from this process's dictionary.
+    ets:insert(erlang:get(couch_file_cache), {Pos, Term}),
+    {noreply, File};
 handle_cast(close, Fd) ->
     {stop,normal,Fd}.