You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2023/05/16 22:56:10 UTC

[couchdb] 01/01: Optimize mem3:dbname/1 function

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch optimize-dbname-function
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 19c638dbc009099aeda78d2e1ee2003f87088fea
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Tue May 16 18:42:30 2023 -0400

    Optimize mem3:dbname/1 function
    
    `mem3:dbname/1` with a `<<"shard/...">>` binary is called quite a few times as seen when profiling with fprof:
    https://gist.github.com/nickva/38760462c1545bf55d98f4898ae1983d
    
    In that case `mem3:dbname` is removing the timestamp suffix. However, because
    it uses `filename:rootname/1` which handles cases pertaining to file system
    paths and such, it ends up being a bit more expensive than necessary.
    
    To optimize it, try avoid translating it a list and parse the extension
    starting at the end of the binary. To avoid surprises, attempt to emulate exact
    behavior of the previously used filename:rootname/1 calls for various edge
    cases (`.`, `/.` etc). Most of those should be asserted in eunit test.
    
    A quick speed comparison test shows a 4x speedup or so:
    ```
    speed_compare_test() ->
        DbName = <<"fabricbenchdb-1684269710714470.1684269710">>,
        Ids = lists:seq(1, 1000),
        {LocalUSec, ok} = timer:tc(fun() -> lists:foreach(fun(_) -> strip_suffix(DbName) end, Ids) end),
        {FilenameUSec, ok} = timer:tc(fun() -> lists:foreach(fun(_) -> list_to_binary(filename:rootname(binary_to_list(DbName))) end, Ids) end),
    
    > mem3:speed_compare_test().
    {0.327,1.324}
    ```
---
 src/mem3/src/mem3.erl | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl
index 7b4a8eca2..d71cdffdf 100644
--- a/src/mem3/src/mem3.erl
+++ b/src/mem3/src/mem3.erl
@@ -462,6 +462,33 @@ db_is_current(Name) when is_binary(Name) ->
     % for unit tests that either test or use mem3_rep logic
     couch_server:exists(Name).
 
+strip_suffix(<<>>) ->
+    <<>>;
+strip_suffix(DbName) when is_binary(DbName) ->
+    Pos = byte_size(DbName) - 1,
+    strip_suffix(DbName, binary:at(DbName, Pos), Pos).
+
+strip_suffix(_, $., 0) ->
+    <<>>;
+strip_suffix(DbName, $., Pos) when Pos > 0 ->
+    % The first . from the end. Return everything from start until Pos,
+    % effectively stripping out the extension, unless it is preceeded by / to
+    % preserve the previously used filename:rootname/1 behavior.
+    case binary:at(DbName, Pos - 1) of
+        $/ -> DbName;
+        _ -> binary_part(DbName, 0, Pos)
+    end;
+strip_suffix(DbName, $/, Pos) when Pos >= 0 ->
+    % Found the first / from the end, we stop here to be compatible with the
+    % previously used filename:rootname/1 behavior.
+    DbName;
+strip_suffix(DbName, _, 0) ->
+    % Reached the front of the string
+    DbName;
+strip_suffix(DbName, _, Pos) when Pos > 0 ->
+    Pos1 = Pos - 1,
+    strip_suffix(DbName, binary:at(DbName, Pos1), Pos1).
+
 -ifdef(TEST).
 
 -include_lib("eunit/include/eunit.hrl").
@@ -505,4 +532,68 @@ rotate_rand_distribution_test() ->
     Cases = [rotate_rand([1, 2, 3]) || _ <- lists:seq(1, 100)],
     ?assertEqual(3, length(lists:usort(Cases))).
 
+strip_suffix_test() ->
+    % Assert that strip_suffix function matches the behavior of
+    % the previously used filename:rootname/1
+
+    ?assertEqual(<<>>, strip_suffix(<<>>)),
+    ?assertEqual(strip_suffix(<<>>), filename:rootname(<<>>)),
+
+    ?assertEqual(<<"a">>, strip_suffix(<<"a">>)),
+    ?assertEqual(strip_suffix(<<"a">>), filename:rootname(<<"a">>)),
+
+    ?assertEqual(<<"abc">>, strip_suffix(<<"abc">>)),
+    ?assertEqual(strip_suffix(<<"abc">>), filename:rootname(<<"abc">>)),
+
+    ?assertEqual(<<"/">>, strip_suffix(<<"/">>)),
+    ?assertEqual(strip_suffix(<<"/">>), filename:rootname(<<"/">>)),
+
+    ?assertEqual(<<"//">>, strip_suffix(<<"//">>)),
+    ?assertEqual(strip_suffix(<<"//">>), filename:rootname(<<"//">>)),
+
+    ?assertEqual(<<"/a">>, strip_suffix(<<"/a">>)),
+    ?assertEqual(strip_suffix(<<"/a">>), filename:rootname(<<"/a">>)),
+
+    ?assertEqual(<<"a/">>, strip_suffix(<<"a/">>)),
+    ?assertEqual(strip_suffix(<<"a/">>), filename:rootname(<<"a/">>)),
+
+    ?assertEqual(<<>>, strip_suffix(<<".">>)),
+    ?assertEqual(strip_suffix(<<".">>), filename:rootname(<<".">>)),
+
+    ?assertEqual(<<".">>, strip_suffix(<<"..">>)),
+    ?assertEqual(strip_suffix(<<"..">>), filename:rootname(<<"..">>)),
+
+    ?assertEqual(<<"/.">>, strip_suffix(<<"/.">>)),
+    ?assertEqual(strip_suffix(<<"/.">>), filename:rootname(<<"/.">>)),
+
+    ?assertEqual(<<"./">>, strip_suffix(<<"./">>)),
+    ?assertEqual(strip_suffix(<<"./">>), filename:rootname(<<"./">>)),
+
+    ?assertEqual(<<>>, strip_suffix(<<".a">>)),
+    ?assertEqual(strip_suffix(<<".a">>), filename:rootname(<<".a">>)),
+
+    ?assertEqual(<<>>, strip_suffix(<<".ab">>)),
+    ?assertEqual(strip_suffix(<<".ab">>), filename:rootname(<<".ab">>)),
+
+    ?assertEqual(<<"a">>, strip_suffix(<<"a.b">>)),
+    ?assertEqual(strip_suffix(<<"a.b">>), filename:rootname(<<"a.b">>)),
+
+    ?assertEqual(<<"a">>, strip_suffix(<<"a.bc">>)),
+    ?assertEqual(strip_suffix(<<"a.bc">>), filename:rootname(<<"a.bc">>)),
+
+    ?assertEqual(<<"a./b">>, strip_suffix(<<"a./b">>)),
+    ?assertEqual(strip_suffix(<<"a./b">>), filename:rootname(<<"a./b">>)),
+
+    ?assertEqual(<<"a/.b">>, strip_suffix(<<"a/.b">>)),
+    ?assertEqual(strip_suffix(<<"a/.b">>), filename:rootname(<<"a/.b">>)),
+
+    ?assertEqual(<<"a/.">>, strip_suffix(<<"a/..b">>)),
+    ?assertEqual(strip_suffix(<<"a/..b">>), filename:rootname(<<"a/..b">>)),
+
+    ?assertEqual(<<"/a/b">>, strip_suffix(<<"/a/b">>)),
+    ?assertEqual(strip_suffix(<<"/a/b">>), filename:rootname(<<"/a/b">>)),
+
+    ?assertEqual(<<"/a/b">>, strip_suffix(<<"/a/b.c">>)),
+    ?assertEqual(strip_suffix(<<"/a/b.c">>), filename:rootname(<<"/a/b.c">>)).
+
 -endif.