You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2023/05/16 22:56:09 UTC

[couchdb] branch optimize-dbname-function created (now 19c638dbc)

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a change to branch optimize-dbname-function
in repository https://gitbox.apache.org/repos/asf/couchdb.git


      at 19c638dbc Optimize mem3:dbname/1 function

This branch includes the following new commits:

     new 19c638dbc Optimize mem3:dbname/1 function

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[couchdb] 01/01: Optimize mem3:dbname/1 function

Posted by va...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch optimize-dbname-function
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 19c638dbc009099aeda78d2e1ee2003f87088fea
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Tue May 16 18:42:30 2023 -0400

    Optimize mem3:dbname/1 function
    
    `mem3:dbname/1` with a `<<"shard/...">>` binary is called quite a few times as seen when profiling with fprof:
    https://gist.github.com/nickva/38760462c1545bf55d98f4898ae1983d
    
    In that case `mem3:dbname` is removing the timestamp suffix. However, because
    it uses `filename:rootname/1` which handles cases pertaining to file system
    paths and such, it ends up being a bit more expensive than necessary.
    
    To optimize it, try avoid translating it a list and parse the extension
    starting at the end of the binary. To avoid surprises, attempt to emulate exact
    behavior of the previously used filename:rootname/1 calls for various edge
    cases (`.`, `/.` etc). Most of those should be asserted in eunit test.
    
    A quick speed comparison test shows a 4x speedup or so:
    ```
    speed_compare_test() ->
        DbName = <<"fabricbenchdb-1684269710714470.1684269710">>,
        Ids = lists:seq(1, 1000),
        {LocalUSec, ok} = timer:tc(fun() -> lists:foreach(fun(_) -> strip_suffix(DbName) end, Ids) end),
        {FilenameUSec, ok} = timer:tc(fun() -> lists:foreach(fun(_) -> list_to_binary(filename:rootname(binary_to_list(DbName))) end, Ids) end),
    
    > mem3:speed_compare_test().
    {0.327,1.324}
    ```
---
 src/mem3/src/mem3.erl | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl
index 7b4a8eca2..d71cdffdf 100644
--- a/src/mem3/src/mem3.erl
+++ b/src/mem3/src/mem3.erl
@@ -462,6 +462,33 @@ db_is_current(Name) when is_binary(Name) ->
     % for unit tests that either test or use mem3_rep logic
     couch_server:exists(Name).
 
+strip_suffix(<<>>) ->
+    <<>>;
+strip_suffix(DbName) when is_binary(DbName) ->
+    Pos = byte_size(DbName) - 1,
+    strip_suffix(DbName, binary:at(DbName, Pos), Pos).
+
+strip_suffix(_, $., 0) ->
+    <<>>;
+strip_suffix(DbName, $., Pos) when Pos > 0 ->
+    % The first . from the end. Return everything from start until Pos,
+    % effectively stripping out the extension, unless it is preceeded by / to
+    % preserve the previously used filename:rootname/1 behavior.
+    case binary:at(DbName, Pos - 1) of
+        $/ -> DbName;
+        _ -> binary_part(DbName, 0, Pos)
+    end;
+strip_suffix(DbName, $/, Pos) when Pos >= 0 ->
+    % Found the first / from the end, we stop here to be compatible with the
+    % previously used filename:rootname/1 behavior.
+    DbName;
+strip_suffix(DbName, _, 0) ->
+    % Reached the front of the string
+    DbName;
+strip_suffix(DbName, _, Pos) when Pos > 0 ->
+    Pos1 = Pos - 1,
+    strip_suffix(DbName, binary:at(DbName, Pos1), Pos1).
+
 -ifdef(TEST).
 
 -include_lib("eunit/include/eunit.hrl").
@@ -505,4 +532,68 @@ rotate_rand_distribution_test() ->
     Cases = [rotate_rand([1, 2, 3]) || _ <- lists:seq(1, 100)],
     ?assertEqual(3, length(lists:usort(Cases))).
 
+strip_suffix_test() ->
+    % Assert that strip_suffix function matches the behavior of
+    % the previously used filename:rootname/1
+
+    ?assertEqual(<<>>, strip_suffix(<<>>)),
+    ?assertEqual(strip_suffix(<<>>), filename:rootname(<<>>)),
+
+    ?assertEqual(<<"a">>, strip_suffix(<<"a">>)),
+    ?assertEqual(strip_suffix(<<"a">>), filename:rootname(<<"a">>)),
+
+    ?assertEqual(<<"abc">>, strip_suffix(<<"abc">>)),
+    ?assertEqual(strip_suffix(<<"abc">>), filename:rootname(<<"abc">>)),
+
+    ?assertEqual(<<"/">>, strip_suffix(<<"/">>)),
+    ?assertEqual(strip_suffix(<<"/">>), filename:rootname(<<"/">>)),
+
+    ?assertEqual(<<"//">>, strip_suffix(<<"//">>)),
+    ?assertEqual(strip_suffix(<<"//">>), filename:rootname(<<"//">>)),
+
+    ?assertEqual(<<"/a">>, strip_suffix(<<"/a">>)),
+    ?assertEqual(strip_suffix(<<"/a">>), filename:rootname(<<"/a">>)),
+
+    ?assertEqual(<<"a/">>, strip_suffix(<<"a/">>)),
+    ?assertEqual(strip_suffix(<<"a/">>), filename:rootname(<<"a/">>)),
+
+    ?assertEqual(<<>>, strip_suffix(<<".">>)),
+    ?assertEqual(strip_suffix(<<".">>), filename:rootname(<<".">>)),
+
+    ?assertEqual(<<".">>, strip_suffix(<<"..">>)),
+    ?assertEqual(strip_suffix(<<"..">>), filename:rootname(<<"..">>)),
+
+    ?assertEqual(<<"/.">>, strip_suffix(<<"/.">>)),
+    ?assertEqual(strip_suffix(<<"/.">>), filename:rootname(<<"/.">>)),
+
+    ?assertEqual(<<"./">>, strip_suffix(<<"./">>)),
+    ?assertEqual(strip_suffix(<<"./">>), filename:rootname(<<"./">>)),
+
+    ?assertEqual(<<>>, strip_suffix(<<".a">>)),
+    ?assertEqual(strip_suffix(<<".a">>), filename:rootname(<<".a">>)),
+
+    ?assertEqual(<<>>, strip_suffix(<<".ab">>)),
+    ?assertEqual(strip_suffix(<<".ab">>), filename:rootname(<<".ab">>)),
+
+    ?assertEqual(<<"a">>, strip_suffix(<<"a.b">>)),
+    ?assertEqual(strip_suffix(<<"a.b">>), filename:rootname(<<"a.b">>)),
+
+    ?assertEqual(<<"a">>, strip_suffix(<<"a.bc">>)),
+    ?assertEqual(strip_suffix(<<"a.bc">>), filename:rootname(<<"a.bc">>)),
+
+    ?assertEqual(<<"a./b">>, strip_suffix(<<"a./b">>)),
+    ?assertEqual(strip_suffix(<<"a./b">>), filename:rootname(<<"a./b">>)),
+
+    ?assertEqual(<<"a/.b">>, strip_suffix(<<"a/.b">>)),
+    ?assertEqual(strip_suffix(<<"a/.b">>), filename:rootname(<<"a/.b">>)),
+
+    ?assertEqual(<<"a/.">>, strip_suffix(<<"a/..b">>)),
+    ?assertEqual(strip_suffix(<<"a/..b">>), filename:rootname(<<"a/..b">>)),
+
+    ?assertEqual(<<"/a/b">>, strip_suffix(<<"/a/b">>)),
+    ?assertEqual(strip_suffix(<<"/a/b">>), filename:rootname(<<"/a/b">>)),
+
+    ?assertEqual(<<"/a/b">>, strip_suffix(<<"/a/b.c">>)),
+    ?assertEqual(strip_suffix(<<"/a/b.c">>), filename:rootname(<<"/a/b.c">>)).
+
 -endif.