You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2023/05/16 22:56:10 UTC
[couchdb] 01/01: Optimize mem3:dbname/1 function
This is an automated email from the ASF dual-hosted git repository.
vatamane pushed a commit to branch optimize-dbname-function
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 19c638dbc009099aeda78d2e1ee2003f87088fea
Author: Nick Vatamaniuc <va...@gmail.com>
AuthorDate: Tue May 16 18:42:30 2023 -0400
Optimize mem3:dbname/1 function
`mem3:dbname/1` with a `<<"shard/...">>` binary is called quite a few times as seen when profiling with fprof:
https://gist.github.com/nickva/38760462c1545bf55d98f4898ae1983d
In that case `mem3:dbname` is removing the timestamp suffix. However, because
it uses `filename:rootname/1` which handles cases pertaining to file system
paths and such, it ends up being a bit more expensive than necessary.
To optimize it, try avoid translating it a list and parse the extension
starting at the end of the binary. To avoid surprises, attempt to emulate exact
behavior of the previously used filename:rootname/1 calls for various edge
cases (`.`, `/.` etc). Most of those should be asserted in eunit test.
A quick speed comparison test shows a 4x speedup or so:
```
speed_compare_test() ->
DbName = <<"fabricbenchdb-1684269710714470.1684269710">>,
Ids = lists:seq(1, 1000),
{LocalUSec, ok} = timer:tc(fun() -> lists:foreach(fun(_) -> strip_suffix(DbName) end, Ids) end),
{FilenameUSec, ok} = timer:tc(fun() -> lists:foreach(fun(_) -> list_to_binary(filename:rootname(binary_to_list(DbName))) end, Ids) end),
> mem3:speed_compare_test().
{0.327,1.324}
```
---
src/mem3/src/mem3.erl | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 91 insertions(+)
diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl
index 7b4a8eca2..d71cdffdf 100644
--- a/src/mem3/src/mem3.erl
+++ b/src/mem3/src/mem3.erl
@@ -462,6 +462,33 @@ db_is_current(Name) when is_binary(Name) ->
% for unit tests that either test or use mem3_rep logic
couch_server:exists(Name).
+strip_suffix(<<>>) ->
+ <<>>;
+strip_suffix(DbName) when is_binary(DbName) ->
+ Pos = byte_size(DbName) - 1,
+ strip_suffix(DbName, binary:at(DbName, Pos), Pos).
+
+strip_suffix(_, $., 0) ->
+ <<>>;
+strip_suffix(DbName, $., Pos) when Pos > 0 ->
+ % The first . from the end. Return everything from start until Pos,
+ % effectively stripping out the extension, unless it is preceeded by / to
+ % preserve the previously used filename:rootname/1 behavior.
+ case binary:at(DbName, Pos - 1) of
+ $/ -> DbName;
+ _ -> binary_part(DbName, 0, Pos)
+ end;
+strip_suffix(DbName, $/, Pos) when Pos >= 0 ->
+ % Found the first / from the end, we stop here to be compatible with the
+ % previously used filename:rootname/1 behavior.
+ DbName;
+strip_suffix(DbName, _, 0) ->
+ % Reached the front of the string
+ DbName;
+strip_suffix(DbName, _, Pos) when Pos > 0 ->
+ Pos1 = Pos - 1,
+ strip_suffix(DbName, binary:at(DbName, Pos1), Pos1).
+
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
@@ -505,4 +532,68 @@ rotate_rand_distribution_test() ->
Cases = [rotate_rand([1, 2, 3]) || _ <- lists:seq(1, 100)],
?assertEqual(3, length(lists:usort(Cases))).
+strip_suffix_test() ->
+ % Assert that strip_suffix function matches the behavior of
+ % the previously used filename:rootname/1
+
+ ?assertEqual(<<>>, strip_suffix(<<>>)),
+ ?assertEqual(strip_suffix(<<>>), filename:rootname(<<>>)),
+
+ ?assertEqual(<<"a">>, strip_suffix(<<"a">>)),
+ ?assertEqual(strip_suffix(<<"a">>), filename:rootname(<<"a">>)),
+
+ ?assertEqual(<<"abc">>, strip_suffix(<<"abc">>)),
+ ?assertEqual(strip_suffix(<<"abc">>), filename:rootname(<<"abc">>)),
+
+ ?assertEqual(<<"/">>, strip_suffix(<<"/">>)),
+ ?assertEqual(strip_suffix(<<"/">>), filename:rootname(<<"/">>)),
+
+ ?assertEqual(<<"//">>, strip_suffix(<<"//">>)),
+ ?assertEqual(strip_suffix(<<"//">>), filename:rootname(<<"//">>)),
+
+ ?assertEqual(<<"/a">>, strip_suffix(<<"/a">>)),
+ ?assertEqual(strip_suffix(<<"/a">>), filename:rootname(<<"/a">>)),
+
+ ?assertEqual(<<"a/">>, strip_suffix(<<"a/">>)),
+ ?assertEqual(strip_suffix(<<"a/">>), filename:rootname(<<"a/">>)),
+
+ ?assertEqual(<<>>, strip_suffix(<<".">>)),
+ ?assertEqual(strip_suffix(<<".">>), filename:rootname(<<".">>)),
+
+ ?assertEqual(<<".">>, strip_suffix(<<"..">>)),
+ ?assertEqual(strip_suffix(<<"..">>), filename:rootname(<<"..">>)),
+
+ ?assertEqual(<<"/.">>, strip_suffix(<<"/.">>)),
+ ?assertEqual(strip_suffix(<<"/.">>), filename:rootname(<<"/.">>)),
+
+ ?assertEqual(<<"./">>, strip_suffix(<<"./">>)),
+ ?assertEqual(strip_suffix(<<"./">>), filename:rootname(<<"./">>)),
+
+ ?assertEqual(<<>>, strip_suffix(<<".a">>)),
+ ?assertEqual(strip_suffix(<<".a">>), filename:rootname(<<".a">>)),
+
+ ?assertEqual(<<>>, strip_suffix(<<".ab">>)),
+ ?assertEqual(strip_suffix(<<".ab">>), filename:rootname(<<".ab">>)),
+
+ ?assertEqual(<<"a">>, strip_suffix(<<"a.b">>)),
+ ?assertEqual(strip_suffix(<<"a.b">>), filename:rootname(<<"a.b">>)),
+
+ ?assertEqual(<<"a">>, strip_suffix(<<"a.bc">>)),
+ ?assertEqual(strip_suffix(<<"a.bc">>), filename:rootname(<<"a.bc">>)),
+
+ ?assertEqual(<<"a./b">>, strip_suffix(<<"a./b">>)),
+ ?assertEqual(strip_suffix(<<"a./b">>), filename:rootname(<<"a./b">>)),
+
+ ?assertEqual(<<"a/.b">>, strip_suffix(<<"a/.b">>)),
+ ?assertEqual(strip_suffix(<<"a/.b">>), filename:rootname(<<"a/.b">>)),
+
+ ?assertEqual(<<"a/.">>, strip_suffix(<<"a/..b">>)),
+ ?assertEqual(strip_suffix(<<"a/..b">>), filename:rootname(<<"a/..b">>)),
+
+ ?assertEqual(<<"/a/b">>, strip_suffix(<<"/a/b">>)),
+ ?assertEqual(strip_suffix(<<"/a/b">>), filename:rootname(<<"/a/b">>)),
+
+ ?assertEqual(<<"/a/b">>, strip_suffix(<<"/a/b.c">>)),
+ ?assertEqual(strip_suffix(<<"/a/b.c">>), filename:rootname(<<"/a/b.c">>)).
+
-endif.