You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2018/10/25 22:46:09 UTC

[couchdb] 06/11: Implement configurable hash functions

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch feature/user-partitioned-databases-davisp
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 0dccfd9662316649d47546b9b9bdcecefe2973ac
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Thu Oct 25 16:58:48 2018 -0500

    Implement configurable hash functions
    
    This provides the capability for features to specify alternative hash
    functions for placing documents in a given shard range. While the
    functionality exists with this implementation, it is not yet actually
    used.
---
 src/mem3/src/mem3.erl        |  8 ++---
 src/mem3/src/mem3_hash.erl   | 76 ++++++++++++++++++++++++++++++++++++++++++++
 src/mem3/src/mem3_shards.erl |  4 +--
 src/mem3/src/mem3_util.erl   | 21 ++++++++----
 4 files changed, 96 insertions(+), 13 deletions(-)

diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl
index de63300..ae52104 100644
--- a/src/mem3/src/mem3.erl
+++ b/src/mem3/src/mem3.erl
@@ -234,15 +234,15 @@ dbname(_) ->
 %% @doc Determine if DocId belongs in shard (identified by record or filename)
 belongs(#shard{}=Shard, DocId) when is_binary(DocId) ->
     [Begin, End] = range(Shard),
-    belongs(Begin, End, DocId);
+    belongs(Begin, End, Shard, DocId); % the shard record supplies the hash function
 belongs(<<"shards/", _/binary>> = ShardName, DocId) when is_binary(DocId) ->
     [Begin, End] = range(ShardName),
-    belongs(Begin, End, DocId);
+    belongs(Begin, End, ShardName, DocId); % hash function is looked up by name
 belongs(DbName, DocId) when is_binary(DbName), is_binary(DocId) ->
     true.
 
-belongs(Begin, End, DocId) ->
-    HashKey = mem3_util:hash(DocId),
+belongs(Begin, End, Shard, DocId) -> % Shard: #shard{} record or shard file name
+    HashKey = mem3_hash:calculate(Shard, DocId), % hash function chosen via Shard
     Begin =< HashKey andalso HashKey =< End.
 
 range(#shard{range = Range}) ->
diff --git a/src/mem3/src/mem3_hash.erl b/src/mem3/src/mem3_hash.erl
new file mode 100644
index 0000000..4003aca
--- /dev/null
+++ b/src/mem3/src/mem3_hash.erl
@@ -0,0 +1,76 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(mem3_hash).
+
+-export([
+    calculate/2,
+
+    get_hash_fun/1,
+
+    crc32/1
+]).
+
+
+-include_lib("mem3/include/mem3.hrl").
+
+%% @doc Hash DocId using the hash function configured for the first argument,
+%% which may be a #shard{}/#ordered_shard{} record, a database or shard name
+%% binary, or an explicit {Mod, Fun, Args} tuple.
+calculate(#shard{opts = Opts}, DocId) ->
+    Props = couch_util:get_value(props, Opts, []),
+    calculate(get_hash_fun_int(Props), DocId);
+
+calculate(#ordered_shard{opts = Opts}, DocId) ->
+    Props = couch_util:get_value(props, Opts, []),
+    calculate(get_hash_fun_int(Props), DocId);
+
+calculate(DbName, DocId) when is_binary(DbName) ->
+    calculate(get_hash_fun(DbName), DocId);
+
+% The hash function is applied as Mod:Fun(DocId, Args...).
+calculate({Mod, Fun, Args}, DocId) ->
+    erlang:apply(Mod, Fun, [DocId | Args]).
+
+%% @doc Return the {Mod, Fun, Args} hash function for a shard or database name.
+get_hash_fun(#shard{opts = Opts}) ->
+    get_hash_fun_int(couch_util:get_value(props, Opts, []));
+
+get_hash_fun(#ordered_shard{opts = Opts}) ->
+    get_hash_fun_int(couch_util:get_value(props, Opts, []));
+
+get_hash_fun(DbName0) when is_binary(DbName0) ->
+    DbName = mem3:dbname(DbName0),
+    try
+        [#shard{opts = Opts} | _] = mem3_shards:for_db(DbName),
+        get_hash_fun_int(couch_util:get_value(props, Opts, []))
+    catch error:database_does_not_exist ->
+        {?MODULE, crc32, []} % unknown db: fall back to this module's crc32/1
+    end.
+
+%% @doc CRC32 of Item; a non-binary term is serialized with term_to_binary/1 first.
+crc32(Item) when not is_binary(Item) ->
+    erlang:crc32(term_to_binary(Item));
+crc32(Bin) ->
+    erlang:crc32(Bin).
+
+
+%% Pick the hash MFA out of a props list; default to this module's crc32/1.
+get_hash_fun_int(Opts) when is_list(Opts) ->
+    case lists:keyfind(hash, 1, Opts) of
+        {hash, [Mod, Fun, Args]} ->
+            {Mod, Fun, Args};
+        _ ->
+            {?MODULE, crc32, []}
+    end.
+
+
diff --git a/src/mem3/src/mem3_shards.erl b/src/mem3/src/mem3_shards.erl
index 183f28f..18fca23 100644
--- a/src/mem3/src/mem3_shards.erl
+++ b/src/mem3/src/mem3_shards.erl
@@ -67,7 +67,7 @@ for_docid(DbName, DocId) ->
     for_docid(DbName, DocId, []).
 
 for_docid(DbName, DocId, Options) ->
-    HashKey = mem3_util:hash(DocId),
+    HashKey = mem3_hash:calculate(DbName, DocId), % hash function resolved from DbName
     ShardHead = #shard{
         dbname = DbName,
         range = ['$1', '$2'],
@@ -397,7 +397,7 @@ load_shards_from_db(ShardDb, DbName) ->
 
 load_shards_from_disk(DbName, DocId)->
     Shards = load_shards_from_disk(DbName),
-    HashKey = mem3_util:hash(DocId),
+    HashKey = mem3_hash:calculate(hd(Shards), DocId), % hash function of first shard
     [S || S <- Shards, in_range(S, HashKey)].
 
 in_range(Shard, HashKey) ->
diff --git a/src/mem3/src/mem3_util.erl b/src/mem3/src/mem3_util.erl
index 254a6df..c6a8494 100644
--- a/src/mem3/src/mem3_util.erl
+++ b/src/mem3/src/mem3_util.erl
@@ -12,7 +12,7 @@
 
 -module(mem3_util).
 
--export([hash/1, name_shard/2, create_partition_map/5, build_shards/2,
+-export([name_shard/2, create_partition_map/5, build_shards/2,
     n_val/2, q_val/1, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1,
     shard_info/1, ensure_exists/1, open_db_doc/1]).
 -export([is_deleted/1, rotate_list/2]).
@@ -29,10 +29,6 @@
 -include_lib("mem3/include/mem3.hrl").
 -include_lib("couch/include/couch_db.hrl").
 
-hash(Item) when is_binary(Item) ->
-    erlang:crc32(Item);
-hash(Item) ->
-    erlang:crc32(term_to_binary(Item)).
 
 name_shard(Shard) ->
     name_shard(Shard, "").
@@ -162,7 +158,7 @@ build_shards_by_node(DbName, DocProps) ->
                 dbname = DbName,
                 node = to_atom(Node),
                 range = [Beg, End],
-                opts = get_engine_opt(DocProps)
+                opts = get_shard_opts(DocProps)
             }, Suffix)
         end, Ranges)
     end, ByNode).
@@ -180,7 +176,7 @@ build_shards_by_range(DbName, DocProps) ->
                 node = to_atom(Node),
                 range = [Beg, End],
                 order = Order,
-                opts = get_engine_opt(DocProps)
+                opts = get_shard_opts(DocProps)
             }, Suffix)
         end, lists:zip(Nodes, lists:seq(1, length(Nodes))))
     end, ByRange).
@@ -197,6 +193,9 @@ to_integer(N) when is_binary(N) ->
 to_integer(N) when is_list(N) ->
     list_to_integer(N).
 
+get_shard_opts(DocProps) -> % engine option first, then the props option
+    lists:append(get_engine_opt(DocProps), get_props_opt(DocProps)).
+
 get_engine_opt(DocProps) ->
     case couch_util:get_value(<<"engine">>, DocProps) of
         Engine when is_binary(Engine) ->
@@ -205,6 +204,14 @@ get_engine_opt(DocProps) ->
             []
     end.
 
+%% Shard option holding the <<"props">> entry of DocProps; [] when there is none.
+get_props_opt(DocProps) ->
+    PropsValue = couch_util:get_value(<<"props">>, DocProps),
+    case PropsValue of
+        {KVs} when is_list(KVs) -> [{props, KVs}];
+        _NotAnObject -> []
+    end.
+
 n_val(undefined, NodeCount) ->
     n_val(config:get("cluster", "n", "3"), NodeCount);
 n_val(N, NodeCount) when is_list(N) ->