You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2018/10/25 22:46:09 UTC
[couchdb] 06/11: Implement configurable hash functions
This is an automated email from the ASF dual-hosted git repository.
davisp pushed a commit to branch feature/user-partitioned-databases-davisp
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 0dccfd9662316649d47546b9b9bdcecefe2973ac
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Thu Oct 25 16:58:48 2018 -0500
Implement configurable hash functions
This provides the capability for features to specify alternative hash
functions for placing documents in a given shard range. While the
functionality exists with this implementation, it is not yet actually
used.
---
src/mem3/src/mem3.erl | 8 ++---
src/mem3/src/mem3_hash.erl | 76 ++++++++++++++++++++++++++++++++++++++++++++
src/mem3/src/mem3_shards.erl | 4 +--
src/mem3/src/mem3_util.erl | 21 ++++++++----
4 files changed, 96 insertions(+), 13 deletions(-)
diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl
index de63300..ae52104 100644
--- a/src/mem3/src/mem3.erl
+++ b/src/mem3/src/mem3.erl
@@ -234,15 +234,15 @@ dbname(_) ->
%% @doc Determine if DocId belongs in shard (identified by record or filename)
belongs(#shard{}=Shard, DocId) when is_binary(DocId) ->
[Begin, End] = range(Shard),
- belongs(Begin, End, DocId);
+ belongs(Begin, End, Shard, DocId);
belongs(<<"shards/", _/binary>> = ShardName, DocId) when is_binary(DocId) ->
[Begin, End] = range(ShardName),
- belongs(Begin, End, DocId);
+ belongs(Begin, End, ShardName, DocId);
belongs(DbName, DocId) when is_binary(DbName), is_binary(DocId) ->
true.
-belongs(Begin, End, DocId) ->
- HashKey = mem3_util:hash(DocId),
+belongs(Begin, End, Shard, DocId) ->
+ HashKey = mem3_hash:calculate(Shard, DocId),
Begin =< HashKey andalso HashKey =< End.
range(#shard{range = Range}) ->
diff --git a/src/mem3/src/mem3_hash.erl b/src/mem3/src/mem3_hash.erl
new file mode 100644
index 0000000..4003aca
--- /dev/null
+++ b/src/mem3/src/mem3_hash.erl
@@ -0,0 +1,76 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(mem3_hash).
+
+-export([
+ calculate/2,
+
+ get_hash_fun/1,
+
+ crc32/1
+]).
+
+
+-include_lib("mem3/include/mem3.hrl").
+
+
+%% @doc Hash DocId with the hash function configured for a shard record, a
+%% database name binary, or an explicit {Mod, Fun, Args} triple.
+calculate(#shard{opts = Opts}, DocId) ->
+    Props = couch_util:get_value(props, Opts, []),
+    calculate(get_hash_fun_int(Props), DocId);
+
+calculate(#ordered_shard{opts = Opts}, DocId) ->
+    Props = couch_util:get_value(props, Opts, []),
+    calculate(get_hash_fun_int(Props), DocId);
+
+calculate(DbName, DocId) when is_binary(DbName) ->
+    calculate(get_hash_fun(DbName), DocId);
+
+calculate({Mod, Fun, Args}, DocId) ->
+    %% DocId is prepended to the configured extra arguments.
+    erlang:apply(Mod, Fun, [DocId | Args]).
+
+
+%% @doc Return the {Mod, Fun, Args} hash function for a shard or database.
+%% The setting is read from the `props` entry of the shard options.
+get_hash_fun(#shard{opts = Opts}) ->
+    get_hash_fun_int(couch_util:get_value(props, Opts, []));
+get_hash_fun(#ordered_shard{opts = Opts}) ->
+    get_hash_fun_int(couch_util:get_value(props, Opts, []));
+get_hash_fun(DbName0) when is_binary(DbName0) ->
+    DbName = mem3:dbname(DbName0),
+    try
+        [#shard{opts = Opts} | _] = mem3_shards:for_db(DbName),
+        get_hash_fun_int(couch_util:get_value(props, Opts, []))
+    catch error:database_does_not_exist ->
+        {?MODULE, crc32, []}
+    end.
+
+
+%% @doc CRC32 checksum of Item; non-binary terms are serialized first.
+crc32(Item) ->
+    Bytes = case is_binary(Item) of true -> Item; false -> term_to_binary(Item) end,
+    erlang:crc32(Bytes).
+
+
+
+%% Pick the hash MFA out of a props list. Anything other than a
+%% {hash, [Mod, Fun, Args]} entry selects the default, crc32/1.
+get_hash_fun_int(Opts) when is_list(Opts) ->
+    case lists:keyfind(hash, 1, Opts) of
+        {hash, [Mod, Fun, Args]} -> {Mod, Fun, Args};
+        _ -> {?MODULE, crc32, []}
+    end.
+
+
diff --git a/src/mem3/src/mem3_shards.erl b/src/mem3/src/mem3_shards.erl
index 183f28f..18fca23 100644
--- a/src/mem3/src/mem3_shards.erl
+++ b/src/mem3/src/mem3_shards.erl
@@ -67,7 +67,7 @@ for_docid(DbName, DocId) ->
for_docid(DbName, DocId, []).
for_docid(DbName, DocId, Options) ->
- HashKey = mem3_util:hash(DocId),
+ HashKey = mem3_hash:calculate(DbName, DocId),
ShardHead = #shard{
dbname = DbName,
range = ['$1', '$2'],
@@ -397,7 +397,7 @@ load_shards_from_db(ShardDb, DbName) ->
load_shards_from_disk(DbName, DocId)->
Shards = load_shards_from_disk(DbName),
- HashKey = mem3_util:hash(DocId),
+ HashKey = mem3_hash:calculate(hd(Shards), DocId),
[S || S <- Shards, in_range(S, HashKey)].
in_range(Shard, HashKey) ->
diff --git a/src/mem3/src/mem3_util.erl b/src/mem3/src/mem3_util.erl
index 254a6df..c6a8494 100644
--- a/src/mem3/src/mem3_util.erl
+++ b/src/mem3/src/mem3_util.erl
@@ -12,7 +12,7 @@
-module(mem3_util).
--export([hash/1, name_shard/2, create_partition_map/5, build_shards/2,
+-export([name_shard/2, create_partition_map/5, build_shards/2,
n_val/2, q_val/1, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1,
shard_info/1, ensure_exists/1, open_db_doc/1]).
-export([is_deleted/1, rotate_list/2]).
@@ -29,10 +29,6 @@
-include_lib("mem3/include/mem3.hrl").
-include_lib("couch/include/couch_db.hrl").
-hash(Item) when is_binary(Item) ->
- erlang:crc32(Item);
-hash(Item) ->
- erlang:crc32(term_to_binary(Item)).
name_shard(Shard) ->
name_shard(Shard, "").
@@ -162,7 +158,7 @@ build_shards_by_node(DbName, DocProps) ->
dbname = DbName,
node = to_atom(Node),
range = [Beg, End],
- opts = get_engine_opt(DocProps)
+ opts = get_shard_opts(DocProps)
}, Suffix)
end, Ranges)
end, ByNode).
@@ -180,7 +176,7 @@ build_shards_by_range(DbName, DocProps) ->
node = to_atom(Node),
range = [Beg, End],
order = Order,
- opts = get_engine_opt(DocProps)
+ opts = get_shard_opts(DocProps)
}, Suffix)
end, lists:zip(Nodes, lists:seq(1, length(Nodes))))
end, ByRange).
@@ -197,6 +193,9 @@ to_integer(N) when is_binary(N) ->
to_integer(N) when is_list(N) ->
list_to_integer(N).
+get_shard_opts(DocProps) ->
+ get_engine_opt(DocProps) ++ get_props_opt(DocProps).
+
get_engine_opt(DocProps) ->
case couch_util:get_value(<<"engine">>, DocProps) of
Engine when is_binary(Engine) ->
@@ -205,6 +204,14 @@ get_engine_opt(DocProps) ->
[]
end.
+get_props_opt(DocProps) ->
+ case couch_util:get_value(<<"props">>, DocProps) of
+ {Props} when is_list(Props) ->
+ [{props, Props}];
+ _ ->
+ []
+ end.
+
n_val(undefined, NodeCount) ->
n_val(config:get("cluster", "n", "3"), NodeCount);
n_val(N, NodeCount) when is_list(N) ->