You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by to...@apache.org on 2017/07/15 18:04:09 UTC
[couchdb] 01/01: calculate data_size correctly
This is an automated email from the ASF dual-hosted git repository.
tonysun83 pushed a commit to branch 3430-external-size-views
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit da5e7146af87c15c8bdc1a110b3f885d9bf2c4da
Author: Tony Sun <to...@cloudant.com>
AuthorDate: Mon Jun 12 09:26:49 2017 -0700
calculate data_size correctly
Previously, we calculated the ExternalSize for views by summing up the
sizes of all the nodes in the btree, which was also the compressed
size. Now the reduce function is modified to return an ExternalSize
computed from the uncompressed values in the KVList.
PR: https://github.com/apache/couchdb/pull/608
COUCHDB-3430
---
src/couch/test/couchdb_file_compression_tests.erl | 12 ++++++
src/couch_mrview/src/couch_mrview_util.erl | 47 +++++++++++++++++------
2 files changed, 48 insertions(+), 11 deletions(-)
diff --git a/src/couch/test/couchdb_file_compression_tests.erl b/src/couch/test/couchdb_file_compression_tests.erl
index 41d0556..fa2c226 100644
--- a/src/couch/test/couchdb_file_compression_tests.erl
+++ b/src/couch/test/couchdb_file_compression_tests.erl
@@ -125,6 +125,7 @@ compare_compression_methods(DbName) ->
DbSizeNone = db_disk_size(DbName),
ViewSizeNone = view_disk_size(DbName),
ExternalSizeNone = db_external_size(DbName),
+ ViewExternalSizeNone = view_external_size(DbName),
config:set("couchdb", "file_compression", "snappy", false),
compact_db(DbName),
@@ -132,6 +133,7 @@ compare_compression_methods(DbName) ->
DbSizeSnappy = db_disk_size(DbName),
ViewSizeSnappy = view_disk_size(DbName),
ExternalSizeSnappy = db_external_size(DbName),
+ ViewExternalSizeSnappy = view_external_size(DbName),
?assert(DbSizeNone > DbSizeSnappy),
?assert(ViewSizeNone > ViewSizeSnappy),
@@ -151,12 +153,15 @@ compare_compression_methods(DbName) ->
DbSizeDeflate9 = db_disk_size(DbName),
ViewSizeDeflate9 = view_disk_size(DbName),
ExternalSizeDeflate9 = db_external_size(DbName),
+ ViewExternalSizeDeflate9 = view_external_size(DbName),
?assert(DbSizeDeflate1 > DbSizeDeflate9),
?assert(ViewSizeDeflate1 > ViewSizeDeflate9),
?assert(ExternalSizePreCompact =:= ExternalSizeNone),
?assert(ExternalSizeNone =:= ExternalSizeSnappy),
?assert(ExternalSizeNone =:= ExternalSizeDeflate9).
+ ?assert(ViewExternalSizeNone =:= ViewExternalSizeSnappy),
+ ?assert(ViewExternalSizeNone =:= ViewExternalSizeDeflate9).
populate_db(_Db, NumDocs) when NumDocs =< 0 ->
@@ -214,6 +219,13 @@ view_disk_size(DbName) ->
ok = couch_db:close(Db),
active_size(Info).
+view_external_size(DbName) ->
+ {ok, Db} = couch_db:open_int(DbName, []),
+ {ok, DDoc} = couch_db:open_doc(Db, ?DDOC_ID, [ejson_body]),
+ {ok, Info} = couch_mrview:get_info(Db, DDoc),
+ ok = couch_db:close(Db),
+ external_size(Info).
+
active_size(Info) ->
couch_util:get_nested_json_value({Info}, [sizes, active]).
diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl
index a8462a2..6325222 100644
--- a/src/couch_mrview/src/couch_mrview_util.erl
+++ b/src/couch_mrview/src/couch_mrview_util.erl
@@ -338,7 +338,10 @@ temp_view_to_ddoc({Props}) ->
get_row_count(#mrview{btree=Bt}) ->
- {ok, {Count, _Reds}} = couch_btree:full_reduce(Bt),
+ Count = case couch_btree:full_reduce(Bt) of
+ {ok, {Count0, _Reds, _}} -> Count0;
+ {ok, {Count0, _Reds}} -> Count0
+ end,
{ok, Count}.
@@ -786,27 +789,33 @@ changes_ekey_opts(_StartSeq, #mrargs{end_key=EKey,
end.
+reduced_external_size(Tree) ->
+ case couch_btree:full_reduce(Tree) of
+ {ok, {_, _, Size}} -> Size;
+ % return 0 for versions of the reduce function without Size
+ {ok, {_, _}} -> 0
+ end.
calculate_external_size(Views) ->
SumFun = fun(#mrview{btree=Bt, seq_btree=SBt, key_byseq_btree=KSBt}, Acc) ->
- Size0 = sum_btree_sizes(Acc, couch_btree:size(Bt)),
+ Size0 = sum_btree_sizes(Acc, reduced_external_size(Bt)),
Size1 = case SBt of
nil -> Size0;
- _ -> sum_btree_sizes(Size0, couch_btree:size(SBt))
+ _ -> sum_btree_sizes(Size0, reduced_external_size(SBt))
end,
case KSBt of
nil -> Size1;
- _ -> sum_btree_sizes(Size1, couch_btree:size(KSBt))
+ _ -> sum_btree_sizes(Size1, reduced_external_size(KSBt))
end
end,
{ok, lists:foldl(SumFun, 0, Views)}.
sum_btree_sizes(nil, _) ->
- null;
+ 0;
sum_btree_sizes(_, nil) ->
- null;
+ 0;
sum_btree_sizes(Size1, Size2) ->
Size1 + Size2.
@@ -1038,22 +1047,32 @@ get_user_reds(Reduction) ->
element(2, Reduction).
+get_external_size_reds(Reduction) when tuple_size(Reduction) == 2 ->
+ 0;
+
+get_external_size_reds(Reduction) when tuple_size(Reduction) == 3 ->
+ element(3, Reduction).
+
+
make_reduce_fun(Lang, ReduceFuns) ->
FunSrcs = [FunSrc || {_, FunSrc} <- ReduceFuns],
fun
(reduce, KVs0) ->
KVs = detuple_kvs(expand_dups(KVs0, []), []),
{ok, Result} = couch_query_servers:reduce(Lang, FunSrcs, KVs),
- {length(KVs), Result};
+ ExternalSize = kv_external_size(KVs, Result),
+ {length(KVs), Result, ExternalSize};
(rereduce, Reds) ->
- ExtractFun = fun(Red, {CountsAcc0, URedsAcc0}) ->
+ ExtractFun = fun(Red, {CountsAcc0, URedsAcc0, ExtAcc0}) ->
CountsAcc = CountsAcc0 + get_count(Red),
URedsAcc = lists:append(URedsAcc0, [get_user_reds(Red)]),
- {CountsAcc, URedsAcc}
+ ExtAcc = ExtAcc0 + get_external_size_reds(Red),
+ {CountsAcc, URedsAcc, ExtAcc}
end,
- {Counts, UReds} = lists:foldl(ExtractFun, {0, []}, Reds),
+ {Counts, UReds, ExternalSize} = lists:foldl(ExtractFun,
+ {0, [], 0}, Reds),
{ok, Result} = couch_query_servers:rereduce(Lang, FunSrcs, UReds),
- {Counts, Result}
+ {Counts, Result, ExternalSize}
end.
@@ -1130,3 +1149,9 @@ get_view_queries({Props}) ->
_ ->
throw({bad_request, "`queries` member must be a array."})
end.
+
+
+kv_external_size(KVList, Reduction) ->
+ lists:foldl(fun([[Key, _], Value], Acc) ->
+ ?term_size(Key) + ?term_size(Value) + Acc
+ end, ?term_size(Reduction), KVList).
--
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.