You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by to...@apache.org on 2017/07/15 18:04:09 UTC

[couchdb] 01/01: calculate data_size correctly

This is an automated email from the ASF dual-hosted git repository.

tonysun83 pushed a commit to branch 3430-external-size-views
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit da5e7146af87c15c8bdc1a110b3f885d9bf2c4da
Author: Tony Sun <to...@cloudant.com>
AuthorDate: Mon Jun 12 09:26:49 2017 -0700

    calculate data_size correctly
    
    Previously, the ExternalSize of a view was calculated by summing the
    sizes of all the nodes in its btree, which yields the compressed
    size rather than the external size. Now the reduce function is
    modified to also return an ExternalSize computed from the
    uncompressed values in the KVList.
    PR: https://github.com/apache/couchdb/pull/608
    
    COUCHDB-3430
---
 src/couch/test/couchdb_file_compression_tests.erl | 12 ++++++
 src/couch_mrview/src/couch_mrview_util.erl        | 47 +++++++++++++++++------
 2 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/src/couch/test/couchdb_file_compression_tests.erl b/src/couch/test/couchdb_file_compression_tests.erl
index 41d0556..fa2c226 100644
--- a/src/couch/test/couchdb_file_compression_tests.erl
+++ b/src/couch/test/couchdb_file_compression_tests.erl
@@ -125,6 +125,7 @@ compare_compression_methods(DbName) ->
     DbSizeNone = db_disk_size(DbName),
     ViewSizeNone = view_disk_size(DbName),
     ExternalSizeNone = db_external_size(DbName),
+    ViewExternalSizeNone = view_external_size(DbName),
 
     config:set("couchdb", "file_compression", "snappy", false),
     compact_db(DbName),
@@ -132,6 +133,7 @@ compare_compression_methods(DbName) ->
     DbSizeSnappy = db_disk_size(DbName),
     ViewSizeSnappy = view_disk_size(DbName),
     ExternalSizeSnappy = db_external_size(DbName),
+    ViewExternalSizeSnappy = view_external_size(DbName),
 
     ?assert(DbSizeNone > DbSizeSnappy),
     ?assert(ViewSizeNone > ViewSizeSnappy),
@@ -151,12 +153,15 @@ compare_compression_methods(DbName) ->
     DbSizeDeflate9 = db_disk_size(DbName),
     ViewSizeDeflate9 = view_disk_size(DbName),
     ExternalSizeDeflate9 = db_external_size(DbName),
+    ViewExternalSizeDeflate9 = view_external_size(DbName),
 
     ?assert(DbSizeDeflate1 > DbSizeDeflate9),
     ?assert(ViewSizeDeflate1 > ViewSizeDeflate9),
     ?assert(ExternalSizePreCompact =:= ExternalSizeNone),
     ?assert(ExternalSizeNone =:= ExternalSizeSnappy),
     ?assert(ExternalSizeNone =:= ExternalSizeDeflate9).
+    ?assert(ViewExternalSizeNone =:= ViewExternalSizeSnappy),
+    ?assert(ViewExternalSizeNone =:= ViewExternalSizeDeflate9).
 
 
 populate_db(_Db, NumDocs) when NumDocs =< 0 ->
@@ -214,6 +219,13 @@ view_disk_size(DbName) ->
     ok = couch_db:close(Db),
     active_size(Info).
 
+view_external_size(DbName) ->
+    {ok, Db} = couch_db:open_int(DbName, []),
+    {ok, DDoc} = couch_db:open_doc(Db, ?DDOC_ID, [ejson_body]),
+    {ok, Info} = couch_mrview:get_info(Db, DDoc),
+    ok = couch_db:close(Db),
+    external_size(Info).
+
 active_size(Info) ->
     couch_util:get_nested_json_value({Info}, [sizes, active]).
 
diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl
index a8462a2..6325222 100644
--- a/src/couch_mrview/src/couch_mrview_util.erl
+++ b/src/couch_mrview/src/couch_mrview_util.erl
@@ -338,7 +338,10 @@ temp_view_to_ddoc({Props}) ->
 
 
 get_row_count(#mrview{btree=Bt}) ->
-    {ok, {Count, _Reds}} = couch_btree:full_reduce(Bt),
+    Count = case couch_btree:full_reduce(Bt) of
+        {ok, {Count0, _Reds, _}} -> Count0;
+        {ok, {Count0, _Reds}} -> Count0
+    end,
     {ok, Count}.
 
 
@@ -786,27 +789,33 @@ changes_ekey_opts(_StartSeq, #mrargs{end_key=EKey,
     end.
 
 
+reduced_external_size(Tree) ->
+    case couch_btree:full_reduce(Tree) of
+        {ok, {_, _, Size}} -> Size;
+        % return 0 for versions of the reduce function without Size
+        {ok, {_, _}} -> 0
+    end.
 
 
 calculate_external_size(Views) ->
     SumFun = fun(#mrview{btree=Bt, seq_btree=SBt, key_byseq_btree=KSBt}, Acc) ->
-        Size0 = sum_btree_sizes(Acc, couch_btree:size(Bt)),
+        Size0 = sum_btree_sizes(Acc, reduced_external_size(Bt)),
         Size1 = case SBt of
             nil -> Size0;
-            _ -> sum_btree_sizes(Size0, couch_btree:size(SBt))
+            _ -> sum_btree_sizes(Size0, reduced_external_size(SBt))
         end,
         case KSBt of
             nil -> Size1;
-            _ -> sum_btree_sizes(Size1, couch_btree:size(KSBt))
+            _ -> sum_btree_sizes(Size1, reduced_external_size(KSBt))
         end
     end,
     {ok, lists:foldl(SumFun, 0, Views)}.
 
 
 sum_btree_sizes(nil, _) ->
-    null;
+    0;
 sum_btree_sizes(_, nil) ->
-    null;
+    0;
 sum_btree_sizes(Size1, Size2) ->
     Size1 + Size2.
 
@@ -1038,22 +1047,32 @@ get_user_reds(Reduction) ->
     element(2, Reduction).
 
 
+get_external_size_reds(Reduction) when tuple_size(Reduction) == 2 ->
+    0;
+
+get_external_size_reds(Reduction) when tuple_size(Reduction) == 3 ->
+    element(3, Reduction).
+
+
 make_reduce_fun(Lang, ReduceFuns) ->
     FunSrcs = [FunSrc || {_, FunSrc} <- ReduceFuns],
     fun
         (reduce, KVs0) ->
             KVs = detuple_kvs(expand_dups(KVs0, []), []),
             {ok, Result} = couch_query_servers:reduce(Lang, FunSrcs, KVs),
-            {length(KVs), Result};
+            ExternalSize = kv_external_size(KVs, Result),
+            {length(KVs), Result, ExternalSize};
         (rereduce, Reds) ->
-            ExtractFun = fun(Red, {CountsAcc0, URedsAcc0}) ->
+            ExtractFun = fun(Red, {CountsAcc0, URedsAcc0, ExtAcc0}) ->
                 CountsAcc = CountsAcc0 + get_count(Red),
                 URedsAcc = lists:append(URedsAcc0, [get_user_reds(Red)]),
-                {CountsAcc, URedsAcc}
+                ExtAcc = ExtAcc0 + get_external_size_reds(Red),
+                {CountsAcc, URedsAcc, ExtAcc}
             end,
-            {Counts, UReds} = lists:foldl(ExtractFun, {0, []}, Reds),
+            {Counts, UReds, ExternalSize} = lists:foldl(ExtractFun,
+                {0, [], 0}, Reds),
             {ok, Result} = couch_query_servers:rereduce(Lang, FunSrcs, UReds),
-            {Counts, Result}
+            {Counts, Result, ExternalSize}
     end.
 
 
@@ -1130,3 +1149,9 @@ get_view_queries({Props}) ->
         _ ->
             throw({bad_request, "`queries` member must be a array."})
     end.
+
+
+kv_external_size(KVList, Reduction) ->
+    lists:foldl(fun([[Key, _], Value], Acc) ->
+        ?term_size(Key) + ?term_size(Value) + Acc
+    end, ?term_size(Reduction), KVList).

-- 
To stop receiving notification emails like this one, please contact
"commits@couchdb.apache.org" <co...@couchdb.apache.org>.