You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2014/02/10 22:54:26 UTC

couch-mrview commit: updated refs/heads/2001-feature-external-size to e91c51b

Updated Branches:
  refs/heads/2001-feature-external-size [created] e91c51bbc


Implement view external size calculations

This patch adds calculations to show the external size of user-defined
data in views, which is a rough measure of how much disk space it would
take to store the keys, values, and reductions. This can be used for
capacity planning and calculating relative compression ratios.

COUCHDB-2001


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch-mrview/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch-mrview/commit/e91c51bb
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch-mrview/tree/e91c51bb
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch-mrview/diff/e91c51bb

Branch: refs/heads/2001-feature-external-size
Commit: e91c51bbcc971425193837da29c93de69c5dbe60
Parents: 3a093fa
Author: Paul J. Davis <pa...@gmail.com>
Authored: Mon Feb 10 15:09:01 2014 -0600
Committer: Paul J. Davis <pa...@gmail.com>
Committed: Mon Feb 10 15:09:01 2014 -0600

----------------------------------------------------------------------
 src/couch_mrview_index.erl | 18 ++++++++-----
 src/couch_mrview_util.erl  | 58 +++++++++++++++++++++++++++--------------
 2 files changed, 50 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch-mrview/blob/e91c51bb/src/couch_mrview_index.erl
----------------------------------------------------------------------
diff --git a/src/couch_mrview_index.erl b/src/couch_mrview_index.erl
index ffcbf5d..fc102eb 100644
--- a/src/couch_mrview_index.erl
+++ b/src/couch_mrview_index.erl
@@ -44,19 +44,23 @@ get(Property, State) ->
             #mrst{
                 fd = Fd,
                 sig = Sig,
-                id_btree = Btree,
                 language = Lang,
                 update_seq = UpdateSeq,
-                purge_seq = PurgeSeq,
-                views = Views
+                purge_seq = PurgeSeq
             } = State,
-            {ok, Size} = couch_file:bytes(Fd),
-            {ok, DataSize} = couch_mrview_util:calculate_data_size(Btree,Views),
+            {ok, FileSize} = couch_file:bytes(Fd),
+            {ok, ActiveSize} = couch_mrview_util:active_size(State),
+            {ok, ExternalSize} = couch_mrview_util:external_size(State),
             {ok, [
                 {signature, list_to_binary(couch_index_util:hexsig(Sig))},
                 {language, Lang},
-                {disk_size, Size},
-                {data_size, DataSize},
+                {disk_size, FileSize},
+                {data_size, ActiveSize},
+                {sizes, {[
+                    {file, FileSize},
+                    {active, ActiveSize},
+                    {external, ExternalSize}
+                ]}},
                 {update_seq, UpdateSeq},
                 {purge_seq, PurgeSeq}
             ]};

http://git-wip-us.apache.org/repos/asf/couchdb-couch-mrview/blob/e91c51bb/src/couch_mrview_util.erl
----------------------------------------------------------------------
diff --git a/src/couch_mrview_util.erl b/src/couch_mrview_util.erl
index 27baa4a..110ec14 100644
--- a/src/couch_mrview_util.erl
+++ b/src/couch_mrview_util.erl
@@ -21,7 +21,7 @@
 -export([all_docs_key_opts/1, all_docs_key_opts/2, key_opts/1, key_opts/2]).
 -export([fold/4, fold_reduce/4]).
 -export([temp_view_to_ddoc/1]).
--export([calculate_data_size/2]).
+-export([active_size/1, external_size/1]).
 -export([validate_args/1]).
 -export([maybe_load_doc/3, maybe_load_doc/4]).
 
@@ -213,12 +213,13 @@ open_view(Db, Fd, Lang, {BTState, USeq, PSeq}, View) ->
         fun(reduce, KVs) ->
             KVs2 = detuple_kvs(expand_dups(KVs, []), []),
             {ok, Result} = couch_query_servers:reduce(Lang, FunSrcs, KVs2),
-            {length(KVs2), Result};
+            {length(KVs2), Result, reduce_external_size(KVs2, Result)};
         (rereduce, Reds) ->
-            Count = lists:sum([Count0 || {Count0, _} <- Reds]),
-            UsrReds = [UsrRedsList || {_, UsrRedsList} <- Reds],
+            Count = lists:sum(extract_reduction(Reds, counts)),
+            DataSize = lists:sum(extract_reduction(Reds, data_size)),
+            UsrReds = extract_reduction(Reds, user_reds),
             {ok, Result} = couch_query_servers:rereduce(Lang, FunSrcs, UsrReds),
-            {Count, Result}
+            {Count, Result, DataSize + erlang:external_size(Result)}
         end,
 
     Less = case couch_util:get_value(<<"collation">>, View#mrview.options) of
@@ -235,6 +236,23 @@ open_view(Db, Fd, Lang, {BTState, USeq, PSeq}, View) ->
     View#mrview{btree=Btree, update_seq=USeq, purge_seq=PSeq}.
 
 
+reduce_external_size(KVList, Reduction) ->
+    InitSize = erlang:external_size(Reduction),
+    lists:foldl(fun([[Key, _], Value], Acc) ->
+        KSize = erlang:external_size(Key),
+        VSize = erlang:external_size(Value),
+        KSize + VSize + Acc
+    end, InitSize, KVList).
+
+
+extract_reduction(Reds, counts) ->
+    [element(1, R) || R <- Reds];
+extract_reduction(Reds, user_reds) ->
+    [element(2, R) || R <- Reds];
+extract_reduction(Reds, data_size) ->
+    lists:map(fun({_, _}) -> 0; ({_, _, S}) -> S end, Reds).
+
+
 temp_view_to_ddoc({Props}) ->
     Language = couch_util:get_value(<<"language">>, Props, <<"javascript">>),
     Options = couch_util:get_value(<<"options">>, Props, {[]}),
@@ -255,8 +273,8 @@ temp_view_to_ddoc({Props}) ->
 
 
 get_row_count(#mrview{btree=Bt}) ->
-    {ok, {Count, _Reds}} = couch_btree:full_reduce(Bt),
-    {ok, Count}.
+    {ok, Reds} = couch_btree:full_reduce(Bt),
+    {ok, element(1, Reds)}.
 
 
 all_docs_reduce_to_count(Reductions) ->
@@ -628,20 +646,22 @@ reverse_key_default(<<255>>) -> <<>>;
 reverse_key_default(Key) -> Key.
 
 
-calculate_data_size(IdBt, Views) ->
-    SumFun = fun(#mrview{btree=Bt}, Acc) ->
-        sum_btree_sizes(Acc, couch_btree:size(Bt))
-    end,
-    Size = lists:foldl(SumFun, couch_btree:size(IdBt), Views),
-    {ok, Size}.
+active_size(#mrst{id_btree=IdBt, views=Views}) ->
+    Trees = [IdBt] ++ [Bt || #mrview{btree=Bt} <- Views],
+    lists:foldl(fun(T, Acc) ->
+        case couch_btree:size(T) of
+            _ when Acc == null -> null;
+            undefined -> null;
+            S -> Acc + S
+        end
+    end, 0, Trees).
 
 
-sum_btree_sizes(nil, _) ->
-    null;
-sum_btree_sizes(_, nil) ->
-    null;
-sum_btree_sizes(Size1, Size2) ->
-    Size1 + Size2.
+external_size(#mrst{views=Views}) ->
+    lists:foldl(fun(#mrview{btree=Btree}, Acc) ->
+        {ok, {_, _, Size}} = couch_btree:full_reduce(Btree),
+        Size + Acc
+    end, 0, Views).
 
 
 detuple_kvs([], Acc) ->