You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2017/09/25 17:19:12 UTC

[couchdb] branch master updated: Avoid decompressing just to calculate external size

This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 8d1c704  Avoid decompressing just to calculate external size
8d1c704 is described below

commit 8d1c7043731fbaa5f4f93243df5144416c946604
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Fri Sep 22 01:02:47 2017 -0400

    Avoid decompressing just to calculate external size
    
    Use snappy's `uncompressed_length` and external binary format's binary spec to
    get uncompressed size.
    
    http://erlang.org/doc/apps/erts/erl_ext_dist.html
    
    `erlang:external_size` is a function provided since R16B3, so use it without
    the `try ... catch` fallback. Also make sure to pass `[{minor_version, 1}]`
    to match what the `?term_to_bin` macro does.
    
    Fixes #835
---
 src/couch/include/couch_db.hrl                    |  7 +------
 src/couch/src/couch_compress.erl                  | 14 ++++++++++++++
 src/couch/src/couch_db_updater.erl                |  9 ++++-----
 src/couch/test/couch_compress_tests.erl           | 11 +++++++++++
 src/couch/test/couchdb_file_compression_tests.erl |  2 +-
 5 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/src/couch/include/couch_db.hrl b/src/couch/include/couch_db.hrl
index 7049c6e..17ef4c9 100644
--- a/src/couch/include/couch_db.hrl
+++ b/src/couch/include/couch_db.hrl
@@ -30,12 +30,7 @@
 -define(i2b(V), couch_util:integer_to_boolean(V)).
 -define(b2i(V), couch_util:boolean_to_integer(V)).
 -define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])).
--define(term_size(T),
-    try
-        erlang:external_size(T)
-    catch _:_ ->
-        byte_size(?term_to_bin(T))
-    end).
+-define(term_size(T), erlang:external_size(T, [{minor_version, 1}])).
 
 -define(DEFAULT_ATTACHMENT_CONTENT_TYPE, <<"application/octet-stream">>).
 
diff --git a/src/couch/src/couch_compress.erl b/src/couch/src/couch_compress.erl
index 71588b2..cfcc2a4 100644
--- a/src/couch/src/couch_compress.erl
+++ b/src/couch/src/couch_compress.erl
@@ -14,6 +14,7 @@
 
 -export([compress/2, decompress/1, is_compressed/2]).
 -export([get_compression_method/0]).
+-export([uncompressed_size/1]).
 
 -include_lib("couch/include/couch_db.hrl").
 
@@ -83,3 +84,16 @@ is_compressed(Term, _Method) when not is_binary(Term) ->
 is_compressed(_, _) ->
     error(invalid_compression).
 
+
+uncompressed_size(<<?SNAPPY_PREFIX, Rest/binary>>) ->
+    {ok, Size} = snappy:uncompressed_length(Rest),
+    Size;
+uncompressed_size(<<?COMPRESSED_TERM_PREFIX, Size:32, _/binary>> = _Bin) ->
+    % See http://erlang.org/doc/apps/erts/erl_ext_dist.html
+    % Size is the 32-bit field after the prefix. The uncompressed binary would
+    % be encoded as <<131, Rest/binary>>, so add 1 for the leading 131 byte.
+    Size + 1;
+uncompressed_size(<<?TERM_PREFIX, _/binary>> = Bin) ->
+    byte_size(Bin);
+uncompressed_size(_) ->
+    error(invalid_compression).
diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index 78e0b8c..f0b6505 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -1079,14 +1079,13 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
                 {Body, AttInfos} = copy_doc_attachments(Db, Sp, DestFd),
                 % In the future, we should figure out how to do this for
                 % upgrade purposes.
-                EJsonBody = case is_binary(Body) of
+                ExternalSize = case is_binary(Body) of
                     true ->
-                        couch_compress:decompress(Body);
+                        couch_compress:uncompressed_size(Body);
                     false ->
-                        Body
+                        ?term_size(Body)
                 end,
                 SummaryChunk = make_doc_summary(NewDb, {Body, AttInfos}),
-                ExternalSize = ?term_size(EJsonBody),
                 {ok, Pos, SummarySize} = couch_file:append_raw_chunk(
                     DestFd, SummaryChunk),
                 AttSizes = [{element(3,A), element(4,A)} || A <- AttInfos],
@@ -1472,7 +1471,7 @@ get_meta_body_size(Meta, Summary) ->
         {ejson_size, ExternalSize} ->
             ExternalSize;
         false ->
-            ?term_size(couch_compress:decompress(Summary))
+            couch_compress:uncompressed_size(Summary)
     end.
 
 
diff --git a/src/couch/test/couch_compress_tests.erl b/src/couch/test/couch_compress_tests.erl
index 6d6e6a7..addb9a0 100644
--- a/src/couch/test/couch_compress_tests.erl
+++ b/src/couch/test/couch_compress_tests.erl
@@ -72,3 +72,14 @@ is_compressed_test_() ->
         ?_assertError(invalid_compression,
             couch_compress:is_compressed(?CORRUPT, snappy))
     ].
+
+uncompressed_size_test_() -> % EUnit generator for uncompressed_size/1
+    [
+        ?_assertEqual(49, couch_compress:uncompressed_size(?NONE)),
+        ?_assertEqual(49, couch_compress:uncompressed_size(?DEFLATE)),
+        ?_assertEqual(49, couch_compress:uncompressed_size(?SNAPPY)),
+        ?_assertEqual(5, couch_compress:uncompressed_size(
+            couch_compress:compress(x, {deflate, 9}))), % compressed here
+        ?_assertError(invalid_compression,
+            couch_compress:uncompressed_size(?CORRUPT)) % must raise
+    ].
diff --git a/src/couch/test/couchdb_file_compression_tests.erl b/src/couch/test/couchdb_file_compression_tests.erl
index 09fead5..8f0fe5b 100644
--- a/src/couch/test/couchdb_file_compression_tests.erl
+++ b/src/couch/test/couchdb_file_compression_tests.erl
@@ -157,7 +157,7 @@ compare_compression_methods(DbName) ->
 
     ?assert(DbSizeDeflate1 > DbSizeDeflate9),
     ?assert(ViewSizeDeflate1 > ViewSizeDeflate9),
-    ?assert(ExternalSizePreCompact =:= ExternalSizeNone),
+    ?assert(ExternalSizePreCompact >= ExternalSizeNone),
     ?assert(ExternalSizeNone =:= ExternalSizeSnappy),
     ?assert(ExternalSizeNone =:= ExternalSizeDeflate9),
     ?assert(ViewExternalSizeNone =:= ViewExternalSizeSnappy),

-- 
To stop receiving notification emails like this one, please contact
['"commits@couchdb.apache.org" <co...@couchdb.apache.org>'].