You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2017/09/25 17:19:12 UTC
[couchdb] branch master updated: Avoid decompressing just to
calculate external size
This is an automated email from the ASF dual-hosted git repository.
vatamane pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/couchdb.git
The following commit(s) were added to refs/heads/master by this push:
new 8d1c704 Avoid decompressing just to calculate external size
8d1c704 is described below
commit 8d1c7043731fbaa5f4f93243df5144416c946604
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Fri Sep 22 01:02:47 2017 -0400
Avoid decompressing just to calculate external size
Use snappy's `uncompressed_length` and the external term format's binary spec to
get the uncompressed size.
http://erlang.org/doc/apps/erts/erl_ext_dist.html
`erlang:external_size` is a function provided since R16B3, so use it without the
`try ... catch` fallback. Also make sure to pass `[{minor_version, 1}]` to match
what the `?term_to_bin` macro does.
Fixes #835
---
src/couch/include/couch_db.hrl | 7 +------
src/couch/src/couch_compress.erl | 14 ++++++++++++++
src/couch/src/couch_db_updater.erl | 9 ++++-----
src/couch/test/couch_compress_tests.erl | 11 +++++++++++
src/couch/test/couchdb_file_compression_tests.erl | 2 +-
5 files changed, 31 insertions(+), 12 deletions(-)
diff --git a/src/couch/include/couch_db.hrl b/src/couch/include/couch_db.hrl
index 7049c6e..17ef4c9 100644
--- a/src/couch/include/couch_db.hrl
+++ b/src/couch/include/couch_db.hrl
@@ -30,12 +30,7 @@
-define(i2b(V), couch_util:integer_to_boolean(V)).
-define(b2i(V), couch_util:boolean_to_integer(V)).
-define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])).
--define(term_size(T),
- try
- erlang:external_size(T)
- catch _:_ ->
- byte_size(?term_to_bin(T))
- end).
+-define(term_size(T), erlang:external_size(T, [{minor_version, 1}])).
-define(DEFAULT_ATTACHMENT_CONTENT_TYPE, <<"application/octet-stream">>).
diff --git a/src/couch/src/couch_compress.erl b/src/couch/src/couch_compress.erl
index 71588b2..cfcc2a4 100644
--- a/src/couch/src/couch_compress.erl
+++ b/src/couch/src/couch_compress.erl
@@ -14,6 +14,7 @@
-export([compress/2, decompress/1, is_compressed/2]).
-export([get_compression_method/0]).
+-export([uncompressed_size/1]).
-include_lib("couch/include/couch_db.hrl").
@@ -83,3 +84,16 @@ is_compressed(Term, _Method) when not is_binary(Term) ->
is_compressed(_, _) ->
error(invalid_compression).
+
+uncompressed_size(<<?SNAPPY_PREFIX, Rest/binary>>) ->
+ {ok, Size} = snappy:uncompressed_length(Rest),
+ Size;
+uncompressed_size(<<?COMPRESSED_TERM_PREFIX, Size:32, _/binary>> = _Bin) ->
+ % See http://erlang.org/doc/apps/erts/erl_ext_dist.html
+ % The uncompressed binary would be encoded with <<131, Rest/binary>>
+ % so need to add 1 for 131
+ Size + 1;
+uncompressed_size(<<?TERM_PREFIX, _/binary>> = Bin) ->
+ byte_size(Bin);
+uncompressed_size(_) ->
+ error(invalid_compression).
diff --git a/src/couch/src/couch_db_updater.erl b/src/couch/src/couch_db_updater.erl
index 78e0b8c..f0b6505 100644
--- a/src/couch/src/couch_db_updater.erl
+++ b/src/couch/src/couch_db_updater.erl
@@ -1079,14 +1079,13 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
{Body, AttInfos} = copy_doc_attachments(Db, Sp, DestFd),
% In the future, we should figure out how to do this for
% upgrade purposes.
- EJsonBody = case is_binary(Body) of
+ ExternalSize = case is_binary(Body) of
true ->
- couch_compress:decompress(Body);
+ couch_compress:uncompressed_size(Body);
false ->
- Body
+ ?term_size(Body)
end,
SummaryChunk = make_doc_summary(NewDb, {Body, AttInfos}),
- ExternalSize = ?term_size(EJsonBody),
{ok, Pos, SummarySize} = couch_file:append_raw_chunk(
DestFd, SummaryChunk),
AttSizes = [{element(3,A), element(4,A)} || A <- AttInfos],
@@ -1472,7 +1471,7 @@ get_meta_body_size(Meta, Summary) ->
{ejson_size, ExternalSize} ->
ExternalSize;
false ->
- ?term_size(couch_compress:decompress(Summary))
+ couch_compress:uncompressed_size(Summary)
end.
diff --git a/src/couch/test/couch_compress_tests.erl b/src/couch/test/couch_compress_tests.erl
index 6d6e6a7..addb9a0 100644
--- a/src/couch/test/couch_compress_tests.erl
+++ b/src/couch/test/couch_compress_tests.erl
@@ -72,3 +72,14 @@ is_compressed_test_() ->
?_assertError(invalid_compression,
couch_compress:is_compressed(?CORRUPT, snappy))
].
+
+uncompressed_size_test_() ->
+ [
+ ?_assertEqual(49, couch_compress:uncompressed_size(?NONE)),
+ ?_assertEqual(49, couch_compress:uncompressed_size(?DEFLATE)),
+ ?_assertEqual(49, couch_compress:uncompressed_size(?SNAPPY)),
+ ?_assertEqual(5, couch_compress:uncompressed_size(
+ couch_compress:compress(x, {deflate, 9}))),
+ ?_assertError(invalid_compression,
+ couch_compress:uncompressed_size(?CORRUPT))
+ ].
diff --git a/src/couch/test/couchdb_file_compression_tests.erl b/src/couch/test/couchdb_file_compression_tests.erl
index 09fead5..8f0fe5b 100644
--- a/src/couch/test/couchdb_file_compression_tests.erl
+++ b/src/couch/test/couchdb_file_compression_tests.erl
@@ -157,7 +157,7 @@ compare_compression_methods(DbName) ->
?assert(DbSizeDeflate1 > DbSizeDeflate9),
?assert(ViewSizeDeflate1 > ViewSizeDeflate9),
- ?assert(ExternalSizePreCompact =:= ExternalSizeNone),
+ ?assert(ExternalSizePreCompact >= ExternalSizeNone),
?assert(ExternalSizeNone =:= ExternalSizeSnappy),
?assert(ExternalSizeNone =:= ExternalSizeDeflate9),
?assert(ViewExternalSizeNone =:= ViewExternalSizeSnappy),
--
To stop receiving notification emails like this one, please contact
['"commits@couchdb.apache.org" <co...@couchdb.apache.org>'].