You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by va...@apache.org on 2020/04/04 20:51:56 UTC
[couchdb] 01/01: Compress doc bodies and attachments
This is an automated email from the ASF dual-hosted git repository.
vatamane pushed a commit to branch compress-doc-bodies-and-attachments
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit f30a15b5f23d566d4613ceb3a9cffa1d175fc476
Author: Nick Vatamaniuc <va...@apache.org>
AuthorDate: Sat Apr 4 16:45:52 2020 -0400
Compress doc bodies and attachments
In CouchDB < 4.x we compressed document bodies by default, so enable
compression for >= 4.x as well.
Use the basic term_to_binary compression mechanism for:
- Document bodies
- Local document bodies
- Attachments, but only if they have not already been compressed.
---
src/fabric/include/fabric2.hrl | 4 +++
src/fabric/src/fabric2_db.erl | 3 +-
src/fabric/src/fabric2_fdb.erl | 39 ++++++++++++++++++-----
src/fabric/test/fabric2_doc_att_tests.erl | 51 +++++++++++++++++++++++++++++--
4 files changed, 86 insertions(+), 11 deletions(-)
diff --git a/src/fabric/include/fabric2.hrl b/src/fabric/include/fabric2.hrl
index 0c07575..99ac874 100644
--- a/src/fabric/include/fabric2.hrl
+++ b/src/fabric/include/fabric2.hrl
@@ -54,6 +54,10 @@
-define(CURR_LDOC_FORMAT, 0).
+% 0 - Attachment storage version
+
+-define(CURR_ATT_STORAGE_VER, 0).
+
% Misc constants
-define(PDICT_DB_KEY, '$fabric_db_handle').
diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl
index ca9f037..edabe4e 100644
--- a/src/fabric/src/fabric2_db.erl
+++ b/src/fabric/src/fabric2_db.erl
@@ -809,7 +809,8 @@ read_attachment(Db, DocId, AttId) ->
write_attachment(Db, DocId, Att) ->
Data = couch_att:fetch(data, Att),
- {ok, AttId} = fabric2_fdb:write_attachment(Db, DocId, Data),
+ Encoding = couch_att:fetch(encoding, Att),
+ {ok, AttId} = fabric2_fdb:write_attachment(Db, DocId, Data, Encoding),
couch_att:store(data, {loc, Db, DocId, AttId}, Att).
diff --git a/src/fabric/src/fabric2_fdb.erl b/src/fabric/src/fabric2_fdb.erl
index 22ccc99..74f9651 100644
--- a/src/fabric/src/fabric2_fdb.erl
+++ b/src/fabric/src/fabric2_fdb.erl
@@ -54,7 +54,7 @@
write_local_doc/2,
read_attachment/3,
- write_attachment/3,
+ write_attachment/4,
get_last_change/1,
@@ -896,27 +896,51 @@ read_attachment(#{} = Db, DocId, AttId) ->
db_prefix := DbPrefix
} = ensure_current(Db),
+ IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix),
+ InfoFuture = erlfdb:get(Tx, IdKey),
+
AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix),
- case erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)) of
+ Data = case erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)) of
not_found ->
throw({not_found, missing});
KVs ->
Vs = [V || {_K, V} <- KVs],
iolist_to_binary(Vs)
+ end,
+
+ case erlfdb:wait(InfoFuture) of
+ <<>> ->
+ Data; % Old format, before CURR_ATT_STORAGE_VER = 0
+ InfoBin ->
+ {?CURR_ATT_STORAGE_VER, Compressed} = erlfdb_tuple:unpack(InfoBin),
+ case Compressed of
+ true -> binary_to_term(Data, [safe]);
+ false -> Data
+ end
end.
-write_attachment(#{} = Db, DocId, Data) when is_binary(Data) ->
+write_attachment(#{} = Db, DocId, Data, Encoding)
+ when is_binary(Data), is_atom(Encoding) ->
#{
tx := Tx,
db_prefix := DbPrefix
} = ensure_current(Db),
AttId = fabric2_util:uuid(),
- Chunks = chunkify_binary(Data),
+ {Data1, Compressed} = case Encoding of
+ gzip ->
+ {Data, false};
+ _ ->
+ Opts = [{minor_version, 1}, {compressed, 6}],
+ {term_to_binary(Data, Opts), true}
+ end,
IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix),
- ok = erlfdb:set(Tx, IdKey, <<>>),
+ InfoVal = erlfdb_tuple:pack({?CURR_ATT_STORAGE_VER, Compressed}),
+ ok = erlfdb:set(Tx, IdKey, InfoVal),
+
+ Chunks = chunkify_binary(Data1),
lists:foldl(fun(Chunk, ChunkId) ->
AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId, ChunkId}, DbPrefix),
@@ -1356,7 +1380,8 @@ doc_to_fdb(Db, #doc{} = Doc) ->
DiskAtts = lists:map(fun couch_att:to_disk_term/1, Atts),
- Value = term_to_binary({Body, DiskAtts, Deleted}, [{minor_version, 1}]),
+ Opts = [{minor_version, 1}, {compressed, 6}],
+ Value = term_to_binary({Body, DiskAtts, Deleted}, Opts),
Chunks = chunkify_binary(Value),
{Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) ->
@@ -1408,7 +1433,7 @@ local_doc_to_fdb(Db, #doc{} = Doc) ->
_ when is_binary(Rev) -> Rev
end,
- BVal = term_to_binary(Body, [{minor_version, 1}]),
+ BVal = term_to_binary(Body, [{minor_version, 1}, {compressed, 6}]),
{Rows, _} = lists:mapfoldl(fun(Chunk, ChunkId) ->
K = erlfdb_tuple:pack({?DB_LOCAL_DOC_BODIES, Id, ChunkId}, DbPrefix),
{{K, Chunk}, ChunkId + 1}
diff --git a/src/fabric/test/fabric2_doc_att_tests.erl b/src/fabric/test/fabric2_doc_att_tests.erl
index ac531e9..fc7bbcc 100644
--- a/src/fabric/test/fabric2_doc_att_tests.erl
+++ b/src/fabric/test/fabric2_doc_att_tests.erl
@@ -29,6 +29,7 @@ doc_crud_test_() ->
fun cleanup/1,
with([
?TDEF(create_att),
+ ?TDEF(create_att_already_compressed),
?TDEF(delete_att),
?TDEF(multiple_atts),
?TDEF(delete_one_att),
@@ -84,7 +85,47 @@ create_att({Db, _}) ->
IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)),
AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)),
- ?assertEqual(<<>>, IdVal),
+ ?assertEqual(erlfdb_tuple:pack({0, true}), IdVal),
+ Opts = [{minor_version, 1}, {compressed, 6}],
+ Expect = term_to_binary(<<"foobar">>, Opts),
+ ?assertMatch([{_, Expect}], AttVals)
+ end).
+
+create_att_already_compressed({Db, _}) ->
+ DocId = fabric2_util:uuid(),
+ Att1 = couch_att:new([
+ {name, <<"foo.txt">>},
+ {type, <<"application/octet-stream">>},
+ {att_len, 6},
+ {data, <<"foobar">>},
+ {encoding, gzip},
+ {md5, <<>>}
+ ]),
+ Doc1 = #doc{
+ id = DocId,
+ atts = [Att1]
+ },
+ {ok, _} = fabric2_db:update_doc(Db, Doc1),
+ {ok, Doc2} = fabric2_db:open_doc(Db, DocId),
+ #doc{
+ atts = [Att2]
+ } = Doc2,
+ {loc, _Db, DocId, AttId} = couch_att:fetch(data, Att2),
+ AttData = fabric2_db:read_attachment(Db, DocId, AttId),
+ ?assertEqual(<<"foobar">>, AttData),
+
+ % Check that the raw keys exist
+ #{
+ db_prefix := DbPrefix
+ } = Db,
+ IdKey = erlfdb_tuple:pack({?DB_ATT_NAMES, DocId, AttId}, DbPrefix),
+ AttKey = erlfdb_tuple:pack({?DB_ATTS, DocId, AttId}, DbPrefix),
+
+ fabric2_fdb:transactional(fun(Tx) ->
+ IdVal = erlfdb:wait(erlfdb:get(Tx, IdKey)),
+ AttVals = erlfdb:wait(erlfdb:get_range_startswith(Tx, AttKey)),
+
+ ?assertEqual(erlfdb_tuple:pack({0, false}), IdVal),
?assertMatch([{_, <<"foobar">>}], AttVals)
end).
@@ -175,7 +216,7 @@ large_att({Db, _}) ->
AttData = iolist_to_binary([
<<"foobar">> || _ <- lists:seq(1, 60000)
]),
- Att1 = mk_att(<<"long.txt">>, AttData),
+ Att1 = mk_att(<<"long.txt">>, AttData, gzip),
{ok, _} = create_doc(Db, DocId, [Att1]),
?assertEqual(#{<<"long.txt">> => AttData}, read_atts(Db, DocId)),
@@ -204,12 +245,16 @@ att_on_conflict_isolation({Db, _}) ->
mk_att(Name, Data) ->
+ mk_att(Name, Data, identity).
+
+
+mk_att(Name, Data, Encoding) ->
couch_att:new([
{name, Name},
{type, <<"application/octet-stream">>},
{att_len, size(Data)},
{data, Data},
- {encoding, identity},
+ {encoding, Encoding},
{md5, <<>>}
]).