You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by fd...@apache.org on 2012/01/26 16:21:37 UTC

git commit: Ensure compaction respects compression setting

Updated Branches:
  refs/heads/master 6282b5d03 -> 7309340cf


Ensure compaction respects compression setting

For a database with file format version 6 (CouchDB 1.2+), if
the file_compression setting was changed, compaction ignored
it, making it impossible to change the compression method of
a database and its indexes.

Closes COUCHDB-1394


Project: http://git-wip-us.apache.org/repos/asf/couchdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb/commit/7309340c
Tree: http://git-wip-us.apache.org/repos/asf/couchdb/tree/7309340c
Diff: http://git-wip-us.apache.org/repos/asf/couchdb/diff/7309340c

Branch: refs/heads/master
Commit: 7309340cf941f28a8a6975c75697ed28e2ee805b
Parents: 6282b5d
Author: Filipe David Borba Manana <fd...@apache.org>
Authored: Thu Jan 26 14:58:28 2012 +0000
Committer: Filipe David Borba Manana <fd...@apache.org>
Committed: Thu Jan 26 14:58:28 2012 +0000

----------------------------------------------------------------------
 src/couch_mrview/src/couch_mrview_util.erl |    9 +-
 src/couchdb/couch_compress.erl             |   25 +++-
 src/couchdb/couch_db.erl                   |    5 +-
 src/couchdb/couch_db_updater.erl           |    7 +-
 test/etap/076-file-compression.t           |  186 +++++++++++++++++++++++
 test/etap/Makefile.am                      |    1 +
 6 files changed, 217 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb/blob/7309340c/src/couch_mrview/src/couch_mrview_util.erl
----------------------------------------------------------------------
diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl
index a13b154..2d75df9 100644
--- a/src/couch_mrview/src/couch_mrview_util.erl
+++ b/src/couch_mrview/src/couch_mrview_util.erl
@@ -188,8 +188,7 @@ init_state(Db, Fd, State, Header) ->
         (rereduce, Reds) -> lists:sum(Reds)
     end,
 
-    %IdBtOpts = [{compression, couch_db:compression(Db)}],
-    IdBtOpts = [{reduce, IdReduce}],
+    IdBtOpts = [{reduce, IdReduce}, {compression, couch_db:compression(Db)}],
     {ok, IdBtree} = couch_btree:open(IdBtreeState, Fd, IdBtOpts),
 
     OpenViewFun = fun(St, View) -> open_view(Db, Fd, Lang, St, View) end,
@@ -204,7 +203,7 @@ init_state(Db, Fd, State, Header) ->
     }.
 
 
-open_view(_Db, Fd, Lang, {BTState, USeq, PSeq}, View) ->
+open_view(Db, Fd, Lang, {BTState, USeq, PSeq}, View) ->
     FunSrcs = [FunSrc || {_Name, FunSrc} <- View#mrview.reduce_funs],
     ReduceFun =
         fun(reduce, KVs) ->
@@ -225,8 +224,8 @@ open_view(_Db, Fd, Lang, {BTState, USeq, PSeq}, View) ->
 
     ViewBtOpts = [
         {less, Less},
-        {reduce, ReduceFun}
-        %{compression, couch_db:compression(Db)}
+        {reduce, ReduceFun},
+        {compression, couch_db:compression(Db)}
     ],
     {ok, Btree} = couch_btree:open(BTState, Fd, ViewBtOpts),
     View#mrview{btree=Btree, update_seq=USeq, purge_seq=PSeq}.

http://git-wip-us.apache.org/repos/asf/couchdb/blob/7309340c/src/couchdb/couch_compress.erl
----------------------------------------------------------------------
diff --git a/src/couchdb/couch_compress.erl b/src/couchdb/couch_compress.erl
index 523108f..ac386fd 100644
--- a/src/couchdb/couch_compress.erl
+++ b/src/couchdb/couch_compress.erl
@@ -12,16 +12,17 @@
 
 -module(couch_compress).
 
--export([compress/2, decompress/1, is_compressed/1]).
+-export([compress/2, decompress/1, is_compressed/2]).
 -export([get_compression_method/0]).
 
 -include("couch_db.hrl").
 
 % binaries compressed with snappy have their first byte set to this value
 -define(SNAPPY_PREFIX, 1).
-% binaries that are a result of an erlang:term_to_binary/1,2 call have this
-% value as their first byte
+% Term prefixes documented at:
+%      http://www.erlang.org/doc/apps/erts/erl_ext_dist.html
 -define(TERM_PREFIX, 131).
+-define(COMPRESSED_TERM_PREFIX, 131, 80).
 
 
 get_compression_method() ->
@@ -38,6 +39,12 @@ get_compression_method() ->
     end.
 
 
+compress(<<?SNAPPY_PREFIX, _/binary>> = Bin, snappy) ->
+    Bin;
+compress(<<?SNAPPY_PREFIX, _/binary>> = Bin, Method) ->
+    compress(decompress(Bin), Method);
+compress(<<?TERM_PREFIX, _/binary>> = Bin, Method) ->
+    compress(decompress(Bin), Method);
 compress(Term, none) ->
     ?term_to_bin(Term);
 compress(Term, {deflate, Level}) ->
@@ -64,10 +71,14 @@ decompress(<<?TERM_PREFIX, _/binary>> = Bin) ->
     binary_to_term(Bin).
 
 
-is_compressed(<<?SNAPPY_PREFIX, _/binary>>) ->
+is_compressed(<<?SNAPPY_PREFIX, _/binary>>, Method) -> % snappy data matches only a snappy target
+    Method =:= snappy;
+is_compressed(<<?COMPRESSED_TERM_PREFIX, _/binary>>, {deflate, _Level}) -> % level is not compared
     true;
-is_compressed(<<?TERM_PREFIX, _/binary>>) ->
-    true;
-is_compressed(Term) when not is_binary(Term) ->
+is_compressed(<<?COMPRESSED_TERM_PREFIX, _/binary>>, _Method) -> % compressed term, non-deflate target
+    false;
+is_compressed(<<?TERM_PREFIX, _/binary>>, Method) -> % any other term_to_binary output
+    Method =:= none; % already in its final form only when the target method is none
+is_compressed(Term, _Method) when not is_binary(Term) -> % not a binary, so not serialized yet
     false.
 

http://git-wip-us.apache.org/repos/asf/couchdb/blob/7309340c/src/couchdb/couch_db.erl
----------------------------------------------------------------------
diff --git a/src/couchdb/couch_db.erl b/src/couchdb/couch_db.erl
index ae21bfa..81e97f2 100644
--- a/src/couchdb/couch_db.erl
+++ b/src/couchdb/couch_db.erl
@@ -29,7 +29,7 @@
 -export([init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3,handle_info/2]).
 -export([changes_since/4,changes_since/5,read_doc/2,new_revid/1]).
 -export([check_is_admin/1, check_is_member/1]).
--export([reopen/1, is_system_db/1]).
+-export([reopen/1, is_system_db/1, compression/1]).
 
 -include("couch_db.hrl").
 
@@ -416,6 +416,9 @@ set_revs_limit(_Db, _Limit) ->
 name(#db{name=Name}) ->
     Name.
 
+compression(#db{compression=Compression}) -> % accessor for the compression field of a #db{} record
+    Compression.
+
 update_doc(Db, Doc, Options) ->
     update_doc(Db, Doc, Options, interactive_edit).
 

http://git-wip-us.apache.org/repos/asf/couchdb/blob/7309340c/src/couchdb/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couchdb/couch_db_updater.erl b/src/couchdb/couch_db_updater.erl
index 54531db..bb22278 100644
--- a/src/couchdb/couch_db_updater.erl
+++ b/src/couchdb/couch_db_updater.erl
@@ -899,7 +899,8 @@ copy_docs(Db, #db{updater_fd = DestFd} = NewDb, InfoBySeq0, Retry) ->
 
 copy_compact(Db, NewDb0, Retry) ->
     FsyncOptions = [Op || Op <- NewDb0#db.fsync_options, Op == before_header],
-    NewDb = NewDb0#db{fsync_options=FsyncOptions},
+    Compression = couch_compress:get_compression_method(),
+    NewDb = NewDb0#db{fsync_options=FsyncOptions, compression=Compression},
     TotalChanges = couch_db:count_changes_since(Db, NewDb#db.update_seq),
     BufferSize = list_to_integer(
         couch_config:get("database_compaction", "doc_buffer_size", "524288")),
@@ -1018,14 +1019,14 @@ update_compact_task(NumChanges) ->
     couch_task_status:update([{changes_done, Changes2}, {progress, Progress}]).
 
 make_doc_summary(#db{compression = Comp}, {Body0, Atts0}) ->
-    Body = case couch_compress:is_compressed(Body0) of
+    Body = case couch_compress:is_compressed(Body0, Comp) of
     true ->
         Body0;
     false ->
         % pre 1.2 database file format
         couch_compress:compress(Body0, Comp)
     end,
-    Atts = case couch_compress:is_compressed(Atts0) of
+    Atts = case couch_compress:is_compressed(Atts0, Comp) of
     true ->
         Atts0;
     false ->

http://git-wip-us.apache.org/repos/asf/couchdb/blob/7309340c/test/etap/076-file-compression.t
----------------------------------------------------------------------
diff --git a/test/etap/076-file-compression.t b/test/etap/076-file-compression.t
new file mode 100755
index 0000000..2929230
--- /dev/null
+++ b/test/etap/076-file-compression.t
@@ -0,0 +1,186 @@
+#!/usr/bin/env escript
+%% -*- erlang -*-
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-record(user_ctx, { % user context passed as the user_ctx option when creating/deleting the test db
+    name = null,
+    roles = [], % this test sets it to [<<"_admin">>]
+    handler
+}).
+
+test_db_name() -> <<"couch_test_file_compression">>. % database created and deleted by this test
+ddoc_id() -> <<"_design/test">>. % design document that holds view1
+num_docs() -> 5000. % number of documents create_database/0 asks populate_db/2 to insert
+
+% Escript entry point: plans 10 etap checks, runs test/0 and bails out if it does not return ok.
+main(_) ->
+    test_util:init_code_path(),
+    etap:plan(10),
+    Outcome = (catch test()),
+    case Outcome of
+    ok ->
+        etap:end_tests();
+    Failure ->
+        etap:diag(io_lib:format("Test died abnormally: ~p", [Failure])),
+        etap:bail(Failure)
+    end,
+    ok.
+
+% Recompacts the test database and its view under each file_compression value and compares disk sizes.
+test() ->
+    couch_server_sup:start_link(test_util:config_files()),
+    couch_config:set("couchdb", "file_compression", "none", false), % baseline: none
+
+    create_database(),
+    compact_db(),
+    compact_view(),
+    DbDiskSize1 = db_disk_size(),
+    ViewDiskSize1 = view_disk_size(),
+
+    couch_config:set("couchdb", "file_compression", "snappy", false), % none -> snappy
+    compact_db(),
+    compact_view(),
+    DbDiskSize2 = db_disk_size(),
+    ViewDiskSize2 = view_disk_size(),
+
+    etap:is(DbDiskSize2 < DbDiskSize1, true, "Database disk size decreased"),
+    etap:is(ViewDiskSize2 < ViewDiskSize1, true, "Index disk size decreased"),
+
+    couch_config:set("couchdb", "file_compression", "deflate_9", false), % snappy -> deflate_9
+    compact_db(),
+    compact_view(),
+    DbDiskSize3 = db_disk_size(),
+    ViewDiskSize3 = view_disk_size(),
+
+    etap:is(DbDiskSize3 < DbDiskSize2, true, "Database disk size decreased again"),
+    etap:is(ViewDiskSize3 < ViewDiskSize2, true, "Index disk size decreased again"),
+
+    couch_config:set("couchdb", "file_compression", "deflate_1", false), % deflate_9 -> deflate_1
+    compact_db(),
+    compact_view(),
+    DbDiskSize4 = db_disk_size(),
+    ViewDiskSize4 = view_disk_size(),
+
+    etap:is(DbDiskSize4 > DbDiskSize3, true, "Database disk size increased"),
+    etap:is(ViewDiskSize4 > ViewDiskSize3, true, "Index disk size increased"),
+
+    couch_config:set("couchdb", "file_compression", "snappy", false), % deflate_1 -> snappy
+    compact_db(),
+    compact_view(),
+    DbDiskSize5 = db_disk_size(),
+    ViewDiskSize5 = view_disk_size(),
+
+    etap:is(DbDiskSize5 > DbDiskSize4, true, "Database disk size increased again"),
+    etap:is(ViewDiskSize5 > ViewDiskSize4, true, "Index disk size increased again"),
+
+    couch_config:set("couchdb", "file_compression", "none", false), % snappy -> none
+    compact_db(),
+    compact_view(),
+    DbDiskSize6 = db_disk_size(),
+    ViewDiskSize6 = view_disk_size(),
+
+    etap:is(DbDiskSize6 > DbDiskSize5, true, "Database disk size increased with compression disabled"),
+    etap:is(ViewDiskSize6 > ViewDiskSize5, true, "Index disk size increased with compression disabled"),
+
+    delete_db(),
+    couch_server_sup:stop(),
+    ok.
+
+% Creates the test database (overwriting any old one), fills it, stores the design doc, queries view1.
+create_database() ->
+    AdminCtx = #user_ctx{roles = [<<"_admin">>]},
+    Options = [{user_ctx, AdminCtx}, overwrite],
+    {ok, Db} = couch_db:create(test_db_name(), Options),
+    ok = populate_db(Db, num_docs()),
+    MapSrc = <<"function(doc) { emit(doc._id, doc.string); }">>,
+    Views = {[
+        {<<"view1">>, {[{<<"map">>, MapSrc}]}}
+    ]},
+    DDocProps = [
+        {<<"_id">>, ddoc_id()},
+        {<<"language">>, <<"javascript">>},
+        {<<"views">>, Views}
+    ],
+    DDoc = couch_doc:from_json_obj({DDocProps}),
+    {ok, _} = couch_db:update_doc(Db, DDoc, []),
+    refresh_index(), % queries view1 with {stale, false}
+    ok = couch_db:close(Db).
+
+% Inserts NumDocs documents (random id, 1000-byte string field) in batches of at most 500.
+populate_db(_Db, NumDocs) when NumDocs =< 0 ->
+    ok;
+populate_db(Db, NumDocs) ->
+    BatchSize = erlang:min(NumDocs, 500), % cap the last batch so exactly NumDocs docs are written
+    Docs = lists:map(
+        fun(_) ->
+            couch_doc:from_json_obj({[
+                {<<"_id">>, couch_uuids:random()},
+                {<<"string">>, list_to_binary(lists:duplicate(1000, $X))}
+            ]})
+        end, lists:seq(1, BatchSize)),
+    {ok, _} = couch_db:update_docs(Db, Docs, []),
+    populate_db(Db, NumDocs - BatchSize).
+
+% Opens the test database and queries view1 with {stale, false}; the query result is discarded.
+refresh_index() ->
+    {ok, Handle} = couch_db:open_int(test_db_name(), []),
+    {ok, DesignDoc} = couch_db:open_doc(Handle, ddoc_id(), [ejson_body]),
+    couch_mrview:query_view(Handle, DesignDoc, <<"view1">>, [{stale, false}]),
+    ok = couch_db:close(Handle).
+
+% Starts database compaction and waits up to 120 s for the compactor process to exit.
+compact_db() ->
+    {ok, Db} = couch_db:open_int(test_db_name(), []),
+    {ok, CompactPid} = couch_db:start_compact(Db),
+    MonRef = erlang:monitor(process, CompactPid),
+    receive
+    {'DOWN', MonRef, process, CompactPid, R} when R =:= normal; R =:= noproc ->
+        ok; % noproc: the compactor had already exited when the monitor was installed
+    {'DOWN', MonRef, process, CompactPid, Reason} ->
+        etap:bail("Error compacting database: " ++ couch_util:to_list(Reason))
+    after 120000 ->
+        etap:bail("Timeout waiting for database compaction")
+    end,
+    ok = couch_db:close(Db).
+
+% Starts view group compaction with the monitor option and waits up to 120 s for the 'DOWN' message.
+compact_view() ->
+    {ok, Ref} = couch_mrview:compact(test_db_name(), ddoc_id(), [monitor]),
+    receive
+    {'DOWN', Ref, process, _Pid, normal} -> ok;
+    {'DOWN', Ref, process, _Pid, Why} ->
+        Msg = "Error compacting view group: " ++ couch_util:to_list(Why),
+        etap:bail(Msg)
+    after 120000 ->
+        etap:bail("Timeout waiting for view group compaction")
+    end.
+
+% Returns the disk_size value reported by couch_db:get_db_info/1 for the test database.
+db_disk_size() ->
+    {ok, Handle} = couch_db:open_int(test_db_name(), []),
+    {ok, DbInfo} = couch_db:get_db_info(Handle),
+    ok = couch_db:close(Handle),
+    couch_util:get_value(disk_size, DbInfo).
+
+% Returns the disk_size value reported by couch_mrview:get_info/2 for the test design document.
+view_disk_size() ->
+    {ok, Handle} = couch_db:open_int(test_db_name(), []),
+    {ok, DesignDoc} = couch_db:open_doc(Handle, ddoc_id(), [ejson_body]),
+    {ok, IndexInfo} = couch_mrview:get_info(Handle, DesignDoc),
+    ok = couch_db:close(Handle),
+    couch_util:get_value(disk_size, IndexInfo).
+
+% Deletes the test database through couch_server using an _admin user context.
+delete_db() ->
+    AdminCtx = #user_ctx{roles = [<<"_admin">>]},
+    ok = couch_server:delete(test_db_name(), [{user_ctx, AdminCtx}]).

http://git-wip-us.apache.org/repos/asf/couchdb/blob/7309340c/test/etap/Makefile.am
----------------------------------------------------------------------
diff --git a/test/etap/Makefile.am b/test/etap/Makefile.am
index 2eb4e6a..be85c49 100644
--- a/test/etap/Makefile.am
+++ b/test/etap/Makefile.am
@@ -58,6 +58,7 @@ EXTRA_DIST = \
     073-changes.t \
     074-doc-update-conflicts.t \
     075-auth-cache.t \
+    076-file-compression.t \
     080-config-get-set.t \
     081-config-override.1.ini \
     081-config-override.2.ini \