Posted to commits@couchdb.apache.org by da...@apache.org on 2017/02/06 21:43:10 UTC

[4/6] couch commit: updated refs/heads/45918-pluggable-storage-engines to 0211034

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/9c4b8432/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index 7872635..e08f9c0 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -14,73 +14,37 @@
 -behaviour(gen_server).
 -vsn(1).
 
--export([btree_by_id_split/1, btree_by_id_join/2, btree_by_id_reduce/2]).
--export([btree_by_seq_split/1, btree_by_seq_join/2, btree_by_seq_reduce/2]).
--export([make_doc_summary/2]).
+-export([make_doc_summary/2, add_sizes/3, upgrade_sizes/1]).
 -export([init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3,handle_info/2]).
 
 -include_lib("couch/include/couch_db.hrl").
 
--record(comp_header, {
-    db_header,
-    meta_state
-}).
 
--record(merge_st, {
-    id_tree,
-    seq_tree,
-    curr,
-    rem_seqs,
-    infos
-}).
 
-init({DbName, Filepath, Fd, Options}) ->
+init({Engine, DbName, FilePath, Options0}) ->
     erlang:put(io_priority, {db_update, DbName}),
-    case lists:member(create, Options) of
-    true ->
-        % create a new header and writes it to the file
-        Header =  couch_db_header:new(),
-        ok = couch_file:write_header(Fd, Header),
-        % delete any old compaction files that might be hanging around
-        RootDir = config:get("couchdb", "database_dir", "."),
-        couch_file:delete(RootDir, Filepath ++ ".compact"),
-        couch_file:delete(RootDir, Filepath ++ ".compact.data"),
-        couch_file:delete(RootDir, Filepath ++ ".compact.meta");
-    false ->
-        case couch_file:read_header(Fd) of
-        {ok, Header} ->
-            ok;
-        no_valid_header ->
-            % create a new header and writes it to the file
-            Header =  couch_db_header:new(),
-            ok = couch_file:write_header(Fd, Header),
-            % delete any old compaction files that might be hanging around
-            file:delete(Filepath ++ ".compact"),
-            file:delete(Filepath ++ ".compact.data"),
-            file:delete(Filepath ++ ".compact.meta")
-        end
-    end,
-    Db = init_db(DbName, Filepath, Fd, Header, Options),
-    case lists:member(sys_db, Options) of
-        false ->
-            couch_stats_process_tracker:track([couchdb, open_databases]);
-        true ->
-            ok
-    end,
-    % we don't load validation funs here because the fabric query is liable to
-    % race conditions.  Instead see couch_db:validate_doc_update, which loads
-    % them lazily
-    {ok, Db#db{main_pid = self()}}.
+    DefaultSecObj = default_security_object(DbName),
+    Options = [{default_security_object, DefaultSecObj} | Options0],
+    try
+        {ok, EngineState} = couch_db_engine:init(Engine, FilePath, Options),
+        Db = init_db(DbName, FilePath, EngineState, Options),
+        maybe_track_db(Db),
+        % we don't load validation funs here because the fabric query is liable to
+        % race conditions.  Instead see couch_db:validate_doc_update, which loads
+        % them lazily
+        NewDb = Db#db{main_pid = self()},
+        proc_lib:init_ack({ok, NewDb}),
+        gen_server:enter_loop(?MODULE, [], NewDb)
+    catch
+        throw:InitError ->
+            proc_lib:init_ack(InitError)
+    end.
 
 
-terminate(_Reason, Db) ->
-    % If the reason we died is because our fd disappeared
-    % then we don't need to try closing it again.
-    if Db#db.fd_monitor == closed -> ok; true ->
-        ok = couch_file:close(Db#db.fd)
-    end,
+terminate(Reason, Db) ->
+    couch_log:error("STOPPING DB: ~s", [Db#db.name]),
     couch_util:shutdown_sync(Db#db.compactor_pid),
-    couch_util:shutdown_sync(Db#db.fd),
+    couch_db_engine:terminate(Reason, Db),
     ok.
 
 handle_call(get_db, _From, Db) ->
@@ -104,28 +68,21 @@ handle_call(cancel_compact, _From, #db{compactor_pid = nil} = Db) ->
 handle_call(cancel_compact, _From, #db{compactor_pid = Pid} = Db) ->
     unlink(Pid),
     exit(Pid, kill),
-    RootDir = config:get("couchdb", "database_dir", "."),
-    ok = couch_file:delete(RootDir, Db#db.filepath ++ ".compact"),
+    couch_server:delete_compaction_files(Db#db.name),
     Db2 = Db#db{compactor_pid = nil},
     ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
     {reply, ok, Db2};
-handle_call(increment_update_seq, _From, Db) ->
-    Db2 = commit_data(Db#db{update_seq=Db#db.update_seq+1}),
-    ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
-    couch_event:notify(Db#db.name, updated),
-    {reply, {ok, Db2#db.update_seq}, Db2};
 
-handle_call({set_security, NewSec}, _From, #db{compression = Comp} = Db) ->
-    {ok, Ptr, _} = couch_file:append_term(
-        Db#db.fd, NewSec, [{compression, Comp}]),
-    Db2 = commit_data(Db#db{security=NewSec, security_ptr=Ptr,
-            update_seq=Db#db.update_seq+1}),
-    ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
-    {reply, ok, Db2};
+handle_call({set_security, NewSec}, _From, #db{} = Db) ->
+    {ok, NewDb} = couch_db_engine:set(Db, security, NewSec),
+    NewSecDb = NewDb#db{
+        security = NewSec
+    },
+    ok = gen_server:call(couch_server, {db_updated, NewSecDb}, infinity),
+    {reply, ok, NewSecDb};
 
 handle_call({set_revs_limit, Limit}, _From, Db) ->
-    Db2 = commit_data(Db#db{revs_limit=Limit,
-            update_seq=Db#db.update_seq+1}),
+    {ok, Db2} = couch_db_engine:set(Db, revs_limit, Limit),
     ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
     {reply, ok, Db2};
 
@@ -133,73 +90,78 @@ handle_call({purge_docs, _IdRevs}, _From,
         #db{compactor_pid=Pid}=Db) when Pid /= nil ->
     {reply, {error, purge_during_compaction}, Db};
 handle_call({purge_docs, IdRevs}, _From, Db) ->
-    #db{
-        fd = Fd,
-        id_tree = DocInfoByIdBTree,
-        seq_tree = DocInfoBySeqBTree,
-        update_seq = LastSeq,
-        header = Header,
-        compression = Comp
-        } = Db,
-    DocLookups = couch_btree:lookup(DocInfoByIdBTree,
-            [Id || {Id, _Revs} <- IdRevs]),
-
-    NewDocInfos = lists:zipwith(
-        fun({_Id, Revs}, {ok, #full_doc_info{rev_tree=Tree}=FullDocInfo}) ->
+    DocIds = [Id || {Id, _Revs} <- IdRevs],
+    OldDocInfos = couch_db_engine:open_docs(Db, DocIds),
+
+    NewDocInfos = lists:flatmap(fun
+        ({{Id, Revs}, #full_doc_info{id = Id, rev_tree = Tree} = FDI}) ->
             case couch_key_tree:remove_leafs(Tree, Revs) of
-            {_, []=_RemovedRevs} -> % no change
-                nil;
-            {NewTree, RemovedRevs} ->
-                {FullDocInfo#full_doc_info{rev_tree=NewTree},RemovedRevs}
+                {_, [] = _RemovedRevs} -> % no change
+                    [];
+                {NewTree, RemovedRevs} ->
+                    NewFDI = FDI#full_doc_info{rev_tree = NewTree},
+                    [{FDI, NewFDI, RemovedRevs}]
             end;
-        (_, not_found) ->
-            nil
+        ({_, not_found}) ->
+            []
+    end, lists:zip(IdRevs, OldDocInfos)),
+
+    InitUpdateSeq = couch_db_engine:get(Db, update_seq),
+    InitAcc = {InitUpdateSeq, [], []},
+    FinalAcc = lists:foldl(fun({_, #full_doc_info{} = OldFDI, RemRevs}, Acc) ->
+        #full_doc_info{
+            id = Id,
+            rev_tree = OldTree
+        } = OldFDI,
+        {SeqAcc0, FDIAcc, IdRevsAcc} = Acc,
+
+        {NewFDIAcc, NewSeqAcc} = case OldTree of
+            [] ->
+                % If we purged every #leaf{} in the doc record
+                % then we're removing it completely from the
+                % database.
+                {FDIAcc, SeqAcc0};
+            _ ->
+                % It's possible to purge the #leaf{} that contains
+                % the update_seq where this doc sits in the update_seq
+                % sequence. Rather than do a bunch of complicated checks
+                % we just re-label every #leaf{} and reinsert it into
+                % the update_seq sequence.
+                {NewTree, SeqAcc1} = couch_key_tree:mapfold(fun
+                    (_RevId, Leaf, leaf, InnerSeqAcc) ->
+                        {Leaf#leaf{seq = InnerSeqAcc + 1}, InnerSeqAcc + 1};
+                    (_RevId, Value, _Type, InnerSeqAcc) ->
+                        {Value, InnerSeqAcc}
+                end, SeqAcc0, OldTree),
+
+                NewFDI = OldFDI#full_doc_info{
+                    update_seq = SeqAcc1,
+                    rev_tree = NewTree
+                },
+
+                {[NewFDI | FDIAcc], SeqAcc1}
         end,
-        IdRevs, DocLookups),
-
-    SeqsToRemove = [Seq
-            || {#full_doc_info{update_seq=Seq},_} <- NewDocInfos],
-
-    FullDocInfoToUpdate = [FullInfo
-            || {#full_doc_info{rev_tree=Tree}=FullInfo,_}
-            <- NewDocInfos, Tree /= []],
-
-    IdRevsPurged = [{Id, Revs}
-            || {#full_doc_info{id=Id}, Revs} <- NewDocInfos],
-
-    {DocInfoToUpdate, NewSeq} = lists:mapfoldl(
-        fun(#full_doc_info{rev_tree=Tree}=FullInfo, SeqAcc) ->
-            Tree2 = couch_key_tree:map_leafs(
-                fun(_RevId, Leaf) ->
-                    Leaf#leaf{seq=SeqAcc+1}
-                end, Tree),
-            {FullInfo#full_doc_info{rev_tree=Tree2}, SeqAcc + 1}
-        end, LastSeq, FullDocInfoToUpdate),
-
-    IdsToRemove = [Id || {#full_doc_info{id=Id,rev_tree=[]},_}
-            <- NewDocInfos],
-
-    {ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree,
-            DocInfoToUpdate, SeqsToRemove),
-    {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree,
-            FullDocInfoToUpdate, IdsToRemove),
-    {ok, Pointer, _} = couch_file:append_term(
-            Fd, IdRevsPurged, [{compression, Comp}]),
-
-    NewHeader = couch_db_header:set(Header, [
-        {purge_seq, couch_db_header:purge_seq(Header) + 1},
-        {purged_docs, Pointer}
-    ]),
-    Db2 = commit_data(
-        Db#db{
-            id_tree = DocInfoByIdBTree2,
-            seq_tree = DocInfoBySeqBTree2,
-            update_seq = NewSeq + 1,
-            header=NewHeader}),
+        NewIdRevsAcc = [{Id, RemRevs} | IdRevsAcc],
+        {NewSeqAcc, NewFDIAcc, NewIdRevsAcc}
+    end, InitAcc, NewDocInfos),
+
+    {_FinalSeq, FDIs, PurgedIdRevs} = FinalAcc,
+
+    % We only need to use the list of #full_doc_info{} records
+    % that we have actually changed due to a purge.
+    PreviousFDIs = [PrevFDI || {PrevFDI, _, _} <- NewDocInfos],
+    Pairs = pair_purge_info(PreviousFDIs, FDIs),
+
+    {ok, Db2} = couch_db_engine:write_doc_infos(Db, Pairs, [], PurgedIdRevs),
 
     ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
     couch_event:notify(Db#db.name, updated),
-    {reply, {ok, couch_db_header:purge_seq(NewHeader), IdRevsPurged}, Db2}.
+
+    PurgeSeq = couch_db_engine:get(Db2, purge_seq),
+    {reply, {ok, PurgeSeq, PurgedIdRevs}, Db2};
+
+handle_call(Msg, From, Db) ->
+    couch_db_engine:handle_call(Msg, From, Db).
 
 
 handle_cast({load_validation_funs, ValidationFuns}, Db) ->
@@ -208,65 +170,29 @@ handle_cast({load_validation_funs, ValidationFuns}, Db) ->
     {noreply, Db2};
 handle_cast(start_compact, Db) ->
     case Db#db.compactor_pid of
-    nil ->
-        couch_log:info("Starting compaction for db \"~s\"", [Db#db.name]),
-        Pid = spawn_link(fun() -> start_copy_compact(Db) end),
-        Db2 = Db#db{compactor_pid=Pid},
-        ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
-        {noreply, Db2};
-    _ ->
-        % compact currently running, this is a no-op
-        {noreply, Db}
-    end;
-handle_cast({compact_done, CompactFilepath}, #db{filepath=Filepath}=Db) ->
-    {ok, NewFd} = couch_file:open(CompactFilepath),
-    {ok, NewHeader0} = couch_file:read_header(NewFd),
-    NewHeader = couch_db_header:set(NewHeader0, [
-        {compacted_seq, Db#db.update_seq}
-    ]),
-    #db{update_seq=NewSeq} = NewDb =
-        init_db(Db#db.name, Filepath, NewFd, NewHeader, Db#db.options),
-    unlink(NewFd),
-    case Db#db.update_seq == NewSeq of
-    true ->
-        % suck up all the local docs into memory and write them to the new db
-        {ok, _, LocalDocs} = couch_btree:foldl(Db#db.local_tree,
-                fun(Value, _Offset, Acc) -> {ok, [Value | Acc]} end, []),
-        {ok, NewLocalBtree} = couch_btree:add(NewDb#db.local_tree, LocalDocs),
-
-        NewDb2 = commit_data(NewDb#db{
-            local_tree = NewLocalBtree,
-            main_pid = self(),
-            filepath = Filepath,
-            instance_start_time = Db#db.instance_start_time,
-            revs_limit = Db#db.revs_limit
-        }),
-
-        couch_log:debug("CouchDB swapping files ~s and ~s.",
-                        [Filepath, CompactFilepath]),
-        ok = file:rename(CompactFilepath, Filepath ++ ".compact"),
-        RootDir = config:get("couchdb", "database_dir", "."),
-        couch_file:delete(RootDir, Filepath),
-        ok = file:rename(Filepath ++ ".compact", Filepath),
-        % Delete the old meta compaction file after promoting
-        % the compaction file.
-        couch_file:delete(RootDir, Filepath ++ ".compact.meta"),
-        close_db(Db),
-        NewDb3 = refresh_validate_doc_funs(NewDb2),
-        ok = gen_server:call(couch_server, {db_updated, NewDb3}, infinity),
-        couch_event:notify(NewDb3#db.name, compacted),
-        couch_log:info("Compaction for db \"~s\" completed.", [Db#db.name]),
-        {noreply, NewDb3#db{compactor_pid=nil}};
-    false ->
-        couch_log:info("Compaction file still behind main file "
-                       "(update seq=~p. compact update seq=~p). Retrying.",
-                       [Db#db.update_seq, NewSeq]),
-        close_db(NewDb),
-        Pid = spawn_link(fun() -> start_copy_compact(Db) end),
-        Db2 = Db#db{compactor_pid=Pid},
-        ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
-        {noreply, Db2}
+        nil ->
+            % For now we only support compacting to the same
+            % storage engine. After the first round of patches
+            % we'll add a field that sets the target engine
+            % type to compact to, using a new copy compactor.
+            UpdateSeq = couch_db_engine:get(Db, update_seq),
+            Args = [Db#db.name, UpdateSeq],
+            couch_log:info("Starting compaction for db \"~s\" at ~p", Args),
+            {ok, Db2} = couch_db_engine:start_compaction(Db),
+            ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
+            {noreply, Db2};
+        _ ->
+            % compact currently running, this is a no-op
+            {noreply, Db}
     end;
+handle_cast({compact_done, CompactEngine, CompactInfo}, #db{} = OldDb) ->
+    {ok, NewDb} = case couch_db_engine:get(OldDb, engine) of
+        CompactEngine ->
+            couch_db_engine:finish_compaction(OldDb, CompactInfo);
+        _ ->
+            finish_engine_swap(OldDb, CompactEngine, CompactInfo)
+    end,
+    {noreply, NewDb};
 
 handle_cast(Msg, #db{name = Name} = Db) ->
     couch_log:error("Database `~s` updater received unexpected cast: ~p",
@@ -290,9 +216,9 @@ handle_info({update_docs, Client, GroupedDocs, NonRepDocs, MergeConflicts,
                 FullCommit2) of
     {ok, Db2, UpdatedDDocIds} ->
         ok = gen_server:call(couch_server, {db_updated, Db2}, infinity),
-        if Db2#db.update_seq /= Db#db.update_seq ->
-            couch_event:notify(Db2#db.name, updated);
-        true -> ok
+        case {get_update_seq(Db), get_update_seq(Db2)} of
+            {Seq, Seq} -> ok;
+            _ -> couch_event:notify(Db2#db.name, updated)
         end,
         if NonRepDocs2 /= [] ->
             couch_event:notify(Db2#db.name, local_updated);
@@ -335,9 +261,8 @@ handle_info({'EXIT', _Pid, normal}, Db) ->
     {noreply, Db};
 handle_info({'EXIT', _Pid, Reason}, Db) ->
     {stop, Reason, Db};
-handle_info({'DOWN', Ref, _, _, Reason}, #db{fd_monitor=Ref, name=Name} = Db) ->
-    couch_log:error("DB ~s shutting down - Fd ~p", [Name, Reason]),
-    {stop, normal, Db#db{fd=undefined, fd_monitor=closed}}.
+handle_info(Msg, Db) ->
+    couch_db_engine:handle_info(Msg, Db).
 
 code_change(_OldVsn, State, _Extra) ->
     {ok, State}.
@@ -388,235 +313,35 @@ collect_updates(GroupedDocsAcc, ClientsAcc, MergeConflicts, FullCommit) ->
         {GroupedDocsAcc, ClientsAcc, FullCommit}
     end.
 
-rev_tree(DiskTree) ->
-    couch_key_tree:map(fun
-        (_RevId, {Del, Ptr, Seq}) ->
-            #leaf{
-                deleted = ?i2b(Del),
-                ptr = Ptr,
-                seq = Seq
-            };
-        (_RevId, {Del, Ptr, Seq, Size}) ->
-            #leaf{
-                deleted = ?i2b(Del),
-                ptr = Ptr,
-                seq = Seq,
-                sizes = upgrade_sizes(Size)
-            };
-        (_RevId, {Del, Ptr, Seq, Sizes, Atts}) ->
-            #leaf{
-                deleted = ?i2b(Del),
-                ptr = Ptr,
-                seq = Seq,
-                sizes = upgrade_sizes(Sizes),
-                atts = Atts
-            };
-        (_RevId, ?REV_MISSING) ->
-            ?REV_MISSING
-    end, DiskTree).
-
-disk_tree(RevTree) ->
-    couch_key_tree:map(fun
-        (_RevId, ?REV_MISSING) ->
-            ?REV_MISSING;
-        (_RevId, #leaf{} = Leaf) ->
-            #leaf{
-                deleted = Del,
-                ptr = Ptr,
-                seq = Seq,
-                sizes = Sizes,
-                atts = Atts
-            } = Leaf,
-            {?b2i(Del), Ptr, Seq, split_sizes(Sizes), Atts}
-    end, RevTree).
 
-upgrade_sizes(#size_info{}=SI) ->
-    SI;
-upgrade_sizes({D, E}) ->
-    #size_info{active=D, external=E};
-upgrade_sizes(S) when is_integer(S) ->
-    #size_info{active=S, external=0}.
-
-split_sizes(#size_info{}=SI) ->
-    {SI#size_info.active, SI#size_info.external}.
-
-join_sizes({Active, External}) when is_integer(Active), is_integer(External) ->
-    #size_info{active=Active, external=External}.
-
-btree_by_seq_split(#full_doc_info{}=Info) ->
-    #full_doc_info{
-        id = Id,
-        update_seq = Seq,
-        deleted = Del,
-        sizes = SizeInfo,
-        rev_tree = Tree
-    } = Info,
-    {Seq, {Id, ?b2i(Del), split_sizes(SizeInfo), disk_tree(Tree)}}.
-
-btree_by_seq_join(Seq, {Id, Del, DiskTree}) when is_integer(Del) ->
-    btree_by_seq_join(Seq, {Id, Del, {0, 0}, DiskTree});
-btree_by_seq_join(Seq, {Id, Del, Sizes, DiskTree}) when is_integer(Del) ->
-    #full_doc_info{
-        id = Id,
-        update_seq = Seq,
-        deleted = ?i2b(Del),
-        sizes = join_sizes(Sizes),
-        rev_tree = rev_tree(DiskTree)
-    };
-btree_by_seq_join(KeySeq, {Id, RevInfos, DeletedRevInfos}) ->
-    % Older versions stored #doc_info records in the seq_tree.
-    % Compact to upgrade.
-    #doc_info{
-        id = Id,
-        high_seq=KeySeq,
-        revs =
-            [#rev_info{rev=Rev,seq=Seq,deleted=false,body_sp = Bp} ||
-                {Rev, Seq, Bp} <- RevInfos] ++
-            [#rev_info{rev=Rev,seq=Seq,deleted=true,body_sp = Bp} ||
-                {Rev, Seq, Bp} <- DeletedRevInfos]}.
-
-btree_by_id_split(#full_doc_info{}=Info) ->
-    #full_doc_info{
-        id = Id,
-        update_seq = Seq,
-        deleted = Deleted,
-        sizes = SizeInfo,
-        rev_tree = Tree
-    } = Info,
-    {Id, {Seq, ?b2i(Deleted), split_sizes(SizeInfo), disk_tree(Tree)}}.
-
-% Handle old formats before data_size was added
-btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
-    btree_by_id_join(Id, {HighSeq, Deleted, #size_info{}, DiskTree});
-
-btree_by_id_join(Id, {HighSeq, Deleted, Sizes, DiskTree}) ->
-    #full_doc_info{
-        id = Id,
-        update_seq = HighSeq,
-        deleted = ?i2b(Deleted),
-        sizes = upgrade_sizes(Sizes),
-        rev_tree = rev_tree(DiskTree)
-    }.
-
-btree_by_id_reduce(reduce, FullDocInfos) ->
-    lists:foldl(
-        fun(Info, {NotDeleted, Deleted, Sizes}) ->
-            Sizes2 = reduce_sizes(Sizes, Info#full_doc_info.sizes),
-            case Info#full_doc_info.deleted of
-            true ->
-                {NotDeleted, Deleted + 1, Sizes2};
-            false ->
-                {NotDeleted + 1, Deleted, Sizes2}
-            end
-        end,
-        {0, 0, #size_info{}}, FullDocInfos);
-btree_by_id_reduce(rereduce, Reds) ->
-    lists:foldl(
-        fun({NotDeleted, Deleted}, {AccNotDeleted, AccDeleted, _AccSizes}) ->
-            % pre 1.2 format, will be upgraded on compaction
-            {AccNotDeleted + NotDeleted, AccDeleted + Deleted, nil};
-        ({NotDeleted, Deleted, Sizes}, {AccNotDeleted, AccDeleted, AccSizes}) ->
-            AccSizes2 = reduce_sizes(AccSizes, Sizes),
-            {AccNotDeleted + NotDeleted, AccDeleted + Deleted, AccSizes2}
-        end,
-        {0, 0, #size_info{}}, Reds).
-
-reduce_sizes(nil, _) ->
-    nil;
-reduce_sizes(_, nil) ->
-    nil;
-reduce_sizes(#size_info{}=S1, #size_info{}=S2) ->
-    #size_info{
-        active = S1#size_info.active + S2#size_info.active,
-        external = S1#size_info.external + S2#size_info.external
-    };
-reduce_sizes(S1, S2) ->
-    reduce_sizes(upgrade_sizes(S1), upgrade_sizes(S2)).
-
-btree_by_seq_reduce(reduce, DocInfos) ->
-    % count the number of documents
-    length(DocInfos);
-btree_by_seq_reduce(rereduce, Reds) ->
-    lists:sum(Reds).
-
-init_db(DbName, Filepath, Fd, Header0, Options) ->
-    Header = couch_db_header:upgrade(Header0),
-
-    {ok, FsyncOptions} = couch_util:parse_term(
-            config:get("couchdb", "fsync_options",
-                    "[before_header, after_header, on_file_open]")),
-
-    case lists:member(on_file_open, FsyncOptions) of
-    true -> ok = couch_file:sync(Fd);
-    _ -> ok
-    end,
-
-    Compression = couch_compress:get_compression_method(),
-
-    IdTreeState = couch_db_header:id_tree_state(Header),
-    SeqTreeState = couch_db_header:seq_tree_state(Header),
-    LocalTreeState = couch_db_header:local_tree_state(Header),
-    {ok, IdBtree} = couch_btree:open(IdTreeState, Fd,
-        [{split, fun ?MODULE:btree_by_id_split/1},
-        {join, fun ?MODULE:btree_by_id_join/2},
-        {reduce, fun ?MODULE:btree_by_id_reduce/2},
-        {compression, Compression}]),
-    {ok, SeqBtree} = couch_btree:open(SeqTreeState, Fd,
-            [{split, fun ?MODULE:btree_by_seq_split/1},
-            {join, fun ?MODULE:btree_by_seq_join/2},
-            {reduce, fun ?MODULE:btree_by_seq_reduce/2},
-            {compression, Compression}]),
-    {ok, LocalDocsBtree} = couch_btree:open(LocalTreeState, Fd,
-        [{compression, Compression}]),
-    case couch_db_header:security_ptr(Header) of
-    nil ->
-        Security = default_security_object(DbName),
-        SecurityPtr = nil;
-    SecurityPtr ->
-        {ok, Security} = couch_file:pread_term(Fd, SecurityPtr)
-    end,
+init_db(DbName, FilePath, EngineState, Options) ->
     % convert start time tuple to microsecs and store as a binary string
     {MegaSecs, Secs, MicroSecs} = os:timestamp(),
     StartTime = ?l2b(io_lib:format("~p",
             [(MegaSecs*1000000*1000000) + (Secs*1000000) + MicroSecs])),
-    ok = couch_file:set_db_pid(Fd, self()),
-    Db = #db{
-        fd=Fd,
-        fd_monitor = erlang:monitor(process, Fd),
-        header=Header,
-        id_tree = IdBtree,
-        seq_tree = SeqBtree,
-        local_tree = LocalDocsBtree,
-        committed_update_seq = couch_db_header:update_seq(Header),
-        update_seq = couch_db_header:update_seq(Header),
+
+    BDU = couch_util:get_value(before_doc_update, Options, nil),
+    ADR = couch_util:get_value(after_doc_read, Options, nil),
+
+    CleanedOptions = lists:foldl(fun
+        (create, Acc) -> Acc;
+        (Else, Acc) -> [Else | Acc]
+    end, [], Options),
+
+    InitDb = #db{
         name = DbName,
-        filepath = Filepath,
-        security = Security,
-        security_ptr = SecurityPtr,
+        filepath = FilePath,
+        engine = EngineState,
         instance_start_time = StartTime,
-        revs_limit = couch_db_header:revs_limit(Header),
-        fsync_options = FsyncOptions,
-        options = Options,
-        compression = Compression,
-        before_doc_update = couch_util:get_value(before_doc_update, Options, nil),
-        after_doc_read = couch_util:get_value(after_doc_read, Options, nil)
+        options = CleanedOptions,
+        before_doc_update = BDU,
+        after_doc_read = ADR
     },
 
-    % If we just created a new UUID while upgrading a
-    % database then we want to flush that to disk or
-    % we risk sending out the uuid and having the db
-    % crash which would result in it generating a new
-    % uuid each time it was reopened.
-    case Header /= Header0 of
-        true ->
-            sync_header(Db, Header);
-        false ->
-            Db
-    end.
-
-
-close_db(#db{fd_monitor = Ref}) ->
-    erlang:demonitor(Ref).
+    InitDb#db{
+        committed_update_seq = couch_db_engine:get(InitDb, update_seq),
+        security = couch_db_engine:get(InitDb, security)
+    }.
 
 
 refresh_validate_doc_funs(#db{name = <<"shards/", _/binary>> = Name} = Db) ->
@@ -640,50 +365,36 @@ refresh_validate_doc_funs(Db0) ->
 
 flush_trees(_Db, [], AccFlushedTrees) ->
     {ok, lists:reverse(AccFlushedTrees)};
-flush_trees(#db{fd = Fd} = Db,
+flush_trees(#db{} = Db,
         [InfoUnflushed | RestUnflushed], AccFlushed) ->
     #full_doc_info{update_seq=UpdateSeq, rev_tree=Unflushed} = InfoUnflushed,
     {Flushed, FinalAcc} = couch_key_tree:mapfold(
         fun(_Rev, Value, Type, SizesAcc) ->
             case Value of
-            #doc{deleted = IsDeleted, body = {summary, _, _, _} = DocSummary} ->
-                {summary, Summary, AttSizeInfo, AttsFd} = DocSummary,
-                % this node value is actually an unwritten document summary,
-                % write to disk.
-                % make sure the Fd in the written bins is the same Fd we are
-                % and convert bins, removing the FD.
-                % All bins should have been written to disk already.
-                case {AttsFd, Fd} of
-                {nil, _} ->
-                    ok;
-                {SameFd, SameFd} ->
-                    ok;
-                _ ->
-                    % Fd where the attachments were written to is not the same
-                    % as our Fd. This can happen when a database is being
-                    % switched out during a compaction.
-                    couch_log:debug("File where the attachments are written has"
-                                    " changed. Possibly retrying.", []),
-                    throw(retry)
-                end,
-                ExternalSize = ?term_size(Summary),
-                {ok, NewSummaryPointer, SummarySize} =
-                    couch_file:append_raw_chunk(Fd, Summary),
-                Leaf = #leaf{
-                    deleted = IsDeleted,
-                    ptr = NewSummaryPointer,
-                    seq = UpdateSeq,
-                    sizes = #size_info{
-                        active = SummarySize,
-                        external = ExternalSize
+                % This node is a document summary that needs to be
+                % flushed to disk.
+                #doc{} = Doc ->
+                    check_doc_atts(Db, Doc),
+                    ExternalSize = ?term_size(Doc#doc.body),
+                    {size_info, AttSizeInfo} =
+                            lists:keyfind(size_info, 1, Doc#doc.meta),
+                    {ok, NewDoc, WrittenSize} =
+                            couch_db_engine:write_doc_body(Db, Doc),
+                    Leaf = #leaf{
+                        deleted = Doc#doc.deleted,
+                        ptr = NewDoc#doc.body,
+                        seq = UpdateSeq,
+                        sizes = #size_info{
+                            active = WrittenSize,
+                            external = ExternalSize
+                        },
+                        atts = AttSizeInfo
                     },
-                    atts = AttSizeInfo
-                },
-                {Leaf, add_sizes(Type, Leaf, SizesAcc)};
-            #leaf{} ->
-                {Value, add_sizes(Type, Value, SizesAcc)};
-            _ ->
-                {Value, SizesAcc}
+                    {Leaf, add_sizes(Type, Leaf, SizesAcc)};
+                #leaf{} ->
+                    {Value, add_sizes(Type, Value, SizesAcc)};
+                _ ->
+                    {Value, SizesAcc}
             end
         end, {0, 0, []}, Unflushed),
     {FinalAS, FinalES, FinalAtts} = FinalAcc,
@@ -697,6 +408,29 @@ flush_trees(#db{fd = Fd} = Db,
     },
     flush_trees(Db, RestUnflushed, [NewInfo | AccFlushed]).
 
+
+check_doc_atts(Db, Doc) ->
+    {atts_stream, Stream} = lists:keyfind(atts_stream, 1, Doc#doc.meta),
+    % Make sure that the attachments were written to the currently
+    % active attachment stream. If compaction swaps during a write
+    % request we may have to rewrite our attachment bodies.
+    if Stream == nil -> ok; true ->
+        case couch_db:is_active_stream(Db, Stream) of
+            true ->
+                ok;
+            false ->
+                % Stream where the attachments were written to is
+                % no longer the current attachment stream. This
+                % can happen when a database is switched at
+                % compaction time.
+                couch_log:debug("Stream where the attachments were"
+                                " written has changed."
+                                " Possibly retrying.", []),
+                throw(retry)
+        end
+    end.
+
+
 add_sizes(Type, #leaf{sizes=Sizes, atts=AttSizes}, Acc) ->
     % Maybe upgrade from disk_size only
     #size_info{
@@ -709,6 +443,15 @@ add_sizes(Type, #leaf{sizes=Sizes, atts=AttSizes}, Acc) ->
     NewAttsAcc = lists:umerge(AttSizes, AttsAcc),
     {NewASAcc, NewESAcc, NewAttsAcc}.
 
+
+upgrade_sizes(#size_info{}=SI) ->
+    SI;
+upgrade_sizes({D, E}) ->
+    #size_info{active=D, external=E};
+upgrade_sizes(S) when is_integer(S) ->
+    #size_info{active=S, external=0}.
+
+
 send_result(Client, Doc, NewResult) ->
     % used to send a result to the client
     catch(Client ! {result, self(), {doc_tag(Doc), NewResult}}).
@@ -835,58 +578,40 @@ merge_rev_tree(OldInfo, NewDoc, _Client, Limit, true) ->
     {NewTree, _} = couch_key_tree:merge(OldTree, NewTree0, Limit),
     OldInfo#full_doc_info{rev_tree = NewTree}.
 
-stem_full_doc_infos(#db{revs_limit=Limit}, DocInfos) ->
-    [Info#full_doc_info{rev_tree=couch_key_tree:stem(Tree, Limit)} ||
-            #full_doc_info{rev_tree=Tree}=Info <- DocInfos].
+update_docs_int(Db, DocsList, LocalDocs, MergeConflicts, FullCommit) ->
+    UpdateSeq = couch_db_engine:get(Db, update_seq),
+    RevsLimit = couch_db_engine:get(Db, revs_limit),
 
-update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
-    #db{
-        id_tree = DocInfoByIdBTree,
-        seq_tree = DocInfoBySeqBTree,
-        update_seq = LastSeq,
-        revs_limit = RevsLimit
-        } = Db,
     Ids = [Id || [{_Client, #doc{id=Id}}|_] <- DocsList],
     % lookup up the old documents, if they exist.
-    OldDocLookups = couch_btree:lookup(DocInfoByIdBTree, Ids),
-    OldDocInfos = lists:zipwith(
-        fun(_Id, {ok, FullDocInfo}) ->
-            FullDocInfo;
+    OldDocLookups = couch_db_engine:open_docs(Db, Ids),
+    OldDocInfos = lists:zipwith(fun
+        (_Id, #full_doc_info{} = FDI) ->
+            FDI;
         (Id, not_found) ->
             #full_doc_info{id=Id}
-        end,
-        Ids, OldDocLookups),
+    end, Ids, OldDocLookups),
     % Merge the new docs into the revision trees.
-    {ok, NewFullDocInfos, RemoveSeqs, NewSeq} = merge_rev_trees(RevsLimit,
-            MergeConflicts, DocsList, OldDocInfos, [], [], LastSeq),
-
-    % All documents are now ready to write.
-
-    {ok, Db2}  = update_local_docs(Db, NonRepDocs),
+    {ok, NewFullDocInfos, RemSeqs, _} = merge_rev_trees(RevsLimit,
+            MergeConflicts, DocsList, OldDocInfos, [], [], UpdateSeq),
 
     % Write out the document summaries (the bodies are stored in the nodes of
     % the trees, the attachments are already written to disk)
-    {ok, IndexFullDocInfos} = flush_trees(Db2, NewFullDocInfos, []),
-
-    % and the indexes
-    {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree, IndexFullDocInfos, []),
-    {ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree, IndexFullDocInfos, RemoveSeqs),
+    {ok, IndexFDIs} = flush_trees(Db, NewFullDocInfos, []),
+    Pairs = pair_write_info(OldDocLookups, IndexFDIs),
+    LocalDocs2 = update_local_doc_revs(LocalDocs),
 
+    {ok, Db1} = couch_db_engine:write_doc_infos(Db, Pairs, LocalDocs2, []),
 
-    WriteCount = length(IndexFullDocInfos),
+    WriteCount = length(IndexFDIs),
     couch_stats:increment_counter([couchdb, document_inserts],
-         WriteCount - length(RemoveSeqs)),
+         WriteCount - length(RemSeqs)),
     couch_stats:increment_counter([couchdb, document_writes], WriteCount),
     couch_stats:increment_counter(
         [couchdb, local_document_writes],
-        length(NonRepDocs)
+        length(LocalDocs2)
     ),
 
-    Db3 = Db2#db{
-        id_tree = DocInfoByIdBTree2,
-        seq_tree = DocInfoBySeqBTree2,
-        update_seq = NewSeq},
-
     % Check if we just updated any design documents, and update the validation
     % funs if we did.
     UpdatedDDocIds = lists:flatmap(fun
@@ -894,549 +619,101 @@ update_docs_int(Db, DocsList, NonRepDocs, MergeConflicts, FullCommit) ->
         (_) -> []
     end, Ids),
 
-    Db4 = case length(UpdatedDDocIds) > 0 of
+    Db2 = case length(UpdatedDDocIds) > 0 of
         true ->
-            couch_event:notify(Db3#db.name, ddoc_updated),
-            ddoc_cache:evict(Db3#db.name, UpdatedDDocIds),
-            refresh_validate_doc_funs(Db3);
+            couch_event:notify(Db1#db.name, ddoc_updated),
+            ddoc_cache:evict(Db1#db.name, UpdatedDDocIds),
+            refresh_validate_doc_funs(Db1);
         false ->
-            Db3
+            Db1
     end,
 
-    {ok, commit_data(Db4, not FullCommit), UpdatedDDocIds}.
-
-update_local_docs(Db, []) ->
-    {ok, Db};
-update_local_docs(#db{local_tree=Btree}=Db, Docs) ->
-    BtreeEntries = lists:map(
-        fun({Client, NewDoc}) ->
-            #doc{
-                id = Id,
-                deleted = Delete,
-                revs = {0, PrevRevs},
-                body = Body
-            } = NewDoc,
-            case PrevRevs of
-            [RevStr|_] ->
+    {ok, commit_data(Db2, not FullCommit), UpdatedDDocIds}.
+
+
+update_local_doc_revs(Docs) ->
+    lists:map(fun({Client, NewDoc}) ->
+        #doc{
+            deleted = Delete,
+            revs = {0, PrevRevs}
+        } = NewDoc,
+        case PrevRevs of
+            [RevStr | _] ->
                 PrevRev = list_to_integer(?b2l(RevStr));
             [] ->
                 PrevRev = 0
-            end,
-            case Delete of
-                false ->
-                    send_result(Client, NewDoc, {ok,
-                        {0, ?l2b(integer_to_list(PrevRev + 1))}}),
-                    {update, {Id, {PrevRev + 1, Body}}};
-                true  ->
-                    send_result(Client, NewDoc,
-                        {ok, {0, <<"0">>}}),
-                    {remove, Id}
-            end
-        end, Docs),
-
-    BtreeIdsRemove = [Id || {remove, Id} <- BtreeEntries],
-    BtreeIdsUpdate = [{Key, Val} || {update, {Key, Val}} <- BtreeEntries],
-
-    {ok, Btree2} =
-        couch_btree:add_remove(Btree, BtreeIdsUpdate, BtreeIdsRemove),
-
-    {ok, Db#db{local_tree = Btree2}}.
+        end,
+        NewRev = case Delete of
+            false ->
+                ?l2b(integer_to_list(PrevRev + 1));
+            true  ->
+                <<"0">>
+        end,
+        send_result(Client, NewDoc, {ok, {0, NewRev}}),
+        NewDoc#doc{
+            revs = {0, [NewRev]}
+        }
+    end, Docs).
 
-db_to_header(Db, Header) ->
-    couch_db_header:set(Header, [
-        {update_seq, Db#db.update_seq},
-        {seq_tree_state, couch_btree:get_state(Db#db.seq_tree)},
-        {id_tree_state, couch_btree:get_state(Db#db.id_tree)},
-        {local_tree_state, couch_btree:get_state(Db#db.local_tree)},
-        {security_ptr, Db#db.security_ptr},
-        {revs_limit, Db#db.revs_limit}
-    ]).
 
 commit_data(Db) ->
     commit_data(Db, false).
 
-commit_data(#db{waiting_delayed_commit=nil} = Db, true) ->
-    TRef = erlang:send_after(1000,self(),delayed_commit),
-    Db#db{waiting_delayed_commit=TRef};
+commit_data(#db{waiting_delayed_commit = nil} = Db, true) ->
+    TRef = erlang:send_after(1000, self(), delayed_commit),
+    Db#db{waiting_delayed_commit = TRef};
 commit_data(Db, true) ->
     Db;
 commit_data(Db, _) ->
     #db{
-        header = OldHeader,
-        waiting_delayed_commit = Timer
-    } = Db,
-    if is_reference(Timer) -> erlang:cancel_timer(Timer); true -> ok end,
-    case db_to_header(Db, OldHeader) of
-        OldHeader -> Db#db{waiting_delayed_commit=nil};
-        NewHeader -> sync_header(Db, NewHeader)
-    end.
-
-sync_header(Db, NewHeader) ->
-    #db{
-        fd = Fd,
-        filepath = FilePath,
-        fsync_options = FsyncOptions,
         waiting_delayed_commit = Timer
     } = Db,
-
     if is_reference(Timer) -> erlang:cancel_timer(Timer); true -> ok end,
-
-    Before = lists:member(before_header, FsyncOptions),
-    After = lists:member(after_header, FsyncOptions),
-
-    if Before -> couch_file:sync(FilePath); true -> ok end,
-    ok = couch_file:write_header(Fd, NewHeader),
-    if After -> couch_file:sync(FilePath); true -> ok end,
-
-    Db#db{
-        header=NewHeader,
-        committed_update_seq=Db#db.update_seq,
-        waiting_delayed_commit=nil
+    {ok, Db1} = couch_db_engine:commit_data(Db),
+    Db1#db{
+        waiting_delayed_commit = nil,
+        committed_update_seq = couch_db_engine:get(Db, update_seq)
     }.
 
-copy_doc_attachments(#db{fd = SrcFd} = SrcDb, SrcSp, DestFd) ->
-    {ok, {BodyData, BinInfos0}} = couch_db:read_doc(SrcDb, SrcSp),
-    BinInfos = case BinInfos0 of
-    _ when is_binary(BinInfos0) ->
-        couch_compress:decompress(BinInfos0);
-    _ when is_list(BinInfos0) ->
-        % pre 1.2 file format
-        BinInfos0
-    end,
-    % copy the bin values
-    NewBinInfos = lists:map(
-        fun({Name, Type, BinSp, AttLen, RevPos, ExpectedMd5}) ->
-            % 010 UPGRADE CODE
-            {NewBinSp, AttLen, AttLen, ActualMd5, _IdentityMd5} =
-                couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd),
-            check_md5(ExpectedMd5, ActualMd5),
-            {Name, Type, NewBinSp, AttLen, AttLen, RevPos, ExpectedMd5, identity};
-        ({Name, Type, BinSp, AttLen, DiskLen, RevPos, ExpectedMd5, Enc1}) ->
-            {NewBinSp, AttLen, _, ActualMd5, _IdentityMd5} =
-                couch_stream:copy_to_new_stream(SrcFd, BinSp, DestFd),
-            check_md5(ExpectedMd5, ActualMd5),
-            Enc = case Enc1 of
-            true ->
-                % 0110 UPGRADE CODE
-                gzip;
-            false ->
-                % 0110 UPGRADE CODE
-                identity;
-            _ ->
-                Enc1
-            end,
-            {Name, Type, NewBinSp, AttLen, DiskLen, RevPos, ExpectedMd5, Enc}
-        end, BinInfos),
-    {BodyData, NewBinInfos}.
-
-merge_lookups(Infos, []) ->
-    Infos;
-merge_lookups([], _) ->
-    [];
-merge_lookups([#doc_info{}=DI | RestInfos], [{ok, FDI} | RestLookups]) ->
-    % Assert we've matched our lookups
-    if DI#doc_info.id == FDI#full_doc_info.id -> ok; true ->
-        erlang:error({mismatched_doc_infos, DI#doc_info.id})
-    end,
-    [FDI | merge_lookups(RestInfos, RestLookups)];
-merge_lookups([FDI | RestInfos], Lookups) ->
-    [FDI | merge_lookups(RestInfos, Lookups)].
-
-check_md5(Md5, Md5) -> ok;
-check_md5(_, _) -> throw(md5_mismatch).
-
-copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) ->
-    DocInfoIds = [Id || #doc_info{id=Id} <- MixedInfos],
-    LookupResults = couch_btree:lookup(Db#db.id_tree, DocInfoIds),
-    % COUCHDB-968, make sure we prune duplicates during compaction
-    NewInfos0 = lists:usort(fun(#full_doc_info{id=A}, #full_doc_info{id=B}) ->
-        A =< B
-    end, merge_lookups(MixedInfos, LookupResults)),
-
-    NewInfos1 = lists:map(fun(Info) ->
-        {NewRevTree, FinalAcc} = couch_key_tree:mapfold(fun
-            (_Rev, #leaf{ptr=Sp}=Leaf, leaf, SizesAcc) ->
-                {Body, AttInfos} = copy_doc_attachments(Db, Sp, DestFd),
-                SummaryChunk = make_doc_summary(NewDb, {Body, AttInfos}),
-                ExternalSize = ?term_size(SummaryChunk),
-                {ok, Pos, SummarySize} = couch_file:append_raw_chunk(
-                    DestFd, SummaryChunk),
-                AttSizes = [{element(3,A), element(4,A)} || A <- AttInfos],
-                NewLeaf = Leaf#leaf{
-                    ptr = Pos,
-                    sizes = #size_info{
-                        active = SummarySize,
-                        external = ExternalSize
-                    },
-                    atts = AttSizes
-                },
-                {NewLeaf, add_sizes(leaf, NewLeaf, SizesAcc)};
-            (_Rev, _Leaf, branch, SizesAcc) ->
-                {?REV_MISSING, SizesAcc}
-        end, {0, 0, []}, Info#full_doc_info.rev_tree),
-        {FinalAS, FinalES, FinalAtts} = FinalAcc,
-        TotalAttSize = lists:foldl(fun({_, S}, A) -> S + A end, 0, FinalAtts),
-        NewActiveSize = FinalAS + TotalAttSize,
-        NewExternalSize = FinalES + TotalAttSize,
-        Info#full_doc_info{
-            rev_tree = NewRevTree,
-            sizes = #size_info{
-                active = NewActiveSize,
-                external = NewExternalSize
-            }
-        }
-    end, NewInfos0),
-
-    NewInfos = stem_full_doc_infos(Db, NewInfos1),
-    RemoveSeqs =
-    case Retry of
-    nil ->
-        [];
-    OldDocIdTree ->
-        % Compaction is being rerun to catch up to writes during the
-        % first pass. This means we may have docs that already exist
-        % in the seq_tree in the .data file. Here we lookup any old
-        % update_seqs so that they can be removed.
-        Ids = [Id || #full_doc_info{id=Id} <- NewInfos],
-        Existing = couch_btree:lookup(OldDocIdTree, Ids),
-        [Seq || {ok, #full_doc_info{update_seq=Seq}} <- Existing]
-    end,
-
-    {ok, SeqTree} = couch_btree:add_remove(
-            NewDb#db.seq_tree, NewInfos, RemoveSeqs),
-
-    FDIKVs = lists:map(fun(#full_doc_info{id=Id, update_seq=Seq}=FDI) ->
-        {{Id, Seq}, FDI}
-    end, NewInfos),
-    {ok, IdEms} = couch_emsort:add(NewDb#db.id_tree, FDIKVs),
-    update_compact_task(length(NewInfos)),
-    NewDb#db{id_tree=IdEms, seq_tree=SeqTree}.
-
-
-copy_compact(Db, NewDb0, Retry) ->
-    Compression = couch_compress:get_compression_method(),
-    NewDb = NewDb0#db{compression=Compression},
-    TotalChanges = couch_db:count_changes_since(Db, NewDb#db.update_seq),
-    BufferSize = list_to_integer(
-        config:get("database_compaction", "doc_buffer_size", "524288")),
-    CheckpointAfter = couch_util:to_integer(
-        config:get("database_compaction", "checkpoint_after",
-            BufferSize * 10)),
-
-    EnumBySeqFun =
-    fun(DocInfo, _Offset,
-            {AccNewDb, AccUncopied, AccUncopiedSize, AccCopiedSize}) ->
-
-        Seq = case DocInfo of
-            #full_doc_info{} -> DocInfo#full_doc_info.update_seq;
-            #doc_info{} -> DocInfo#doc_info.high_seq
-        end,
 
-        AccUncopiedSize2 = AccUncopiedSize + ?term_size(DocInfo),
-        if AccUncopiedSize2 >= BufferSize ->
-            NewDb2 = copy_docs(
-                Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry),
-            AccCopiedSize2 = AccCopiedSize + AccUncopiedSize2,
-            if AccCopiedSize2 >= CheckpointAfter ->
-                CommNewDb2 = commit_compaction_data(NewDb2#db{update_seq=Seq}),
-                {ok, {CommNewDb2, [], 0, 0}};
-            true ->
-                {ok, {NewDb2#db{update_seq = Seq}, [], 0, AccCopiedSize2}}
-            end;
+maybe_track_db(#db{options = Options}) ->
+    case lists:member(sys_db, Options) of
         true ->
-            {ok, {AccNewDb, [DocInfo | AccUncopied], AccUncopiedSize2,
-                AccCopiedSize}}
-        end
-    end,
-
-    TaskProps0 = [
-        {type, database_compaction},
-        {database, Db#db.name},
-        {progress, 0},
-        {changes_done, 0},
-        {total_changes, TotalChanges}
-    ],
-    case (Retry =/= nil) and couch_task_status:is_task_added() of
-    true ->
-        couch_task_status:update([
-            {retry, true},
-            {progress, 0},
-            {changes_done, 0},
-            {total_changes, TotalChanges}
-        ]);
-    false ->
-        couch_task_status:add_task(TaskProps0),
-        couch_task_status:set_update_frequency(500)
-    end,
-
-    {ok, _, {NewDb2, Uncopied, _, _}} =
-        couch_btree:foldl(Db#db.seq_tree, EnumBySeqFun,
-            {NewDb, [], 0, 0},
-            [{start_key, NewDb#db.update_seq + 1}]),
-
-    NewDb3 = copy_docs(Db, NewDb2, lists:reverse(Uncopied), Retry),
-
-    % copy misc header values
-    if NewDb3#db.security /= Db#db.security ->
-        {ok, Ptr, _} = couch_file:append_term(
-            NewDb3#db.fd, Db#db.security,
-            [{compression, NewDb3#db.compression}]),
-        NewDb4 = NewDb3#db{security=Db#db.security, security_ptr=Ptr};
-    true ->
-        NewDb4 = NewDb3
-    end,
-
-    commit_compaction_data(NewDb4#db{update_seq=Db#db.update_seq}).
-
-
-start_copy_compact(#db{}=Db) ->
-    erlang:put(io_priority, {db_compact, Db#db.name}),
-    #db{name=Name, filepath=Filepath, options=Options, header=Header} = Db,
-    couch_log:debug("Compaction process spawned for db \"~s\"", [Name]),
-
-    {ok, NewDb, DName, DFd, MFd, Retry} =
-        open_compaction_files(Name, Header, Filepath, Options),
-    erlang:monitor(process, MFd),
-
-    % This is a bit worrisome. init_db/4 will monitor the data fd
-    % but it doesn't know about the meta fd. For now I'll maintain
-    % that the data fd is the old normal fd and meta fd is special
-    % and hope everything works out for the best.
-    unlink(DFd),
-
-    NewDb1 = copy_purge_info(Db, NewDb),
-    NewDb2 = copy_compact(Db, NewDb1, Retry),
-    NewDb3 = sort_meta_data(NewDb2),
-    NewDb4 = commit_compaction_data(NewDb3),
-    NewDb5 = copy_meta_data(NewDb4),
-    NewDb6 = sync_header(NewDb5, db_to_header(NewDb5, NewDb5#db.header)),
-    close_db(NewDb6),
-
-    ok = couch_file:close(MFd),
-    gen_server:cast(Db#db.main_pid, {compact_done, DName}).
-
-
-open_compaction_files(DbName, SrcHdr, DbFilePath, Options) ->
-    DataFile = DbFilePath ++ ".compact.data",
-    MetaFile = DbFilePath ++ ".compact.meta",
-    {ok, DataFd, DataHdr} = open_compaction_file(DataFile),
-    {ok, MetaFd, MetaHdr} = open_compaction_file(MetaFile),
-    DataHdrIsDbHdr = couch_db_header:is_header(DataHdr),
-    case {DataHdr, MetaHdr} of
-        {#comp_header{}=A, #comp_header{}=A} ->
-            DbHeader = A#comp_header.db_header,
-            Db0 = init_db(DbName, DataFile, DataFd, DbHeader, Options),
-            Db1 = bind_emsort(Db0, MetaFd, A#comp_header.meta_state),
-            {ok, Db1, DataFile, DataFd, MetaFd, Db0#db.id_tree};
-        _ when DataHdrIsDbHdr ->
-            ok = reset_compaction_file(MetaFd, couch_db_header:from(SrcHdr)),
-            Db0 = init_db(DbName, DataFile, DataFd, DataHdr, Options),
-            Db1 = bind_emsort(Db0, MetaFd, nil),
-            {ok, Db1, DataFile, DataFd, MetaFd, Db0#db.id_tree};
-        _ ->
-            Header = couch_db_header:from(SrcHdr),
-            ok = reset_compaction_file(DataFd, Header),
-            ok = reset_compaction_file(MetaFd, Header),
-            Db0 = init_db(DbName, DataFile, DataFd, Header, Options),
-            Db1 = bind_emsort(Db0, MetaFd, nil),
-            {ok, Db1, DataFile, DataFd, MetaFd, nil}
+            ok;
+        false ->
+            couch_stats_process_tracker:track([couchdb, open_databases])
     end.
 
 
-open_compaction_file(FilePath) ->
-    case couch_file:open(FilePath, [nologifmissing]) of
-        {ok, Fd} ->
-            case couch_file:read_header(Fd) of
-                {ok, Header} -> {ok, Fd, Header};
-                no_valid_header -> {ok, Fd, nil}
-            end;
-        {error, enoent} ->
-            {ok, Fd} = couch_file:open(FilePath, [create]),
-            {ok, Fd, nil}
-    end.
+get_update_seq(Db) ->
+    couch_db_engine:get(Db, update_seq).
 
 
-reset_compaction_file(Fd, Header) ->
-    ok = couch_file:truncate(Fd, 0),
-    ok = couch_file:write_header(Fd, Header).
-
-
-copy_purge_info(OldDb, NewDb) ->
-    OldHdr = OldDb#db.header,
-    NewHdr = NewDb#db.header,
-    OldPurgeSeq = couch_db_header:purge_seq(OldHdr),
-    if OldPurgeSeq > 0 ->
-        {ok, PurgedIdsRevs} = couch_db:get_last_purged(OldDb),
-        Opts = [{compression, NewDb#db.compression}],
-        {ok, Ptr, _} = couch_file:append_term(NewDb#db.fd, PurgedIdsRevs, Opts),
-        NewNewHdr = couch_db_header:set(NewHdr, [
-            {purge_seq, OldPurgeSeq},
-            {purged_docs, Ptr}
-        ]),
-        NewDb#db{header = NewNewHdr};
-    true ->
-        NewDb
-    end.
+finish_engine_swap(_OldDb, _NewEngine, _CompactFilePath) ->
+    erlang:error(explode).
 
 
-commit_compaction_data(#db{}=Db) ->
-    % Compaction needs to write headers to both the data file
-    % and the meta file so if we need to restart we can pick
-    % back up from where we left off.
-    commit_compaction_data(Db, couch_emsort:get_fd(Db#db.id_tree)),
-    commit_compaction_data(Db, Db#db.fd).
-
-
-commit_compaction_data(#db{header=OldHeader}=Db0, Fd) ->
-    % Mostly copied from commit_data/2 but I have to
-    % replace the logic to commit and fsync to a specific
-    % fd instead of the Filepath stuff that commit_data/2
-    % does.
-    DataState = couch_db_header:id_tree_state(OldHeader),
-    MetaFd = couch_emsort:get_fd(Db0#db.id_tree),
-    MetaState = couch_emsort:get_state(Db0#db.id_tree),
-    Db1 = bind_id_tree(Db0, Db0#db.fd, DataState),
-    Header = db_to_header(Db1, OldHeader),
-    CompHeader = #comp_header{
-        db_header = Header,
-        meta_state = MetaState
-    },
-    ok = couch_file:sync(Fd),
-    ok = couch_file:write_header(Fd, CompHeader),
-    Db2 = Db1#db{
-        waiting_delayed_commit=nil,
-        header=Header,
-        committed_update_seq=Db1#db.update_seq
-    },
-    bind_emsort(Db2, MetaFd, MetaState).
-
-
-bind_emsort(Db, Fd, nil) ->
-    {ok, Ems} = couch_emsort:open(Fd),
-    Db#db{id_tree=Ems};
-bind_emsort(Db, Fd, State) ->
-    {ok, Ems} = couch_emsort:open(Fd, [{root, State}]),
-    Db#db{id_tree=Ems}.
-
-
-bind_id_tree(Db, Fd, State) ->
-    {ok, IdBtree} = couch_btree:open(State, Fd, [
-        {split, fun ?MODULE:btree_by_id_split/1},
-        {join, fun ?MODULE:btree_by_id_join/2},
-        {reduce, fun ?MODULE:btree_by_id_reduce/2}
-    ]),
-    Db#db{id_tree=IdBtree}.
-
-
-sort_meta_data(Db0) ->
-    {ok, Ems} = couch_emsort:merge(Db0#db.id_tree),
-    Db0#db{id_tree=Ems}.
-
-
-copy_meta_data(#db{fd=Fd, header=Header}=Db) ->
-    Src = Db#db.id_tree,
-    DstState = couch_db_header:id_tree_state(Header),
-    {ok, IdTree0} = couch_btree:open(DstState, Fd, [
-        {split, fun ?MODULE:btree_by_id_split/1},
-        {join, fun ?MODULE:btree_by_id_join/2},
-        {reduce, fun ?MODULE:btree_by_id_reduce/2}
-    ]),
-    {ok, Iter} = couch_emsort:iter(Src),
-    Acc0 = #merge_st{
-        id_tree=IdTree0,
-        seq_tree=Db#db.seq_tree,
-        rem_seqs=[],
-        infos=[]
-    },
-    Acc = merge_docids(Iter, Acc0),
-    {ok, IdTree} = couch_btree:add(Acc#merge_st.id_tree, Acc#merge_st.infos),
-    {ok, SeqTree} = couch_btree:add_remove(
-        Acc#merge_st.seq_tree, [], Acc#merge_st.rem_seqs
-    ),
-    Db#db{id_tree=IdTree, seq_tree=SeqTree}.
-
-
-merge_docids(Iter, #merge_st{infos=Infos}=Acc) when length(Infos) > 1000 ->
-    #merge_st{
-        id_tree=IdTree0,
-        seq_tree=SeqTree0,
-        rem_seqs=RemSeqs
-    } = Acc,
-    {ok, IdTree1} = couch_btree:add(IdTree0, Infos),
-    {ok, SeqTree1} = couch_btree:add_remove(SeqTree0, [], RemSeqs),
-    Acc1 = Acc#merge_st{
-        id_tree=IdTree1,
-        seq_tree=SeqTree1,
-        rem_seqs=[],
-        infos=[]
-    },
-    merge_docids(Iter, Acc1);
-merge_docids(Iter, #merge_st{curr=Curr}=Acc) ->
-    case next_info(Iter, Curr, []) of
-        {NextIter, NewCurr, FDI, Seqs} ->
-            Acc1 = Acc#merge_st{
-                infos = [FDI | Acc#merge_st.infos],
-                rem_seqs = Seqs ++ Acc#merge_st.rem_seqs,
-                curr = NewCurr
-            },
-            merge_docids(NextIter, Acc1);
-        {finished, FDI, Seqs} ->
-            Acc#merge_st{
-                infos = [FDI | Acc#merge_st.infos],
-                rem_seqs = Seqs ++ Acc#merge_st.rem_seqs,
-                curr = undefined
-            };
-        empty ->
-            Acc
-    end.
-
+make_doc_summary(Db, DocParts) ->
+    couch_db_engine:make_doc_summary(Db, DocParts).
 
-next_info(Iter, undefined, []) ->
-    case couch_emsort:next(Iter) of
-        {ok, {{Id, Seq}, FDI}, NextIter} ->
-            next_info(NextIter, {Id, Seq, FDI}, []);
-        finished ->
-            empty
-    end;
-next_info(Iter, {Id, Seq, FDI}, Seqs) ->
-    case couch_emsort:next(Iter) of
-        {ok, {{Id, NSeq}, NFDI}, NextIter} ->
-            next_info(NextIter, {Id, NSeq, NFDI}, [Seq | Seqs]);
-        {ok, {{NId, NSeq}, NFDI}, NextIter} ->
-            {NextIter, {NId, NSeq, NFDI}, FDI, Seqs};
-        finished ->
-            {finished, FDI, Seqs}
-    end.
 
+pair_write_info(Old, New) ->
+    lists:map(fun(FDI) ->
+        case lists:keyfind(FDI#full_doc_info.id, #full_doc_info.id, Old) of
+            #full_doc_info{} = OldFDI -> {OldFDI, FDI};
+            false -> {not_found, FDI}
+        end
+    end, New).
 
-update_compact_task(NumChanges) ->
-    [Changes, Total] = couch_task_status:get([changes_done, total_changes]),
-    Changes2 = Changes + NumChanges,
-    Progress = case Total of
-    0 ->
-        0;
-    _ ->
-        (Changes2 * 100) div Total
-    end,
-    couch_task_status:update([{changes_done, Changes2}, {progress, Progress}]).
 
+pair_purge_info(Old, New) ->
+    lists:map(fun(OldFDI) ->
+        case lists:keyfind(OldFDI#full_doc_info.id, #full_doc_info.id, New) of
+            #full_doc_info{} = NewFDI -> {OldFDI, NewFDI};
+            false -> {OldFDI, not_found}
+        end
+    end, Old).
 
-make_doc_summary(#db{compression = Comp}, {Body0, Atts0}) ->
-    Body = case couch_compress:is_compressed(Body0, Comp) of
-    true ->
-        Body0;
-    false ->
-        % pre 1.2 database file format
-        couch_compress:compress(Body0, Comp)
-    end,
-    Atts = case couch_compress:is_compressed(Atts0, Comp) of
-    true ->
-        Atts0;
-    false ->
-        couch_compress:compress(Atts0, Comp)
-    end,
-    SummaryBin = ?term_to_bin({Body, Atts}),
-    couch_file:assemble_file_chunk(SummaryBin, couch_crypto:hash(md5, SummaryBin)).
 
 default_security_object(<<"shards/", _/binary>>) ->
     case config:get("couchdb", "default_security", "everyone") of
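
The couch_db_engine calls above are essentially the whole of the new storage
API as this patch uses it. A minimal sketch of the calling pattern follows;
the facade functions are taken from the diff itself, while the module name
and helper functions here are purely illustrative:

    %% engine_facade_sketch.erl -- illustrative only, not part of this commit.
    -module(engine_facade_sketch).
    -export([bump_revs_limit/2, rewrite_docs/2, relabel_leafs/2]).

    -include_lib("couch/include/couch_db.hrl").

    %% Engine-owned state is read with couch_db_engine:get/2; writes go
    %% through couch_db_engine:set/3, which hands back an updated #db{}.
    bump_revs_limit(Db, NewLimit) ->
        OldLimit = couch_db_engine:get(Db, revs_limit),
        {ok, Db2} = couch_db_engine:set(Db, revs_limit, NewLimit),
        {OldLimit, Db2}.

    %% Document writes pair each old #full_doc_info{} (or not_found) with
    %% its replacement and submit all pairs in a single call, as
    %% update_docs_int/5 does via pair_write_info/2.
    rewrite_docs(Db, NewFDIs) ->
        Ids = [Id || #full_doc_info{id = Id} <- NewFDIs],
        OldLookups = couch_db_engine:open_docs(Db, Ids),
        Pairs = lists:zip(OldLookups, NewFDIs),
        couch_db_engine:write_doc_infos(Db, Pairs, [], []).

    %% The purge path re-labels every surviving #leaf{} with a fresh
    %% update_seq, mirroring the couch_key_tree:mapfold/3 use in the
    %% purge_docs clause above.
    relabel_leafs(Tree, Seq0) ->
        couch_key_tree:mapfold(fun
            (_RevId, Leaf, leaf, Seq) ->
                {Leaf#leaf{seq = Seq + 1}, Seq + 1};
            (_RevId, Value, _Type, Seq) ->
                {Value, Seq}
        end, Seq0, Tree).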

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/9c4b8432/src/couch_httpd_db.erl
----------------------------------------------------------------------
diff --git a/src/couch_httpd_db.erl b/src/couch_httpd_db.erl
index 965e8fb..f061d4d 100644
--- a/src/couch_httpd_db.erl
+++ b/src/couch_httpd_db.erl
@@ -216,7 +216,13 @@ handle_design_info_req(#httpd{
 
 create_db_req(#httpd{user_ctx=UserCtx}=Req, DbName) ->
     ok = couch_httpd:verify_is_server_admin(Req),
-    case couch_server:create(DbName, [{user_ctx, UserCtx}]) of
+    Engine = case couch_httpd:qs_value(Req, "engine") of
+        EngineStr when is_list(EngineStr) ->
+            [{engine, iolist_to_binary(EngineStr)}];
+        _ ->
+            []
+    end,
+    case couch_server:create(DbName, [{user_ctx, UserCtx}] ++ Engine) of
     {ok, Db} ->
         couch_db:close(Db),
         DbUrl = absolute_uri(Req, "/" ++ couch_util:url_encode(DbName)),
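
The new "engine" query-string parameter flows straight through to
couch_server:create/2 as an {engine, Binary} option. A hedged example of the
equivalent direct call; the engine name <<"couch">> is an assumption here,
since valid names depend on how couch_server is configured:

    %% Roughly what PUT /mydb?engine=couch amounts to. UserCtx must be an
    %% admin user context; the engine name is illustrative only.
    create_with_engine(UserCtx) ->
        Options = [{user_ctx, UserCtx}, {engine, <<"couch">>}],
        {ok, Db} = couch_server:create(<<"mydb">>, Options),
        couch_db:close(Db).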

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/9c4b8432/src/couch_httpd_misc_handlers.erl
----------------------------------------------------------------------
diff --git a/src/couch_httpd_misc_handlers.erl b/src/couch_httpd_misc_handlers.erl
index 0cbbdd8..9ed5448 100644
--- a/src/couch_httpd_misc_handlers.erl
+++ b/src/couch_httpd_misc_handlers.erl
@@ -17,8 +17,6 @@
     handle_uuids_req/1,handle_config_req/1,
     handle_task_status_req/1, handle_file_req/2]).
 
--export([increment_update_seq_req/2]).
-
 
 -include_lib("couch/include/couch_db.hrl").
 
@@ -308,14 +306,3 @@ handle_approved_config_req(#httpd{method='DELETE',path_parts=[_,Section,Key]}=Re
         send_json(Req, 200, list_to_binary(OldValue))
     end.
 
-
-% httpd db handlers
-
-increment_update_seq_req(#httpd{method='POST'}=Req, Db) ->
-    couch_httpd:validate_ctype(Req, "application/json"),
-    {ok, NewSeq} = couch_db:increment_update_seq(Db),
-    send_json(Req, {[{ok, true},
-        {update_seq, NewSeq}
-    ]});
-increment_update_seq_req(Req, _Db) ->
-    send_method_not_allowed(Req, "POST").

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/9c4b8432/src/couch_lru.erl
----------------------------------------------------------------------
diff --git a/src/couch_lru.erl b/src/couch_lru.erl
index d58eb69..2e03055 100644
--- a/src/couch_lru.erl
+++ b/src/couch_lru.erl
@@ -14,6 +14,7 @@
 -export([new/0, insert/2, update/2, close/1]).
 
 -include_lib("couch/include/couch_db.hrl").
+-include("couch_server_int.hrl").
 
 new() ->
     {gb_trees:empty(), dict:new()}.
@@ -42,16 +43,17 @@ close({Tree, _} = Cache) ->
 close_int(none, _) ->
     erlang:error(all_dbs_active);
 close_int({Lru, DbName, Iter}, {Tree, Dict} = Cache) ->
-    case ets:update_element(couch_dbs, DbName, {#db.fd_monitor, locked}) of
+    case ets:update_element(couch_dbs, DbName, {#srv_entry.lock, locked}) of
     true ->
-        [#db{main_pid = Pid} = Db] = ets:lookup(couch_dbs, DbName),
+        [#srv_entry{db=Db, pid=Pid}] = ets:lookup(couch_dbs, DbName),
         case couch_db:is_idle(Db) of true ->
             true = ets:delete(couch_dbs, DbName),
             true = ets:delete(couch_dbs_pid_to_name, Pid),
             exit(Pid, kill),
             {gb_trees:delete(Lru, Tree), dict:erase(DbName, Dict)};
         false ->
-            true = ets:update_element(couch_dbs, DbName, {#db.fd_monitor, nil}),
+            true = ets:update_element(couch_dbs, DbName,
+                    {#srv_entry.lock, undefined}),
             couch_stats:increment_counter([couchdb, couch_server, lru_skip]),
             close_int(gb_trees:next(Iter), update(DbName, Cache))
         end;
@@ -59,4 +61,4 @@ close_int({Lru, DbName, Iter}, {Tree, Dict} = Cache) ->
         NewTree = gb_trees:delete(Lru, Tree),
         NewIter = gb_trees:iterator(NewTree),
         close_int(gb_trees:next(NewIter), {NewTree, dict:erase(DbName, Dict)})
-    end.
+    end.
\ No newline at end of file

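The #srv_entry.lock field now carries the eviction handshake that
previously overloaded #db.fd_monitor. Condensed, the protocol followed by
close_int/2 above looks like this (a sketch only; LRU bookkeeping and the
recursion over the iterator are elided):

    maybe_evict(DbName) ->
        %% lock the entry so couch_server:open/2 skips it while we look
        case ets:update_element(couch_dbs, DbName, {#srv_entry.lock, locked}) of
            true ->
                [#srv_entry{db = Db, pid = Pid}] = ets:lookup(couch_dbs, DbName),
                case couch_db:is_idle(Db) of
                    true ->
                        %% idle: drop both ets entries and kill the db pid
                        true = ets:delete(couch_dbs, DbName),
                        true = ets:delete(couch_dbs_pid_to_name, Pid),
                        exit(Pid, kill),
                        evicted;
                    false ->
                        %% busy: unlock and let the caller try the next entry
                        true = ets:update_element(couch_dbs, DbName,
                                {#srv_entry.lock, undefined}),
                        skipped
                end;
            false ->
                not_found
        end.
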
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/9c4b8432/src/couch_server.erl
----------------------------------------------------------------------
diff --git a/src/couch_server.erl b/src/couch_server.erl
index 2634bbf..28c942d 100644
--- a/src/couch_server.erl
+++ b/src/couch_server.erl
@@ -21,16 +21,20 @@
 -export([handle_cast/2,code_change/3,handle_info/2,terminate/2]).
 -export([dev_start/0,is_admin/2,has_admins/0,get_stats/0]).
 -export([close_lru/0]).
+-export([delete_compaction_files/1]).
+-export([exists/1]).
 
 % config_listener api
 -export([handle_config_change/5, handle_config_terminate/3]).
 
 -include_lib("couch/include/couch_db.hrl").
+-include("couch_server_int.hrl").
 
 -define(MAX_DBS_OPEN, 100).
 
 -record(server,{
     root_dir = [],
+    engines = [],
     max_dbs_open=?MAX_DBS_OPEN,
     dbs_open=0,
     start_time="",
@@ -72,20 +76,24 @@ sup_start_link() ->
     gen_server:start_link({local, couch_server}, couch_server, [], []).
 
 
+open(DbName, Options0) when is_list(DbName) ->
+    open(iolist_to_binary(DbName), Options0);
 open(DbName, Options0) ->
     Options = maybe_add_sys_db_callbacks(DbName, Options0),
     Ctx = couch_util:get_value(user_ctx, Options, #user_ctx{}),
     case ets:lookup(couch_dbs, DbName) of
-    [#db{fd=Fd, fd_monitor=Lock} = Db] when Lock =/= locked ->
+    [#srv_entry{db=Db, lock=Lock}] when Lock =/= locked ->
         update_lru(DbName, Options),
-        {ok, Db#db{user_ctx=Ctx, fd_monitor=erlang:monitor(process,Fd)}};
+        {ok, NewDb} = couch_db:incref(Db),
+        couch_db:set_user_ctx(NewDb, Ctx);
     _ ->
         Timeout = couch_util:get_value(timeout, Options, infinity),
         Create = couch_util:get_value(create_if_missing, Options, false),
         case gen_server:call(couch_server, {open, DbName, Options}, Timeout) of
-        {ok, #db{fd=Fd} = Db} ->
+        {ok, Db} ->
             update_lru(DbName, Options),
-            {ok, Db#db{user_ctx=Ctx, fd_monitor=erlang:monitor(process,Fd)}};
+            {ok, NewDb} = couch_db:incref(Db),
+            couch_db:set_user_ctx(NewDb, Ctx);
         {not_found, no_db_file} when Create ->
             couch_log:warning("creating missing database: ~s", [DbName]),
             couch_server:create(DbName, Options);
@@ -103,19 +111,53 @@ update_lru(DbName, Options) ->
 close_lru() ->
     gen_server:call(couch_server, close_lru).
 
+create(DbName, Options0) when is_list(DbName) ->
+    create(iolist_to_binary(DbName), Options0);
 create(DbName, Options0) ->
     Options = maybe_add_sys_db_callbacks(DbName, Options0),
     case gen_server:call(couch_server, {create, DbName, Options}, infinity) of
-    {ok, #db{fd=Fd} = Db} ->
+    {ok, Db} ->
         Ctx = couch_util:get_value(user_ctx, Options, #user_ctx{}),
-        {ok, Db#db{user_ctx=Ctx, fd_monitor=erlang:monitor(process,Fd)}};
+        {ok, NewDb} = couch_db:incref(Db),
+        couch_db:set_user_ctx(NewDb, Ctx);
     Error ->
         Error
     end.
 
+delete(DbName, Options) when is_list(DbName) ->
+    delete(iolist_to_binary(DbName), Options);
 delete(DbName, Options) ->
     gen_server:call(couch_server, {delete, DbName, Options}, infinity).
 
+
+exists(DbName) ->
+    RootDir = config:get("couchdb", "database_dir", "."),
+    Engines = get_configured_engines(),
+    Possible = lists:foldl(fun({Extension, Engine}, Acc) ->
+        Path = make_filepath(RootDir, DbName, Extension),
+        case couch_db_engine:exists(Engine, Path) of
+            true ->
+                [{Engine, Path} | Acc];
+            false ->
+                Acc
+        end
+    end, [], Engines),
+    Possible /= [].
+
+
+delete_compaction_files(DbName) ->
+    delete_compaction_files(DbName, []).
+
+delete_compaction_files(DbName, DelOpts) when is_list(DbName) ->
+    RootDir = config:get("couchdb", "database_dir", "."),
+    lists:foreach(fun({Ext, Engine}) ->
+        FPath = make_filepath(RootDir, DbName, Ext),
+        couch_db_engine:delete_compaction_files(Engine, RootDir, FPath, DelOpts)
+    end, get_configured_engines()),
+    ok;
+delete_compaction_files(DbName, DelOpts) when is_binary(DbName) ->
+    delete_compaction_files(?b2l(DbName), DelOpts).
+
 maybe_add_sys_db_callbacks(DbName, Options) when is_binary(DbName) ->
     maybe_add_sys_db_callbacks(?b2l(DbName), Options);
 maybe_add_sys_db_callbacks(DbName, Options) ->
@@ -159,9 +201,6 @@ is_admin(User, ClearPwd) ->
 has_admins() ->
     config:get("admins") /= [].
 
-get_full_filename(Server, DbName) ->
-    filename:join([Server#server.root_dir, "./" ++ DbName ++ ".couch"]).
-
 hash_admin_passwords() ->
     hash_admin_passwords(true).
 
@@ -179,6 +218,7 @@ init([]) ->
     % will restart us and then we will pick up the new settings.
 
     RootDir = config:get("couchdb", "database_dir", "."),
+    Engines = get_configured_engines(),
     MaxDbsOpen = list_to_integer(
             config:get("couchdb", "max_dbs_open", integer_to_list(?MAX_DBS_OPEN))),
     UpdateLruOnRead =
@@ -186,10 +226,12 @@ init([]) ->
     ok = config:listen_for_changes(?MODULE, nil),
     ok = couch_file:init_delete_dir(RootDir),
     hash_admin_passwords(),
-    ets:new(couch_dbs, [set, protected, named_table, {keypos, #db.name}]),
+    EtsOpts = [set, protected, named_table, {keypos, #srv_entry.name}],
+    ets:new(couch_dbs, EtsOpts),
     ets:new(couch_dbs_pid_to_name, [set, protected, named_table]),
     process_flag(trap_exit, true),
     {ok, #server{root_dir=RootDir,
+                engines = Engines,
                 max_dbs_open=MaxDbsOpen,
                 update_lru_on_read=UpdateLruOnRead,
                 start_time=couch_util:rfc1123_date()}}.
@@ -198,8 +240,9 @@ terminate(Reason, Srv) ->
     couch_log:error("couch_server terminating with ~p, state ~2048p",
                     [Reason,
                      Srv#server{lru = redacted}]),
-    ets:foldl(fun(#db{main_pid=Pid}, _) -> couch_util:shutdown_sync(Pid) end,
-        nil, couch_dbs),
+    ets:foldl(fun(#srv_entry{db=Db}, _) ->
+        couch_db:shutdown(Db)
+    end, nil, couch_dbs),
     ok.
 
 handle_config_change("couchdb", "database_dir", _, _, _) ->
@@ -213,6 +256,8 @@ handle_config_change("couchdb", "max_dbs_open", Max, _, _) when is_list(Max) ->
     {ok, gen_server:call(couch_server,{set_max_dbs_open,list_to_integer(Max)})};
 handle_config_change("couchdb", "max_dbs_open", _, _, _) ->
     {ok, gen_server:call(couch_server,{set_max_dbs_open,?MAX_DBS_OPEN})};
+handle_config_change("couchdb_engines", _, _, _, _) ->
+    {ok, gen_server:call(couch_server,reload_engines)};
 handle_config_change("admins", _, _, Persist, _) ->
     % spawn here so couch event manager doesn't deadlock
     {ok, spawn(fun() -> hash_admin_passwords(Persist) end)};
@@ -250,11 +295,15 @@ all_databases() ->
 all_databases(Fun, Acc0) ->
     {ok, #server{root_dir=Root}} = gen_server:call(couch_server, get_server),
     NormRoot = couch_util:normpath(Root),
-    FinalAcc = try
-    filelib:fold_files(Root,
+    Extensions = get_engine_extensions(),
+    ExtRegExp = "(" ++ string:join(Extensions, "|") ++ ")",
+    RegExp =
         "^[a-z0-9\\_\\$()\\+\\-]*" % stock CouchDB name regex
         "(\\.[0-9]{10,})?"         % optional shard timestamp
-        "\\.couch$",               % filename extension
+        "\\." ++ ExtRegExp ++ "$", % filename extension
+    FinalAcc = try
+    couch_util:fold_files(Root,
+        RegExp,
         true,
             fun(Filename, AccIn) ->
                 NormFilename = couch_util:normpath(Filename),
@@ -262,7 +311,8 @@ all_databases(Fun, Acc0) ->
                 [$/ | RelativeFilename] -> ok;
                 RelativeFilename -> ok
                 end,
-                case Fun(?l2b(filename:rootname(RelativeFilename, ".couch")), AccIn) of
+                Ext = filename:extension(RelativeFilename),
+                case Fun(?l2b(filename:rootname(RelativeFilename, Ext)), AccIn) of
                 {ok, NewAcc} -> NewAcc;
                 {stop, NewAcc} -> throw({stop, Fun, NewAcc})
                 end
@@ -289,11 +339,11 @@ maybe_close_lru_db(#server{lru=Lru}=Server) ->
         {error, all_dbs_active}
     end.
 
-open_async(Server, From, DbName, Filepath, Options) ->
+open_async(Server, From, DbName, {Module, Filepath}, Options) ->
     Parent = self(),
     T0 = os:timestamp(),
     Opener = spawn_link(fun() ->
-        Res = couch_db:start_link(DbName, Filepath, Options),
+        Res = couch_db:start_link(Module, DbName, Filepath, Options),
         case {Res, lists:member(create, Options)} of
             {{ok, _Db}, true} ->
                 couch_event:notify(DbName, created);
@@ -307,15 +357,13 @@ open_async(Server, From, DbName, Filepath, Options) ->
         true -> create;
         false -> open
     end,
-    % icky hack of field values - compactor_pid used to store clients
-    % and fd used for opening request info
-    true = ets:insert(couch_dbs, #db{
+    true = ets:insert(couch_dbs, #srv_entry{
         name = DbName,
-        fd = ReqType,
-        main_pid = Opener,
-        compactor_pid = [From],
-        fd_monitor = locked,
-        options = Options
+        pid = Opener,
+        waiters = [From],
+        lock = locked,
+        req_type = ReqType,
+        db_options = Options
     }),
     true = ets:insert(couch_dbs_pid_to_name, {Opener, DbName}),
     db_opened(Server, Options).
@@ -332,17 +380,22 @@ handle_call({set_update_lru_on_read, UpdateOnRead}, _From, Server) ->
     {reply, ok, Server#server{update_lru_on_read=UpdateOnRead}};
 handle_call({set_max_dbs_open, Max}, _From, Server) ->
     {reply, ok, Server#server{max_dbs_open=Max}};
+handle_call(reload_engines, _From, Server) ->
+    {reply, ok, Server#server{engines = get_configured_engines()}};
 handle_call(get_server, _From, Server) ->
     {reply, {ok, Server}, Server};
 handle_call({open_result, T0, DbName, {ok, Db}}, {FromPid, _Tag}, Server) ->
-    link(Db#db.main_pid),
+    DbPid = couch_db:pid(Db),
+    link(DbPid),
     true = ets:delete(couch_dbs_pid_to_name, FromPid),
     OpenTime = timer:now_diff(os:timestamp(), T0) / 1000,
     couch_stats:update_histogram([couchdb, db_open_time], OpenTime),
-    % icky hack of field values - compactor_pid used to store clients
-    % and fd used to possibly store a creation request
-    [#db{fd=ReqType, compactor_pid=Froms}] = ets:lookup(couch_dbs, DbName),
-    [gen_server:reply(From, {ok, Db}) || From <- Froms],
+    [#srv_entry{}=SE] = ets:lookup(couch_dbs, DbName),
+    #srv_entry{
+        waiters = Waiters,
+        req_type = ReqType
+    } = SE,
+    [gen_server:reply(From, {ok, Db}) || From <- Waiters],
     % Cancel the creation request if it exists.
     case ReqType of
         {create, DbName, _Filepath, _Options, CrFrom} ->
@@ -350,8 +403,15 @@ handle_call({open_result, T0, DbName, {ok, Db}}, {FromPid, _Tag}, Server) ->
         _ ->
             ok
     end,
-    true = ets:insert(couch_dbs, Db),
-    true = ets:insert(couch_dbs_pid_to_name, {Db#db.main_pid, DbName}),
+    true = ets:insert(couch_dbs, SE#srv_entry{
+        name = DbName,
+        db = Db,
+        pid = DbPid,
+        lock = unlocked,
+        waiters = undefined,
+        start_time = couch_db:get_instance_start_time(Db)
+    }),
+    true = ets:insert(couch_dbs_pid_to_name, {DbPid, DbName}),
     Lru = case couch_db:is_system_db(Db) of
         false ->
             couch_lru:insert(DbName, Server#server.lru);
@@ -362,19 +422,21 @@ handle_call({open_result, T0, DbName, {ok, Db}}, {FromPid, _Tag}, Server) ->
 handle_call({open_result, T0, DbName, {error, eexist}}, From, Server) ->
     handle_call({open_result, T0, DbName, file_exists}, From, Server);
 handle_call({open_result, _T0, DbName, Error}, {FromPid, _Tag}, Server) ->
-    % icky hack of field values - compactor_pid used to store clients
-    [#db{fd=ReqType, compactor_pid=Froms}=Db] = ets:lookup(couch_dbs, DbName),
-    [gen_server:reply(From, Error) || From <- Froms],
+    [#srv_entry{}=SE] = ets:lookup(couch_dbs, DbName),
+    #srv_entry{
+        waiters = Waiters
+    } = SE,
+    [gen_server:reply(From, Error) || From <- Waiters],
     couch_log:info("open_result error ~p for ~s", [Error, DbName]),
     true = ets:delete(couch_dbs, DbName),
     true = ets:delete(couch_dbs_pid_to_name, FromPid),
-    NewServer = case ReqType of
+    NewServer = case SE#srv_entry.req_type of
         {create, DbName, Filepath, Options, CrFrom} ->
             open_async(Server, CrFrom, DbName, Filepath, Options);
         _ ->
             Server
     end,
-    {reply, ok, db_closed(NewServer, Db#db.options)};
+    {reply, ok, db_closed(NewServer, SE#srv_entry.db_options)};
 handle_call({open, DbName, Options}, From, Server) ->
     case ets:lookup(couch_dbs, DbName) of
     [] ->
@@ -383,47 +445,47 @@ handle_call({open, DbName, Options}, From, Server) ->
         ok ->
             case make_room(Server, Options) of
             {ok, Server2} ->
-                Filepath = get_full_filename(Server, DbNameList),
-                {noreply, open_async(Server2, From, DbName, Filepath, Options)};
+                Engine = get_engine(Server, DbNameList),
+                {noreply, open_async(Server2, From, DbName, Engine, Options)};
             CloseError ->
                 {reply, CloseError, Server}
             end;
         Error ->
             {reply, Error, Server}
         end;
-    [#db{compactor_pid = Froms} = Db] when is_list(Froms) ->
-        % icky hack of field values - compactor_pid used to store clients
-        true = ets:insert(couch_dbs, Db#db{compactor_pid = [From|Froms]}),
-        if length(Froms) =< 10 -> ok; true ->
+    [#srv_entry{waiters = Waiters} = Entry] when is_list(Waiters) ->
+        true = ets:insert(couch_dbs, Entry#srv_entry{
+                waiters = [From | Waiters]
+        }),
+        if length(Waiters) =< 10 -> ok; true ->
             Fmt = "~b clients waiting to open db ~s",
-            couch_log:info(Fmt, [length(Froms), DbName])
+            couch_log:info(Fmt, [length(Waiters), DbName])
         end,
         {noreply, Server};
-    [#db{} = Db] ->
+    [#srv_entry{db=Db}] ->
         {reply, {ok, Db}, Server}
     end;
 handle_call({create, DbName, Options}, From, Server) ->
     DbNameList = binary_to_list(DbName),
-    Filepath = get_full_filename(Server, DbNameList),
+    Engine = get_engine(Server, DbNameList, Options),
     case check_dbname(Server, DbNameList) of
     ok ->
         case ets:lookup(couch_dbs, DbName) of
         [] ->
             case make_room(Server, Options) of
             {ok, Server2} ->
-                {noreply, open_async(Server2, From, DbName, Filepath,
-                        [create | Options])};
+                Opts = [create | Options],
+                {noreply, open_async(Server2, From, DbName, Engine, Opts)};
             CloseError ->
                 {reply, CloseError, Server}
             end;
-        [#db{fd=open}=Db] ->
+        [#srv_entry{req_type=open}=Entry] ->
             % We're trying to create a database while someone is in
             % the middle of trying to open it. We allow one creator
             % to wait while we figure out if it'll succeed.
-            % icky hack of field values - fd used to store create request
             CrOptions = [create | Options],
-            NewDb = Db#db{fd={create, DbName, Filepath, CrOptions, From}},
-            true = ets:insert(couch_dbs, NewDb),
+            ReqType = {create, DbName, Engine, CrOptions, From},
+            true = ets:insert(couch_dbs, Entry#srv_entry{req_type = ReqType}),
             {noreply, Server};
         [_AlreadyRunningDb] ->
             {reply, file_exists, Server}
@@ -435,36 +497,32 @@ handle_call({delete, DbName, Options}, _From, Server) ->
     DbNameList = binary_to_list(DbName),
     case check_dbname(Server, DbNameList) of
     ok ->
-        FullFilepath = get_full_filename(Server, DbNameList),
         Server2 =
         case ets:lookup(couch_dbs, DbName) of
         [] -> Server;
-        [#db{main_pid=Pid, compactor_pid=Froms} = Db] when is_list(Froms) ->
-            % icky hack of field values - compactor_pid used to store clients
+        [#srv_entry{pid=Pid, waiters=Waiters}=SE] when is_list(Waiters) ->
             true = ets:delete(couch_dbs, DbName),
             true = ets:delete(couch_dbs_pid_to_name, Pid),
-            exit(Pid, kill),
-            [gen_server:reply(F, not_found) || F <- Froms],
-            db_closed(Server, Db#db.options);
-        [#db{main_pid=Pid} = Db] ->
+            couch_util:shutdown_sync(Pid),
+            [gen_server:reply(F, not_found) || F <- Waiters],
+            db_closed(Server, SE#srv_entry.db_options);
+        [#srv_entry{pid=Pid}=SE] ->
             true = ets:delete(couch_dbs, DbName),
             true = ets:delete(couch_dbs_pid_to_name, Pid),
-            exit(Pid, kill),
-            db_closed(Server, Db#db.options)
+            couch_util:shutdown_sync(Pid),
+            db_closed(Server, SE#srv_entry.db_options)
         end,
 
-        %% Delete any leftover compaction files. If we don't do this a
-        %% subsequent request for this DB will try to open them to use
-        %% as a recovery.
-        lists:foreach(fun(Ext) ->
-            couch_file:delete(Server#server.root_dir, FullFilepath ++ Ext)
-        end, [".compact", ".compact.data", ".compact.meta"]),
-        couch_file:delete(Server#server.root_dir, FullFilepath ++ ".compact"),
-
         couch_db_plugin:on_delete(DbName, Options),
 
         DelOpt = [{context, delete} | Options],
-        case couch_file:delete(Server#server.root_dir, FullFilepath, DelOpt) of
+
+        % Make sure to remove all compaction data
+        delete_compaction_files(DbNameList, DelOpt),
+
+        {Engine, FilePath} = get_engine(Server, DbNameList),
+        RootDir = Server#server.root_dir,
+        case couch_db_engine:delete(Engine, RootDir, FilePath, DelOpt) of
         ok ->
             couch_event:notify(DbName, deleted),
             {reply, ok, Server2};
@@ -476,11 +534,20 @@ handle_call({delete, DbName, Options}, _From, Server) ->
     Error ->
         {reply, Error, Server}
     end;
-handle_call({db_updated, #db{}=Db}, _From, Server0) ->
-    #db{name = DbName, instance_start_time = StartTime} = Db,
-    Server = try ets:lookup_element(couch_dbs, DbName, #db.instance_start_time) of
-        StartTime ->
-            true = ets:insert(couch_dbs, Db),
+handle_call({db_updated, Db}, _From, Server0) ->
+    true = couch_db:is_db(Db),
+    DbName = couch_db:name(Db),
+    StartTime = couch_db:get_instance_start_time(Db),
+    Server = try ets:lookup(couch_dbs, DbName) of
+        [#srv_entry{start_time=StartTime}=SE] ->
+            true = ets:insert(couch_dbs, SE#srv_entry{
+                name = DbName,
+                db = Db,
+                pid = couch_db:pid(Db),
+                lock = unlocked,
+                waiters = undefined,
+                start_time = StartTime
+            }),
             Lru = case couch_db:is_system_db(Db) of
                 false -> couch_lru:update(DbName, Server0#server.lru);
                 true -> Server0#server.lru
@@ -508,22 +575,23 @@ handle_info({'EXIT', _Pid, config_change}, Server) ->
 handle_info({'EXIT', Pid, Reason}, Server) ->
     case ets:lookup(couch_dbs_pid_to_name, Pid) of
     [{Pid, DbName}] ->
-        [#db{compactor_pid=Froms}=Db] = ets:lookup(couch_dbs, DbName),
+        [#srv_entry{}=SE] = ets:lookup(couch_dbs, DbName),
+        #srv_entry{
+            waiters = Waiters,
+            db_options = DbOptions
+        } = SE,
         if Reason /= snappy_nif_not_loaded -> ok; true ->
             Msg = io_lib:format("To open the database `~s`, Apache CouchDB "
                 "must be built with Erlang OTP R13B04 or higher.", [DbName]),
             couch_log:error(Msg, [])
         end,
         couch_log:info("db ~s died with reason ~p", [DbName, Reason]),
-        % icky hack of field values - compactor_pid used to store clients
-        if is_list(Froms) ->
-            [gen_server:reply(From, Reason) || From <- Froms];
-        true ->
-            ok
+        if not is_list(Waiters) -> ok; true ->
+            [gen_server:reply(From, Reason) || From <- Waiters]
         end,
         true = ets:delete(couch_dbs, DbName),
         true = ets:delete(couch_dbs_pid_to_name, Pid),
-        {noreply, db_closed(Server, Db#db.options)};
+        {noreply, db_closed(Server, DbOptions)};
     [] ->
         {noreply, Server}
     end;
@@ -541,3 +609,102 @@ db_closed(Server, Options) ->
         false -> Server#server{dbs_open=Server#server.dbs_open - 1};
         true -> Server
     end.
+
+
+get_configured_engines() ->
+    ConfigEntries = config:get("couchdb_engines"),
+    Engines = lists:flatmap(fun({Extension, ModuleStr}) ->
+        try
+            [{Extension, list_to_atom(ModuleStr)}]
+        catch _T:_R ->
+            []
+        end
+    end, ConfigEntries),
+    case Engines of
+        [] ->
+            [{"couch", couch_bt_engine}];
+        Else ->
+            Else
+    end.
+
+
+get_engine(Server, DbName, Options) ->
+    #server{
+        root_dir = RootDir,
+        engines = Engines
+    } = Server,
+    case couch_util:get_value(engine, Options) of
+        Ext when is_binary(Ext) ->
+            ExtStr = binary_to_list(Ext),
+            case couch_util:get_value(ExtStr, Engines) of
+                Engine when is_atom(Engine) ->
+                    Path = make_filepath(RootDir, DbName, ExtStr),
+                    {Engine, Path};
+                _ ->
+                    get_engine(Server, DbName)
+            end;
+        _ ->
+            get_engine(Server, DbName)
+    end.
+
+
+get_engine(Server, DbName) ->
+    #server{
+        root_dir = RootDir,
+        engines = Engines
+    } = Server,
+    Possible = lists:foldl(fun({Extension, Engine}, Acc) ->
+        Path = make_filepath(RootDir, DbName, Extension),
+        case couch_db_engine:exists(Engine, Path) of
+            true ->
+                [{Engine, Path} | Acc];
+            false ->
+                Acc
+        end
+    end, [], Engines),
+    case Possible of
+        [] ->
+            get_default_engine(Server, DbName);
+        [Engine] ->
+            Engine;
+        _ ->
+            erlang:error(engine_conflict)
+    end.
+
+
+get_default_engine(Server, DbName) ->
+    #server{
+        root_dir = RootDir,
+        engines = Engines
+    } = Server,
+    Default = {couch_bt_engine, make_filepath(RootDir, DbName, "couch")},
+    case config:get("couchdb", "default_engine") of
+        Extension when is_list(Extension) ->
+            case lists:keyfind(Extension, 1, Engines) of
+                {Extension, Module} ->
+                    {Module, make_filepath(RootDir, DbName, Extension)};
+                false ->
+                    Default
+            end;
+        _ ->
+            Default
+    end.
+
+
+make_filepath(RootDir, DbName, Extension) when is_binary(RootDir) ->
+    make_filepath(binary_to_list(RootDir), DbName, Extension);
+make_filepath(RootDir, DbName, Extension) when is_binary(DbName) ->
+    make_filepath(RootDir, binary_to_list(DbName), Extension);
+make_filepath(RootDir, DbName, Extension) when is_binary(Extension) ->
+    make_filepath(RootDir, DbName, binary_to_list(Extension));
+make_filepath(RootDir, DbName, Extension) ->
+    filename:join([RootDir, "./" ++ DbName ++ "." ++ Extension]).
+
+
+get_engine_extensions() ->
+    case config:get("couchdb_engines") of
+        [] ->
+            ["couch"];
+        Entries ->
+            [Ext || {Ext, _Mod} <- Entries]
+    end.

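Engine discovery is configuration driven: get_configured_engines/0 reads
the "couchdb_engines" config section as {Extension, Module} pairs, falling
back to the built-in {"couch", couch_bt_engine}, and get_default_engine/2
honours an optional "default_engine" key in the "couchdb" section. The ini
layout this implies (the commented second entry is a hypothetical
third-party engine, shown only for illustration):

    [couchdb_engines]
    couch = couch_bt_engine
    ;foo = my_foo_engine

    [couchdb]
    default_engine = couch

Note that get_engine/2 raises engine_conflict when more than one
configured engine claims an existing file for the same database name, so
on-disk extensions must be unambiguous per database.
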
http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/9c4b8432/src/couch_server_int.hrl
----------------------------------------------------------------------
diff --git a/src/couch_server_int.hrl b/src/couch_server_int.hrl
new file mode 100644
index 0000000..ca09ff9
--- /dev/null
+++ b/src/couch_server_int.hrl
@@ -0,0 +1,23 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+
+-record(srv_entry, {
+    name,
+    db,
+    pid,
+    lock,
+    waiters,
+    req_type,
+    db_options,
+    start_time
+}).
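
An annotated copy of the record, with field meanings inferred from how
couch_server.erl uses it above:

    -record(srv_entry, {
        name,       % database name; the ets key (keypos)
        db,         % couch_db handle once the db is open
        pid,        % db main pid (the opener pid while opening)
        lock,       % locked | unlocked | undefined; used by couch_lru
        waiters,    % [From] while opening, undefined afterwards
        req_type,   % open | create | {create, Name, Engine, Opts, From}
        db_options, % open options, passed to db_opened/db_closed
        start_time  % instance start time; guards stale db_updated calls
    }).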