You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2018/03/16 19:59:56 UTC
[couchdb] 02/20: [WIP] - Declare new purge storage engine APIs
This is an automated email from the ASF dual-hosted git repository.
davisp pushed a commit to branch COUCHDB-3326-clustered-purge-davisp-refactor
in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 1b9ed8fe58fefdf00164ab971805632b0f9ac431
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Wed Mar 14 12:19:37 2018 -0500
[WIP] - Declare new purge storage engine APIs
---
src/couch/src/couch_db_engine.erl | 148 +++++++++++++++++++++++++++++---------
1 file changed, 116 insertions(+), 32 deletions(-)
diff --git a/src/couch/src/couch_db_engine.erl b/src/couch/src/couch_db_engine.erl
index 4974201..0e7a1cf 100644
--- a/src/couch/src/couch_db_engine.erl
+++ b/src/couch/src/couch_db_engine.erl
@@ -22,6 +22,8 @@
-type rev() :: {non_neg_integer(), binary()}.
-type revs() :: [rev()].
-type json() :: any().
+-type uuid() :: binary().
+-type purge_seq() :: non_neg_integer().
-type doc_pair() :: {
#full_doc_info{} | not_found,
@@ -39,7 +41,7 @@
sync
].
--type purge_info() :: [{docid(), revs()}].
+-type purge_info() :: {purge_seq(), uuid(), docid(), revs()}.
-type epochs() :: [{Node::atom(), UpdateSeq::non_neg_integer()}].
-type size_info() :: [{Name::atom(), Size::non_neg_integer()}].
@@ -62,6 +64,10 @@
{dir, fwd | rev}
].
+-type purge_fold_options() :: [
+ % Need to enumerate these
+].
+
-type db_handle() :: any().
-type doc_fold_fun() :: fun((#full_doc_info{}, UserAcc::any()) ->
@@ -76,6 +82,10 @@
{ok, NewUserAcc::any()} |
{stop, NewUserAcc::any()}).
+-type purge_fold_fun() :: fun((purge_info(), UserAcc::any()) ->
+ {ok, NewUserAcc::any()} |
+ {stop, NewUserAcc::any()}).
+
% This is called by couch_server to determine which
% engine should be used for the given database. DbPath
@@ -206,13 +216,14 @@
-callback get_epochs(DbHandle::db_handle()) -> Epochs::epochs().
-% Get the last purge request performed.
--callback get_last_purged(DbHandle::db_handle()) -> LastPurged::purge_info().
-
-
% Get the current purge sequence. This should be incremented
% for every purge operation.
--callback get_purge_seq(DbHandle::db_handle()) -> PurgeSeq::non_neg_integer().
+-callback get_purge_seq(DbHandle::db_handle()) -> purge_seq().
+
+
+% Get the purge infos limit. This should just return the last
+% value that was passed to set_purge_infos_limit/2.
+-callback get_purge_infos_limit(DbHandle::db_handle()) -> pos_integer().
% Get the revision limit. This should just return the last
@@ -261,6 +272,11 @@
-callback set_revs_limit(DbHandle::db_handle(), RevsLimit::pos_integer()) ->
{ok, NewDbHandle::db_handle()}.
+
+-callback set_purge_infos_limit(DbHandle::db_handle(), Limit::pos_integer()) ->
+ {ok, NewDbHandle::db_handle()}.
+
+
-callback set_security(DbHandle::db_handle(), SecProps::any()) ->
{ok, NewDbHandle::db_handle()}.
@@ -301,6 +317,15 @@
doc().
+% This function will be called from many contexts concurrently.
+% If the storage engine has a purge_info() record for any of the
+% provided UUIDs, those purge_info() records should be returned. The
+% resulting list should have the same length as the input list of
+% UUIDs, with not_found standing in for any UUID that has no record.
+-callback load_purge_infos(DbHandle::db_handle(), [uuid()]) ->
+ [purge_info() | not_found].
+
+
% This function is called concurrently by any client process
% that is writing a document. It should accept a #doc{}
% record and return a #doc{} record with a mutated body it
@@ -341,31 +366,20 @@
% #full_doc_info{} records. The first element of the pair is
% the #full_doc_info{} that exists on disk. The second element
% is the new version that should be written to disk. There are
-% three basic cases that should be followed:
+% two basic cases that should be followed:
%
% 1. {not_found, #full_doc_info{}} - A new document was created
% 2. {#full_doc_info{}, #full_doc_info{}} - A document was updated
-% 3. {#full_doc_info{}, not_found} - A document was purged completely
%
-% Number one and two are fairly straight forward as long as proper
-% accounting for moving entries in the udpate sequence are accounted
-% for. However, case 3 you'll notice is "purged completely" which
-% means it needs to be removed from the database including the
-% update sequence. Also, for engines that are not using append
-% only storage like the legacy engine, case 2 can be the result of
-% a purge so special care will be needed to see which revisions
-% should be removed.
+% The cases are fairly straightforward as long as moving
+% entries in the update sequence is properly accounted
+% for.
%
% The LocalDocs variable is applied separately. Its important to
% note for new storage engine authors that these documents are
% separate because they should *not* be included as part of the
% changes index for the database.
%
-% The PurgedDocIdRevs is the list of Ids and Revisions that were
-% purged during this update. While its not guaranteed by the API,
-% currently there will never be purge changes comingled with
-% standard updates.
-%
% Traditionally an invocation of write_doc_infos should be all
% or nothing in so much that if an error occurs (or the VM dies)
% then the database doesn't retain any of the changes. However
@@ -376,8 +390,36 @@
-callback write_doc_infos(
DbHandle::db_handle(),
Pairs::doc_pairs(),
- LocalDocs::[#doc{}],
- PurgedDocIdRevs::[{docid(), revs()}]) ->
+ LocalDocs::[#doc{}]) ->
+ {ok, NewDbHandle::db_handle()}.
+
+
+% This function is called from the context of couch_db_updater
+% and as such is guaranteed single threaded for the given
+% DbHandle.
+%
+% The DocPair argument is a 2-tuple of #full_doc_info{} records. The
+% first element of the pair is the #full_doc_info{} that exists
+% on disk. The second element is the new version that should be
+% written to disk. There are two basic cases that should be considered:
+%
+% 1. {#full_doc_info{}, #full_doc_info{}} - A document was partially purged
+% 2. {#full_doc_info{}, not_found} - A document was completely purged
+%
+% In case 1, non-tail-append engines may have to remove revisions
+% specifically rather than rely on compaction to remove them. Also
+% note that the new #full_doc_info{} will have a different update_seq
+% that will need to be reflected in the changes feed.
+%
+% In case 2 the document was "purged completely", which
+% means it needs to be removed from the database, including from
+% the update sequence.
+%
+% The PurgeInfo contains the purge_seq, uuid, docid and revisions that
+% were requested to be purged. This should be persisted in such a way
+% that we can efficiently load purge_info() by its UUID as well as
+% iterate over purge_info() entries in order of their PurgeSeq.
+-callback purge_doc(DbHandle::db_handle(), doc_pair(), purge_info()) ->
{ok, NewDbHandle::db_handle()}.
@@ -518,6 +560,21 @@
% This function may be called by many processes concurrently.
%
+% This function is called to fold over purge requests in order of
+% increasing purge_seq (oldest purge first).
+%
+% The StartPurgeSeq parameter indicates where the fold should start *after*.
+-callback fold_purge_infos(
+ DbHandle::db_handle(),
+ StartPurgeSeq::purge_seq(),
+ UserFold::purge_fold_fun(),
+ UserAcc::any(),
+ purge_fold_options()) ->
+ {ok, LastUserAcc::any()}.
+
+
+% This function may be called by many processes concurrently.
+%
% This function is called to count the number of documents changed
% since the given UpdateSeq (ie, not including the possible change
% at exactly UpdateSeq). It is currently only used internally to
@@ -597,8 +654,8 @@
get_disk_version/1,
get_doc_count/1,
get_epochs/1,
- get_last_purged/1,
get_purge_seq/1,
+ get_purge_infos_limit/1,
get_revs_limit/1,
get_security/1,
get_size_info/1,
@@ -607,14 +664,17 @@
set_revs_limit/2,
set_security/2,
+ set_purge_infos_limit/2,
open_docs/2,
open_local_docs/2,
read_doc_body/2,
+ load_purge_infos/2,
serialize_doc/2,
write_doc_body/2,
- write_doc_infos/4,
+ write_doc_infos/3,
+ purge_doc_revs/3,
commit_data/1,
open_write_stream/2,
@@ -624,6 +684,7 @@
fold_docs/4,
fold_local_docs/4,
fold_changes/5,
+ fold_purge_infos/5,
count_changes_since/2,
start_compaction/1,
@@ -737,14 +798,14 @@ get_epochs(#db{} = Db) ->
Engine:get_epochs(EngineState).
-get_last_purged(#db{} = Db) ->
+get_purge_seq(#db{} = Db) ->
#db{engine = {Engine, EngineState}} = Db,
- Engine:get_last_purged(EngineState).
+ Engine:get_purge_seq(EngineState).
-get_purge_seq(#db{} = Db) ->
+get_purge_infos_limit(#db{} = Db) ->
#db{engine = {Engine, EngineState}} = Db,
- Engine:get_purge_seq(EngineState).
+ Engine:get_purge_infos_limit(EngineState).
get_revs_limit(#db{} = Db) ->
@@ -777,6 +838,12 @@ set_revs_limit(#db{} = Db, RevsLimit) ->
{ok, Db#db{engine = {Engine, NewSt}}}.
+% Hand the new purge infos limit to the storage engine and wrap the
+% engine state it returns back into the #db{} record.
+set_purge_infos_limit(#db{} = Db, Limit) ->
+    #db{engine = {Engine, St0}} = Db,
+    {ok, St1} = Engine:set_purge_infos_limit(St0, Limit),
+    NewDb = Db#db{engine = {Engine, St1}},
+    {ok, NewDb}.
+
+
set_security(#db{} = Db, SecProps) ->
#db{engine = {Engine, EngineState}} = Db,
{ok, NewSt} = Engine:set_security(EngineState, SecProps),
@@ -798,6 +865,11 @@ read_doc_body(#db{} = Db, RawDoc) ->
Engine:read_doc_body(EngineState, RawDoc).
+% Look up the purge_info() entries for the given UUIDs by delegating
+% to the storage engine's load_purge_infos/2 callback; the engine's
+% result is returned unchanged.
+load_purge_infos(#db{} = Db, UUIDs) ->
+    #db{engine = {Engine, EngineState}} = Db,
+    Engine:load_purge_infos(EngineState, UUIDs).
+
+
serialize_doc(#db{} = Db, #doc{} = Doc) ->
#db{engine = {Engine, EngineState}} = Db,
Engine:serialize_doc(EngineState, Doc).
@@ -808,10 +880,16 @@ write_doc_body(#db{} = Db, #doc{} = Doc) ->
Engine:write_doc_body(EngineState, Doc).
-write_doc_infos(#db{} = Db, DocUpdates, LocalDocs, PurgedDocIdRevs) ->
+write_doc_infos(#db{} = Db, DocUpdates, LocalDocs) ->
#db{engine = {Engine, EngineState}} = Db,
- {ok, NewSt} = Engine:write_doc_infos(
- EngineState, DocUpdates, LocalDocs, PurgedDocIdRevs),
+ {ok, NewSt} = Engine:write_doc_infos(EngineState, DocUpdates, LocalDocs),
+ {ok, Db#db{engine = {Engine, NewSt}}}.
+
+
+% Apply a purge through the storage engine and wrap the engine state
+% it returns back into the #db{} record.
+%
+% The engine behaviour declared in this module only has purge_doc/3;
+% there is no purge_doc_revs/3 callback, so dispatch to the declared
+% one. NOTE(review): purge_doc/3 is documented as taking one
+% doc_pair() and one purge_info() -- confirm callers pass that shape.
+purge_doc_revs(#db{} = Db, DocUpdates, Purges) ->
+    #db{engine = {Engine, EngineState}} = Db,
+    {ok, NewSt} = Engine:purge_doc(EngineState, DocUpdates, Purges),
+    {ok, Db#db{engine = {Engine, NewSt}}}.
@@ -851,6 +929,12 @@ fold_changes(#db{} = Db, StartSeq, UserFun, UserAcc, Options) ->
Engine:fold_changes(EngineState, StartSeq, UserFun, UserAcc, Options).
+% Fold over the stored purge infos by delegating to the storage
+% engine; the engine's return value is passed back as is.
+fold_purge_infos(#db{} = Db, StartPurgeSeq, UserFun, UserAcc, Options) ->
+    #db{engine = {Mod, St}} = Db,
+    Mod:fold_purge_infos(St, StartPurgeSeq, UserFun, UserAcc, Options).
+
+
count_changes_since(#db{} = Db, StartSeq) ->
#db{engine = {Engine, EngineState}} = Db,
Engine:count_changes_since(EngineState, StartSeq).
--
To stop receiving notification emails like this one, please contact
davisp@apache.org.