You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by da...@apache.org on 2018/03/16 19:59:56 UTC

[couchdb] 02/20: [WIP] - Declare new purge storage engine APIs

This is an automated email from the ASF dual-hosted git repository.

davisp pushed a commit to branch COUCHDB-3326-clustered-purge-davisp-refactor
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 1b9ed8fe58fefdf00164ab971805632b0f9ac431
Author: Paul J. Davis <pa...@gmail.com>
AuthorDate: Wed Mar 14 12:19:37 2018 -0500

    [WIP] - Declare new purge storage engine APIs
---
 src/couch/src/couch_db_engine.erl | 148 +++++++++++++++++++++++++++++---------
 1 file changed, 116 insertions(+), 32 deletions(-)

diff --git a/src/couch/src/couch_db_engine.erl b/src/couch/src/couch_db_engine.erl
index 4974201..0e7a1cf 100644
--- a/src/couch/src/couch_db_engine.erl
+++ b/src/couch/src/couch_db_engine.erl
@@ -22,6 +22,8 @@
 -type rev() :: {non_neg_integer(), binary()}.
 -type revs() :: [rev()].
 -type json() :: any().
+-type uuid() :: binary().
+-type purge_seq() :: non_neg_integer().
 
 -type doc_pair() :: {
         #full_doc_info{} | not_found,
@@ -39,7 +41,7 @@
         sync
     ].
 
--type purge_info() :: [{docid(), revs()}].
+-type purge_info() :: {purge_seq(), uuid(), docid(), revs()}.
 -type epochs() :: [{Node::atom(), UpdateSeq::non_neg_integer()}].
 -type size_info() :: [{Name::atom(), Size::non_neg_integer()}].
 
@@ -62,6 +64,10 @@
         {dir, fwd | rev}
     ].
 
+-type purge_fold_options() :: [
+    % Need to enumerate these
+].
+
 -type db_handle() :: any().
 
 -type doc_fold_fun() :: fun((#full_doc_info{}, UserAcc::any()) ->
@@ -76,6 +82,10 @@
         {ok, NewUserAcc::any()} |
         {stop, NewUserAcc::any()}).
 
+-type purge_fold_fun() :: fun((purge_info(), UserAcc::any()) ->
+        {ok, NewUserAcc::any()} |
+        {stop, NewUserAcc::any()}).
+
 
 % This is called by couch_server to determine which
 % engine should be used for the given database. DbPath
@@ -206,13 +216,14 @@
 -callback get_epochs(DbHandle::db_handle()) -> Epochs::epochs().
 
 
-% Get the last purge request performed.
--callback get_last_purged(DbHandle::db_handle()) -> LastPurged::purge_info().
-
-
 % Get the current purge sequence. This should be incremented
 % for every purge operation.
--callback get_purge_seq(DbHandle::db_handle()) -> PurgeSeq::non_neg_integer().
+-callback get_purge_seq(DbHandle::db_handle()) -> purge_seq().
+
+
+% Get the purged infos limit. This should just return the last
+% value that was passed to set_purged_docs_limit/2.
+-callback get_purge_infos_limit(DbHandle::db_handle()) -> pos_integer().
 
 
 % Get the revision limit. This should just return the last
@@ -261,6 +272,11 @@
 -callback set_revs_limit(DbHandle::db_handle(), RevsLimit::pos_integer()) ->
         {ok, NewDbHandle::db_handle()}.
 
+
+-callback set_purge_infos_limit(DbHandle::db_handle(), Limit::pos_integer()) ->
+        {ok, NewDbHandle::db_handle()}.
+
+
 -callback set_security(DbHandle::db_handle(), SecProps::any()) ->
         {ok, NewDbHandle::db_handle()}.
 
@@ -301,6 +317,15 @@
         doc().
 
 
+% This function will be called from many contexts concurrently.
+% If the storage engine has a purge_info() record for any of the
+% provided UUIDs, those purge_info() records should be returned. The
+% resulting list should have the same length as the input list of
+% UUIDs.
+-callback load_purge_infos(DbHandle::db_handle(), [uuid()]) ->
+        [purge_info() | not_found].
+
+
 % This function is called concurrently by any client process
 % that is writing a document. It should accept a #doc{}
 % record and return a #doc{} record with a mutated body it
@@ -341,31 +366,20 @@
 % #full_doc_info{} records. The first element of the pair is
 % the #full_doc_info{} that exists on disk. The second element
 % is the new version that should be written to disk. There are
-% three basic cases that should be followed:
+% two basic cases that should be followed:
 %
 %     1. {not_found, #full_doc_info{}} - A new document was created
 %     2. {#full_doc_info{}, #full_doc_info{}} - A document was updated
-%     3. {#full_doc_info{}, not_found} - A document was purged completely
 %
-% Number one and two are fairly straight forward as long as proper
-% accounting for moving entries in the udpate sequence are accounted
-% for. However, case 3 you'll notice is "purged completely" which
-% means it needs to be removed from the database including the
-% update sequence. Also, for engines that are not using append
-% only storage like the legacy engine, case 2 can be the result of
-% a purge so special care will be needed to see which revisions
-% should be removed.
+% The cases are fairly straight forward as long as proper
+% accounting for moving entries in the update sequence are accounted
+% for.
 %
 % The LocalDocs variable is applied separately. Its important to
 % note for new storage engine authors that these documents are
 % separate because they should *not* be included as part of the
 % changes index for the database.
 %
-% The PurgedDocIdRevs is the list of Ids and Revisions that were
-% purged during this update. While its not guaranteed by the API,
-% currently there will never be purge changes comingled with
-% standard updates.
-%
 % Traditionally an invocation of write_doc_infos should be all
 % or nothing in so much that if an error occurs (or the VM dies)
 % then the database doesn't retain any of the changes. However
@@ -376,8 +390,36 @@
 -callback write_doc_infos(
     DbHandle::db_handle(),
     Pairs::doc_pairs(),
-    LocalDocs::[#doc{}],
-    PurgedDocIdRevs::[{docid(), revs()}]) ->
+    LocalDocs::[#doc{}]) ->
+        {ok, NewDbHandle::db_handle()}.
+
+
+% This function is called from the context of couch_db_updater
+% and as such is guaranteed single threaded for the given
+% DbHandle.
+%
+% The DocPair argument is a 2-tuple of #full_doc_info{} records. The
+% first element of th epair is the #full_doc_info{} that exists
+% on disk. The second element is the new version that should be
+% written to disk. There are two basic cases that should be considered:
+%
+%     1. {#full_doc_info{}, #full_doc_info{}} - A document was partially purged
+%     2. {#full_doc_info{}, not_found} - A document was completely purged
+%
+% In case 1, non-tail-append engines may have to remove revisions
+% specifically rather than rely on compaction to remove them. Also
+% note that the new #full_doc_info{} will have a different update_seq
+% that will need to be reflected in the changes feed.
+%
+% In case 2 you'll notice is "purged completely" which
+% means it needs to be removed from the database including the
+% update sequence.
+%
+% The PurgeInfo contains the purge_seq, uuid, docid and revisions that
+% were requested to be purged. This should be persisted in such a way
+% that we can efficiently load purge_info() by its UUID as well as
+% iterate over purge_info() entries in order of their PurgeSeq.
+-callback purge_doc(DbHandle::db_handle(), doc_pair(), purge_info()) ->
         {ok, NewDbHandle::db_handle()}.
 
 
@@ -518,6 +560,21 @@
 
 % This function may be called by many processes concurrently.
 %
+% This function is called to fold over purged requests in order of
+% their oldest purge (increasing purge_seq order)
+%
+% The StartPurgeSeq parameter indicates where the fold should start *after*.
+-callback fold_purge_infos(
+    DbHandle::db_handle(),
+    StartPurgeSeq::purge_seq(),
+    UserFold::purge_fold_fun(),
+    UserAcc::any(),
+    purge_fold_options()) ->
+        {ok, LastUserAcc::any()}.
+
+
+% This function may be called by many processes concurrently.
+%
 % This function is called to count the number of documents changed
 % since the given UpdateSeq (ie, not including the possible change
 % at exactly UpdateSeq). It is currently only used internally to
@@ -597,8 +654,8 @@
     get_disk_version/1,
     get_doc_count/1,
     get_epochs/1,
-    get_last_purged/1,
     get_purge_seq/1,
+    get_purge_infos_limit/1,
     get_revs_limit/1,
     get_security/1,
     get_size_info/1,
@@ -607,14 +664,17 @@
 
     set_revs_limit/2,
     set_security/2,
+    set_purge_docs_limit/2,
 
     open_docs/2,
     open_local_docs/2,
     read_doc_body/2,
+    load_purge_infos/2,
 
     serialize_doc/2,
     write_doc_body/2,
-    write_doc_infos/4,
+    write_doc_infos/3,
+    purge_doc_revs/3,
     commit_data/1,
 
     open_write_stream/2,
@@ -624,6 +684,7 @@
     fold_docs/4,
     fold_local_docs/4,
     fold_changes/5,
+    fold_purge_infos/5,
     count_changes_since/2,
 
     start_compaction/1,
@@ -737,14 +798,14 @@ get_epochs(#db{} = Db) ->
     Engine:get_epochs(EngineState).
 
 
-get_last_purged(#db{} = Db) ->
+get_purge_seq(#db{} = Db) ->
     #db{engine = {Engine, EngineState}} = Db,
-    Engine:get_last_purged(EngineState).
+    Engine:get_purge_seq(EngineState).
 
 
-get_purge_seq(#db{} = Db) ->
+get_purge_infos_limit(#db{} = Db) ->
     #db{engine = {Engine, EngineState}} = Db,
-    Engine:get_purge_seq(EngineState).
+    Engine:get_purge_infos_limit(EngineState).
 
 
 get_revs_limit(#db{} = Db) ->
@@ -777,6 +838,12 @@ set_revs_limit(#db{} = Db, RevsLimit) ->
     {ok, Db#db{engine = {Engine, NewSt}}}.
 
 
+set_purge_infos_limit(#db{} = Db, PurgedDocsLimit) ->
+    #db{engine = {Engine, EngineState}} = Db,
+    {ok, NewSt} = Engine:set_purge_infos_limit(EngineState, PurgedDocsLimit),
+    {ok, Db#db{engine = {Engine, NewSt}}}.
+
+
 set_security(#db{} = Db, SecProps) ->
     #db{engine = {Engine, EngineState}} = Db,
     {ok, NewSt} = Engine:set_security(EngineState, SecProps),
@@ -798,6 +865,11 @@ read_doc_body(#db{} = Db, RawDoc) ->
     Engine:read_doc_body(EngineState, RawDoc).
 
 
+load_purge_infos(#db{} = Db, UUIDs) ->
+    #db{engine = {Engine, EngineState}} = Db,
+    Engine:open_purged_docs(EngineState, UUIDs).
+
+
 serialize_doc(#db{} = Db, #doc{} = Doc) ->
     #db{engine = {Engine, EngineState}} = Db,
     Engine:serialize_doc(EngineState, Doc).
@@ -808,10 +880,16 @@ write_doc_body(#db{} = Db, #doc{} = Doc) ->
     Engine:write_doc_body(EngineState, Doc).
 
 
-write_doc_infos(#db{} = Db, DocUpdates, LocalDocs, PurgedDocIdRevs) ->
+write_doc_infos(#db{} = Db, DocUpdates, LocalDocs) ->
     #db{engine = {Engine, EngineState}} = Db,
-    {ok, NewSt} = Engine:write_doc_infos(
-            EngineState, DocUpdates, LocalDocs, PurgedDocIdRevs),
+    {ok, NewSt} = Engine:write_doc_infos(EngineState, DocUpdates, LocalDocs),
+    {ok, Db#db{engine = {Engine, NewSt}}}.
+
+
+purge_doc_revs(#db{} = Db, DocUpdates, Purges) ->
+    #db{engine = {Engine, EngineState}} = Db,
+    {ok, NewSt} = Engine:purge_doc_revs(
+        EngineState, DocUpdates, Purges),
     {ok, Db#db{engine = {Engine, NewSt}}}.
 
 
@@ -851,6 +929,12 @@ fold_changes(#db{} = Db, StartSeq, UserFun, UserAcc, Options) ->
     Engine:fold_changes(EngineState, StartSeq, UserFun, UserAcc, Options).
 
 
+fold_purge_infos(#db{} = Db, StartPurgeSeq, UserFun, UserAcc, Options) ->
+    #db{engine = {Engine, EngineState}} = Db,
+    Engine:fold_purge_infos(
+            EngineState, StartPurgeSeq, UserFun, UserAcc, Options).
+
+
 count_changes_since(#db{} = Db, StartSeq) ->
     #db{engine = {Engine, EngineState}} = Db,
     Engine:count_changes_since(EngineState, StartSeq).

-- 
To stop receiving notification emails like this one, please contact
davisp@apache.org.