You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by ko...@apache.org on 2011/10/26 20:05:33 UTC

[32/50] git commit: Rest of the fix for COUCHDB-1265

Rest of the fix for COUCHDB-1265

As a follow-up to COUCHDB-1265: I had missed the fact that, after the
insertion of a new update_seq into an internal node, it is quite possible
that a compaction runs before the doc is updated again. This is
important because compaction removes the information about the largest
update seq from the tree itself.

The fix is simply to include the update_seq from the #full_doc_info{}
record when calculating #doc_info.high_seq. The way to think of this
is that high_seq is the maximum of all known values for the update
sequence, where "known" means every value present in the tree or in
the #full_doc_info{} record.



git-svn-id: https://svn.apache.org/repos/asf/couchdb/trunk@1176701 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/couchdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb/commit/017ebb3f
Tree: http://git-wip-us.apache.org/repos/asf/couchdb/tree/017ebb3f
Diff: http://git-wip-us.apache.org/repos/asf/couchdb/diff/017ebb3f

Branch: refs/heads/1319-large-headers-are-corrupted
Commit: 017ebb3feabbdf1bb569fea500c9d6b9f66bdf4a
Parents: 2c381d8
Author: Paul Joseph Davis <da...@apache.org>
Authored: Wed Sep 28 03:36:31 2011 +0000
Committer: Paul Joseph Davis <da...@apache.org>
Committed: Wed Sep 28 03:36:31 2011 +0000

----------------------------------------------------------------------
 share/www/script/test/recreate_doc.js |   88 ++++++++++++++++++----------
 src/couchdb/couch_doc.erl             |    8 +-
 2 files changed, 60 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb/blob/017ebb3f/share/www/script/test/recreate_doc.js
----------------------------------------------------------------------
diff --git a/share/www/script/test/recreate_doc.js b/share/www/script/test/recreate_doc.js
index a1cfb8f..f972379 100644
--- a/share/www/script/test/recreate_doc.js
+++ b/share/www/script/test/recreate_doc.js
@@ -81,41 +81,65 @@ couchTests.recreate_doc = function(debug) {
   db.deleteDb();
   db.createDb();
 
-  // COUCHDB-1265
-  // Resuscitate an unavailable old revision and make sure that it
-  // doesn't introduce duplicates into the _changes feed.
-  
-  var doc = {_id: "bar", count: 0};
-  T(db.save(doc).ok);
-  var ghost = {_id: "bar", _rev: doc._rev, count: doc.count};
-  for(var i = 0; i < 2; i++) {
-    doc.count += 1;
-    T(db.save(doc).ok);
+  // Helper function to create a doc with multiple revisions
+  // that are compacted away to ?REV_MISSING.
+
+  var createDoc = function(docid) {
+    var ret = [{_id: docid, count: 0}];
+    T(db.save(ret[0]).ok);
+    for(var i = 0; i < 2; i++) {
+      ret[ret.length] = {
+        _id: docid,
+        _rev: ret[ret.length-1]._rev,
+        count: ret[ret.length-1].count+1
+      };
+      T(db.save(ret[ret.length-1]).ok);
+    }
+    db.compact();
+    while(db.info().compact_running) {}
+    return ret;
   }
 
-  // Compact so that the old revision to be resuscitated will be
-  // in the rev_tree as ?REV_MISSING
+  // Helper function to check that there are no duplicates
+  // in the changes feed and that it has proper update
+  // sequence ordering.
+
+  var checkChanges = function() {
+    // Assert that there are no duplicates in _changes.
+    var req = CouchDB.request("GET", "/test_suite_db/_changes");
+    var resp = JSON.parse(req.responseText);
+    var docids = {};
+    var prev_seq = -1;
+    for(var i = 0; i < resp.results.length; i++) {
+      row = resp.results[i];
+      T(row.seq > prev_seq, "Unordered _changes feed.");
+      T(docids[row.id] === undefined, "Duplicates in _changes feed.");
+      prev_seq = row.seq;
+      docids[row.id] = true;
+    }
+  };
+
+  // COUCHDB-1265 - Check that the changes feed remains proper
+  // after we try and break the update_seq tree.
+
+  // This first case is the one originally reported and "fixed"
+  // in COUCHDB-1265. Reinserting an old revision into the
+  // revision tree causes duplicates in the update_seq tree.
+
+  var revs = createDoc("a");
+  T(db.save(revs[1], {new_edits: false}).ok);
+  T(db.save(revs[revs.length-1]).ok);
+  checkChanges();
+
+  // The original fix for COUCHDB-1265 is not entirely correct
+  // as it didn't consider the possibility that a compaction
+  // might run after the original tree screw up.
+
+  revs = createDoc("b");
+  T(db.save(revs[1], {new_edits: false}).ok);
   db.compact();
   while(db.info().compact_running) {}
+  T(db.save(revs[revs.length-1]).ok);
+  checkChanges();
 
-  // Saving the ghost here puts it back in the rev_tree in such
-  // a way as to create a new update_seq but without changing a
-  // leaf revision. This would cause the #full_doc_info{} and
-  // #doc_info{} records to diverge in their idea of what the
-  // doc's update_seq is and end up introducing a duplicate in
-  // the _changes feed the next time this doc is updated.
-  T(db.save(ghost, {new_edits: false}).ok);
-
-  // The duplicate would have been introduce here becuase the #doc_info{}
-  // would not have been removed correctly.
-  T(db.save(doc).ok);
-
-  // And finally assert that there are no duplicates in _changes.
-  var req = CouchDB.request("GET", "/test_suite_db/_changes");
-  var resp = JSON.parse(req.responseText);
-  var docids = {};
-  for(var i = 0; i < resp.results.length; i++) {
-    T(docids[resp.results[i].id] === undefined, "Duplicates in _changes feed.");
-    docids[resp.results[i].id] = true;
-  }
 };

http://git-wip-us.apache.org/repos/asf/couchdb/blob/017ebb3f/src/couchdb/couch_doc.erl
----------------------------------------------------------------------
diff --git a/src/couchdb/couch_doc.erl b/src/couchdb/couch_doc.erl
index 5e2ee92..7199d07 100644
--- a/src/couchdb/couch_doc.erl
+++ b/src/couchdb/couch_doc.erl
@@ -319,7 +319,7 @@ to_doc_info(FullDocInfo) ->
     {DocInfo, _Path} = to_doc_info_path(FullDocInfo),
     DocInfo.
 
-max_seq(Tree) ->
+max_seq(Tree, UpdateSeq) ->
     FoldFun = fun({_Pos, _Key}, Value, _Type, MaxOldSeq) ->
         case Value of
             {_Deleted, _DiskPos, OldTreeSeq} ->
@@ -331,9 +331,9 @@ max_seq(Tree) ->
                 MaxOldSeq
         end
     end,
-    couch_key_tree:fold(FoldFun, 0, Tree).
+    couch_key_tree:fold(FoldFun, UpdateSeq, Tree).
 
-to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree}) ->
+to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree,update_seq=Seq}) ->
     RevInfosAndPath = [
         {#rev_info{
             deleted = element(1, LeafVal),
@@ -351,7 +351,7 @@ to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree}) ->
         end, RevInfosAndPath),
     [{_RevInfo, WinPath}|_] = SortedRevInfosAndPath,
     RevInfos = [RevInfo || {RevInfo, _Path} <- SortedRevInfosAndPath],
-    {#doc_info{id=Id, high_seq=max_seq(Tree), revs=RevInfos}, WinPath}.
+    {#doc_info{id=Id, high_seq=max_seq(Tree, Seq), revs=RevInfos}, WinPath}.