You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2014/06/03 08:06:08 UTC

svn commit: r1599419 - in /jackrabbit/oak/branches/1.0: ./ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/ oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/ oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/ ...

Author: chetanm
Date: Tue Jun  3 06:06:08 2014
New Revision: 1599419

URL: http://svn.apache.org/r1599419
Log:
OAK-1865 - Merged revision 1592658

Added:
    jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
      - copied unchanged from r1599416, jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
Modified:
    jackrabbit/oak/branches/1.0/   (props changed)
    jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
    jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
    jackrabbit/oak/branches/1.0/oak-doc/   (props changed)

Propchange: jackrabbit/oak/branches/1.0/
------------------------------------------------------------------------------
  Merged /jackrabbit/oak/trunk:r1599416

Modified: jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1599419&r1=1599418&r2=1599419&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (original)
+++ jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java Tue Jun  3 06:06:08 2014
@@ -17,16 +17,13 @@
 package org.apache.jackrabbit.oak.plugins.blob;
 
 import java.io.BufferedWriter;
+import java.io.Closeable;
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.sql.Timestamp;
-import java.util.ArrayDeque;
 import java.util.Iterator;
 import java.util.List;
-import java.util.NoSuchElementException;
-import java.util.Set;
-import java.util.TreeSet;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.ExecutionException;
@@ -38,8 +35,10 @@ import com.google.common.base.Charsets;
 import com.google.common.base.Joiner;
 import com.google.common.base.StandardSystemProperty;
 import com.google.common.base.Stopwatch;
+import com.google.common.collect.AbstractIterator;
+import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
+import com.google.common.collect.PeekingIterator;
 import com.google.common.io.Closeables;
 import com.google.common.io.Files;
 import com.google.common.util.concurrent.ListenableFutureTask;
@@ -97,7 +96,6 @@ public class MarkSweepGarbageCollector i
      * Creates an instance of MarkSweepGarbageCollector
      *
      * @param marker BlobReferenceRetriever instanced used to fetch refereedd blob entries
-     * @param blobStore
      * @param root the root absolute path of directory under which temporary
      *             files would be created
      * @param batchCount batch sized used for saving intermediate state
@@ -160,9 +158,6 @@ public class MarkSweepGarbageCollector i
 
     /**
      * Mark and sweep. Main method for GC.
-     * 
-     * @throws Exception
-     *             the exception
      */
     private void markAndSweep() throws IOException, InterruptedException {
         boolean threw = true;
@@ -222,7 +217,7 @@ public class MarkSweepGarbageCollector i
 
         FileLineDifferenceIterator iter = new FileLineDifferenceIterator(
                 fs.getMarkedRefs(),
-                fs.getAvailableRefs(), batchCount);
+                fs.getAvailableRefs());
 
         BufferedWriter bufferWriter = null;
         try {
@@ -245,6 +240,7 @@ public class MarkSweepGarbageCollector i
             LOG.debug("Found GC candidates - " + numCandidates);
         } finally {
             IOUtils.closeQuietly(bufferWriter);
+            IOUtils.closeQuietly(iter);
         }
 
         LOG.debug("Ending difference phase of the garbage collector");
@@ -449,138 +445,74 @@ public class MarkSweepGarbageCollector i
 
     }
 
+
     /**
      * FileLineDifferenceIterator class which iterates over the difference of 2 files line by line.
      */
-    static class FileLineDifferenceIterator implements Iterator<String> {
-
-        /** The marked references iterator. */
-        private final LineIterator markedIter;
-
-        /** The available references iter. */
-        private final LineIterator allIter;
-
-        private final ArrayDeque<String> queue;
-
-        private final int batchSize;
-
-        private boolean done;
-
-        /** Temporary buffer. */
-        private TreeSet<String> markedBuffer;
-
-        /**
-         * Instantiates a new file line difference iterator.
-         */
-        public FileLineDifferenceIterator(File marked, File available, int batchSize) throws IOException {
-            this.markedIter = FileUtils.lineIterator(marked);
-            this.allIter = FileUtils.lineIterator(available);
-            this.batchSize = batchSize;
-            queue = new ArrayDeque<String>(batchSize);
-            markedBuffer = Sets.newTreeSet();
-
+    static class FileLineDifferenceIterator extends AbstractIterator<String> implements Closeable{
+        private final PeekingIterator<String> peekMarked;
+        private final LineIterator marked;
+        private final LineIterator all;
+
+        public FileLineDifferenceIterator(File marked, File available) throws IOException {
+            this(FileUtils.lineIterator(marked), FileUtils.lineIterator(available));
         }
 
-        /**
-         * Close.
-         */
-        private void close() {
-            LineIterator.closeQuietly(markedIter);
-            LineIterator.closeQuietly(allIter);
+        public FileLineDifferenceIterator(LineIterator marked, LineIterator available) throws IOException {
+            this.marked = marked;
+            this.peekMarked = Iterators.peekingIterator(marked);
+            this.all = available;
         }
 
         @Override
-        public boolean hasNext() {
-            if (!queue.isEmpty()) {
-                return true;
-            } else if (done) {
-                return false;
-            } else {
-                if (!markedIter.hasNext() && !allIter.hasNext()) {
-                    done = true;
-                    close();
-                    return false;
-                } else {
-                    queue.addAll(difference());
-                    if (!queue.isEmpty()) {
-                        return true;
-                    } else {
-                        done = true;
-                        close();
-                    }
-                }
+        protected String computeNext() {
+            String diff = computeNextDiff();
+            if (diff == null) {
+                close();
+                return endOfData();
             }
-
-            return false;
+            return diff;
         }
 
         @Override
-        public String next() {
-            return nextDifference();
+        public void close() {
+            LineIterator.closeQuietly(marked);
+            LineIterator.closeQuietly(all);
         }
 
-        /**
-         * Next difference.
-         * 
-         * @return the string
-         */
-        public String nextDifference() {
-            if (!hasNext()) {
-                throw new NoSuchElementException("No more difference");
+        private String computeNextDiff() {
+            if (!all.hasNext()) {
+                return null;
             }
-            return queue.remove();
-        }
-
-        /**
-         * Difference.
-         * 
-         * @return the sets the
-         */
-        protected Set<String> difference() {
-            TreeSet<String> gcSet = new TreeSet<String>();
-
-            // Iterate till the gc candidate set is at least SAVE_BATCH_COUNT or
-            // the
-            // blob id set iteration is complete
-            while (allIter.hasNext() &&
-                    gcSet.size() < batchSize) {
-                TreeSet<String> allBuffer = new TreeSet<String>();
-
-                while (markedIter.hasNext() &&
-                        markedBuffer.size() < batchSize) {
-                    String stre = markedIter.next();
-                    markedBuffer.add(stre);
-                }
-                while (allIter.hasNext() &&
-                        allBuffer.size() < batchSize) {
-                    String stre = allIter.next();
-                    allBuffer.add(stre);
-                }
 
-                if (markedBuffer.isEmpty()) {
-                    gcSet = allBuffer;
-                } else {
-                    gcSet.addAll(
-                            Sets.difference(allBuffer, markedBuffer));
-
-                    if (allBuffer.last().compareTo(markedBuffer.last()) < 0) {
-                        // filling markedLeftoverBuffer
-                        TreeSet<String> markedLeftoverBuffer = Sets.newTreeSet();
-                        markedLeftoverBuffer.addAll(markedBuffer.tailSet(allBuffer.last(), false));
-                        markedBuffer = markedLeftoverBuffer;
-                        markedLeftoverBuffer = null;
+            //Marked finish the rest of all are part of diff
+            if (!peekMarked.hasNext()) {
+                return all.next();
+            }
+            
+            String diff = null;
+            while (all.hasNext() && diff == null) {
+                diff = all.next();
+                while (peekMarked.hasNext()) {
+                    String marked = peekMarked.peek();
+                    int comparisonResult = diff.compareTo(marked);
+                    if (comparisonResult > 0) {
+                        //Extra entries in marked. Ignore them and move on
+                        peekMarked.next();
+                    } else if (comparisonResult == 0) {
+                        //Matching entry found in marked move past it. Not a
+                        //dif candidate
+                        peekMarked.next();
+                        diff = null;
+                        break;
                     } else {
-                        markedBuffer.clear();
+                        //This entry is not found in marked entries
+                        //hence part of diff
+                        return diff;
                     }
                 }
             }
-
-            return gcSet;
-        }
-
-        @Override
-        public void remove() {
-            throw new UnsupportedOperationException();
+            return diff;
         }
     }
 

Modified: jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java?rev=1599419&r1=1599418&r2=1599419&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java (original)
+++ jackrabbit/oak/branches/1.0/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java Tue Jun  3 06:06:08 2014
@@ -16,10 +16,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.document;
 
-import static org.junit.Assert.assertTrue;
-
 import java.io.ByteArrayInputStream;
-import java.io.IOException;
 import java.io.InputStream;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -28,14 +25,12 @@ import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
-import com.google.common.util.concurrent.MoreExecutors;
-import junit.framework.Assert;
-
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
+import com.google.common.util.concurrent.MoreExecutors;
 import com.mongodb.BasicDBObject;
 import com.mongodb.DBCollection;
-
+import junit.framework.Assert;
 import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector;
 import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
@@ -46,6 +41,8 @@ import org.apache.jackrabbit.oak.spi.sta
 import org.apache.jackrabbit.oak.stats.Clock;
 import org.junit.Test;
 
+import static org.junit.Assert.assertTrue;
+
 /**
  * Tests for MongoMK GC
  */
@@ -61,7 +58,7 @@ public class MongoBlobGCTest extends Abs
         int number = 10;
         // track the number of the assets to be deleted
         List<Integer> processed = Lists.newArrayList();
-        Random rand = new Random();
+        Random rand = new Random(47);
         for (int i = 0; i < 5; i++) {
             int n = rand.nextInt(number);
             if (!processed.contains(n)) {
@@ -104,6 +101,18 @@ public class MongoBlobGCTest extends Abs
         return set;
     }
 
+    public HashSet<String> addInlined() throws Exception {
+        HashSet<String> set = new HashSet<String>();
+        DocumentNodeStore s = mk.getNodeStore();
+        NodeBuilder a = s.getRoot().builder();
+        int number = 12;
+        for (int i = 0; i < number; i++) {
+            Blob b = s.createBlob(randomStream(i, 50));
+            a.child("cinline" + i).setProperty("x", b);
+        }
+        s.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        return set;
+    }
     private void deleteFromMongo(String nodeId) {
         DBCollection coll = mongoConnection.getDB().getCollection("nodes");
         BasicDBObject blobNodeObj = new BasicDBObject();
@@ -123,18 +132,30 @@ public class MongoBlobGCTest extends Abs
         gc(set);
     }
 
+    @Test
+    public void gcDirectMongoDeleteWithInlined() throws Exception {
+        HashSet<String> set = setUp(true);
+        addInlined();
+        gc(set);
+    }
+    @Test
+    public void gcVersionDeleteWithInlined() throws Exception {
+        HashSet<String> set = setUp(false);
+        addInlined();
+        gc(set);
+    }
     private void gc(HashSet<String> set) throws Exception {
         DocumentNodeStore store = mk.getNodeStore();
         MarkSweepGarbageCollector gc = new MarkSweepGarbageCollector(
                 new DocumentBlobReferenceRetriever(store),
                 (GarbageCollectableBlobStore) store.getBlobStore(),
                 MoreExecutors.sameThreadExecutor(),
-                "./target", 2048, true, 0);
+                "./target", 5, true, 0);
         gc.collectGarbage();
 
         Set<String> existing = iterate();
         boolean empty = Sets.intersection(set, existing).isEmpty();
-        assertTrue(empty);
+        assertTrue(empty && !existing.isEmpty());
     }
 
     protected Set<String> iterate() throws Exception {

Propchange: jackrabbit/oak/branches/1.0/oak-doc/
------------------------------------------------------------------------------
  Merged /jackrabbit/oak/trunk/oak-doc:r1599416