You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2014/06/03 08:00:40 UTC

svn commit: r1599416 - in /jackrabbit/oak/trunk/oak-core/src: main/java/org/apache/jackrabbit/oak/plugins/blob/ test/java/org/apache/jackrabbit/oak/plugins/blob/ test/java/org/apache/jackrabbit/oak/plugins/document/

Author: chetanm
Date: Tue Jun  3 06:00:39 2014
New Revision: 1599416

URL: http://svn.apache.org/r1599416
Log:
OAK-1865 - Blob garbage collector deletes referenced blobs for Jackrabbit 2.x DataStores

Applying updated patch from Thomas and Amit

Added:
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java   (with props)
Modified:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1599416&r1=1599415&r2=1599416&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java Tue Jun  3 06:00:39 2014
@@ -17,16 +17,13 @@
 package org.apache.jackrabbit.oak.plugins.blob;
 
 import java.io.BufferedWriter;
+import java.io.Closeable;
 import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.sql.Timestamp;
-import java.util.ArrayDeque;
 import java.util.Iterator;
 import java.util.List;
-import java.util.NoSuchElementException;
-import java.util.Set;
-import java.util.TreeSet;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.ExecutionException;
@@ -38,8 +35,10 @@ import com.google.common.base.Charsets;
 import com.google.common.base.Joiner;
 import com.google.common.base.StandardSystemProperty;
 import com.google.common.base.Stopwatch;
+import com.google.common.collect.AbstractIterator;
+import com.google.common.collect.Iterators;
 import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
+import com.google.common.collect.PeekingIterator;
 import com.google.common.io.Closeables;
 import com.google.common.io.Files;
 import com.google.common.util.concurrent.ListenableFutureTask;
@@ -97,7 +96,6 @@ public class MarkSweepGarbageCollector i
      * Creates an instance of MarkSweepGarbageCollector
      *
      * @param marker BlobReferenceRetriever instanced used to fetch refereed blob entries
-     * @param blobStore
      * @param root the root absolute path of directory under which temporary
      *             files would be created
      * @param batchCount batch sized used for saving intermediate state
@@ -160,9 +158,6 @@ public class MarkSweepGarbageCollector i
 
     /**
      * Mark and sweep. Main method for GC.
-     * 
-     * @throws Exception
-     *             the exception
      */
     private void markAndSweep() throws IOException, InterruptedException {
         boolean threw = true;
@@ -222,7 +217,7 @@ public class MarkSweepGarbageCollector i
 
         FileLineDifferenceIterator iter = new FileLineDifferenceIterator(
                 fs.getMarkedRefs(),
-                fs.getAvailableRefs(), batchCount);
+                fs.getAvailableRefs());
 
         BufferedWriter bufferWriter = null;
         try {
@@ -245,6 +240,7 @@ public class MarkSweepGarbageCollector i
             LOG.debug("Found GC candidates - " + numCandidates);
         } finally {
             IOUtils.closeQuietly(bufferWriter);
+            IOUtils.closeQuietly(iter);
         }
 
         LOG.debug("Ending difference phase of the garbage collector");
@@ -449,138 +445,74 @@ public class MarkSweepGarbageCollector i
 
     }
 
+
     /**
      * FileLineDifferenceIterator class which iterates over the difference of 2 files line by line.
      */
-    static class FileLineDifferenceIterator implements Iterator<String> {
-
-        /** The marked references iterator. */
-        private final LineIterator markedIter;
-
-        /** The available references iter. */
-        private final LineIterator allIter;
-
-        private final ArrayDeque<String> queue;
-
-        private final int batchSize;
-
-        private boolean done;
-
-        /** Temporary buffer. */
-        private TreeSet<String> markedBuffer;
-
-        /**
-         * Instantiates a new file line difference iterator.
-         */
-        public FileLineDifferenceIterator(File marked, File available, int batchSize) throws IOException {
-            this.markedIter = FileUtils.lineIterator(marked);
-            this.allIter = FileUtils.lineIterator(available);
-            this.batchSize = batchSize;
-            queue = new ArrayDeque<String>(batchSize);
-            markedBuffer = Sets.newTreeSet();
-
+    static class FileLineDifferenceIterator extends AbstractIterator<String> implements Closeable{
+        private final PeekingIterator<String> peekMarked;
+        private final LineIterator marked;
+        private final LineIterator all;
+
+        public FileLineDifferenceIterator(File marked, File available) throws IOException {
+            this(FileUtils.lineIterator(marked), FileUtils.lineIterator(available));
         }
 
-        /**
-         * Close.
-         */
-        private void close() {
-            LineIterator.closeQuietly(markedIter);
-            LineIterator.closeQuietly(allIter);
+        public FileLineDifferenceIterator(LineIterator marked, LineIterator available) throws IOException {
+            this.marked = marked;
+            this.peekMarked = Iterators.peekingIterator(marked);
+            this.all = available;
         }
 
         @Override
-        public boolean hasNext() {
-            if (!queue.isEmpty()) {
-                return true;
-            } else if (done) {
-                return false;
-            } else {
-                if (!markedIter.hasNext() && !allIter.hasNext()) {
-                    done = true;
-                    close();
-                    return false;
-                } else {
-                    queue.addAll(difference());
-                    if (!queue.isEmpty()) {
-                        return true;
-                    } else {
-                        done = true;
-                        close();
-                    }
-                }
+        protected String computeNext() {
+            String diff = computeNextDiff();
+            if (diff == null) {
+                close();
+                return endOfData();
             }
-
-            return false;
+            return diff;
         }
 
         @Override
-        public String next() {
-            return nextDifference();
+        public void close() {
+            LineIterator.closeQuietly(marked);
+            LineIterator.closeQuietly(all);
         }
 
-        /**
-         * Next difference.
-         * 
-         * @return the string
-         */
-        public String nextDifference() {
-            if (!hasNext()) {
-                throw new NoSuchElementException("No more difference");
+        private String computeNextDiff() {
+            if (!all.hasNext()) {
+                return null;
             }
-            return queue.remove();
-        }
-
-        /**
-         * Difference.
-         * 
-         * @return the sets the
-         */
-        protected Set<String> difference() {
-            TreeSet<String> gcSet = new TreeSet<String>();
-
-            // Iterate till the gc candidate set is at least SAVE_BATCH_COUNT or
-            // the
-            // blob id set iteration is complete
-            while (allIter.hasNext() &&
-                    gcSet.size() < batchSize) {
-                TreeSet<String> allBuffer = new TreeSet<String>();
-
-                while (markedIter.hasNext() &&
-                        markedBuffer.size() < batchSize) {
-                    String stre = markedIter.next();
-                    markedBuffer.add(stre);
-                }
-                while (allIter.hasNext() &&
-                        allBuffer.size() < batchSize) {
-                    String stre = allIter.next();
-                    allBuffer.add(stre);
-                }
 
-                if (markedBuffer.isEmpty()) {
-                    gcSet = allBuffer;
-                } else {
-                    gcSet.addAll(
-                            Sets.difference(allBuffer, markedBuffer));
-
-                    if (allBuffer.last().compareTo(markedBuffer.last()) < 0) {
-                        // filling markedLeftoverBuffer
-                        TreeSet<String> markedLeftoverBuffer = Sets.newTreeSet();
-                        markedLeftoverBuffer.addAll(markedBuffer.tailSet(allBuffer.last(), false));
-                        markedBuffer = markedLeftoverBuffer;
-                        markedLeftoverBuffer = null;
+            //Marked finish the rest of all are part of diff
+            if (!peekMarked.hasNext()) {
+                return all.next();
+            }
+            
+            String diff = null;
+            while (all.hasNext() && diff == null) {
+                diff = all.next();
+                while (peekMarked.hasNext()) {
+                    String marked = peekMarked.peek();
+                    int comparisonResult = diff.compareTo(marked);
+                    if (comparisonResult > 0) {
+                        //Extra entries in marked. Ignore them and move on
+                        peekMarked.next();
+                    } else if (comparisonResult == 0) {
+                        //Matching entry found in marked move past it. Not a
+                        //dif candidate
+                        peekMarked.next();
+                        diff = null;
+                        break;
                     } else {
-                        markedBuffer.clear();
+                        //This entry is not found in marked entries
+                        //hence part of diff
+                        return diff;
                     }
                 }
             }
-
-            return gcSet;
-        }
-
-        @Override
-        public void remove() {
-            throw new UnsupportedOperationException();
+            return diff;
         }
     }
 

Added: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java?rev=1599416&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java Tue Jun  3 06:00:39 2014
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.plugins.blob;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Random;
+import java.util.TreeSet;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
+import com.google.common.base.StandardSystemProperty;
+import com.google.common.collect.ImmutableList;
+import org.apache.commons.io.LineIterator;
+import org.junit.Test;
+
+import static java.util.Arrays.asList;
+import static org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector.FileLineDifferenceIterator;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+
+public class FileLineDifferenceIteratorTest {
+    
+    @Test
+    public void testRandomized() throws Exception {
+        Random r = new Random(0);
+        for (int i = 0; i < 10000; i++) {
+            TreeSet<String> marked = new TreeSet<String>();
+            TreeSet<String> all = new TreeSet<String>();
+            TreeSet<String> diff = new TreeSet<String>();
+            int size = r.nextInt(5);
+            for (int a = 0; a < size; a++) {
+                marked.add("" + r.nextInt(10));
+            }
+            size = r.nextInt(5);
+            for (int a = 0; a < size; a++) {
+                all.add("" + r.nextInt(10));
+            }
+            diff.addAll(all);
+            diff.removeAll(marked);
+            String m = marked.toString().replaceAll("[ \\[\\]]", "");
+            String a = all.toString().replaceAll("[ \\[\\]]", "");
+            assertDiff(m, a,
+                    new ArrayList<String>(diff));
+        }
+    }
+    
+    @Test
+    public void testNoDiff() throws Exception {
+        assertDiff("a,b,c", "a,b,c", Collections.<String> emptyList());
+        assertDiff("a,b,c,d,f", "a,b,f", Collections.<String> emptyList());
+    }
+
+    @Test
+    public void testSimpleDiff() throws Exception {
+        assertDiff("a,b", "a,b,c", asList("c"));
+        assertDiff("a,b", "", Collections.<String> emptyList());
+        assertDiff("", "", Collections.<String> emptyList());
+        assertDiff("", "a", asList("a"));
+        assertDiff("", "a, b", asList("a", "b"));
+    }
+
+    @Test
+    public void testDiffWithExtraEntriesInMarked() throws IOException {
+        assertDiff("a,b", "a,b,c, e, h", asList("c", "e", "h"));
+        assertDiff("a,b,d,e", "a,b,c", asList("c"));
+        assertDiff("a,b,d,e,f", "a,b,c,f", asList("c"));
+        assertDiff("a,b,d,e,f", "a,b,c,f, h", asList("c", "h"));
+        assertDiff("3,7", "2,3,5,9", asList("2", "5", "9"));
+    }
+
+    private static void assertDiff(String marked, String all, List<String> diff) throws IOException {
+        Iterator<String> itr = createItr(marked, all);
+        assertThat("marked: " + marked + " all: " + all, ImmutableList.copyOf(itr), is(diff));
+    }
+
+    private static Iterator<String> createItr(String marked, String all) throws IOException {
+        return new FileLineDifferenceIterator(lineItr(marked), lineItr(all));
+    }
+
+    private static LineIterator lineItr(String seq) {
+        Iterable<String> seqItr = Splitter.on(',').trimResults().split(seq);
+        String lines = Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).join(seqItr);
+        return new LineIterator(new StringReader(lines));
+    }
+
+}

Propchange: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java?rev=1599416&r1=1599415&r2=1599416&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java Tue Jun  3 06:00:39 2014
@@ -16,10 +16,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.document;
 
-import static org.junit.Assert.assertTrue;
-
 import java.io.ByteArrayInputStream;
-import java.io.IOException;
 import java.io.InputStream;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -28,14 +25,12 @@ import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
-import com.google.common.util.concurrent.MoreExecutors;
-import junit.framework.Assert;
-
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
+import com.google.common.util.concurrent.MoreExecutors;
 import com.mongodb.BasicDBObject;
 import com.mongodb.DBCollection;
-
+import junit.framework.Assert;
 import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector;
 import org.apache.jackrabbit.oak.plugins.document.VersionGarbageCollector.VersionGCStats;
@@ -46,6 +41,8 @@ import org.apache.jackrabbit.oak.spi.sta
 import org.apache.jackrabbit.oak.stats.Clock;
 import org.junit.Test;
 
+import static org.junit.Assert.assertTrue;
+
 /**
  * Tests for MongoMK GC
  */
@@ -61,7 +58,7 @@ public class MongoBlobGCTest extends Abs
         int number = 10;
         // track the number of the assets to be deleted
         List<Integer> processed = Lists.newArrayList();
-        Random rand = new Random();
+        Random rand = new Random(47);
         for (int i = 0; i < 5; i++) {
             int n = rand.nextInt(number);
             if (!processed.contains(n)) {
@@ -104,6 +101,18 @@ public class MongoBlobGCTest extends Abs
         return set;
     }
 
+    public HashSet<String> addInlined() throws Exception {
+        HashSet<String> set = new HashSet<String>();
+        DocumentNodeStore s = mk.getNodeStore();
+        NodeBuilder a = s.getRoot().builder();
+        int number = 12;
+        for (int i = 0; i < number; i++) {
+            Blob b = s.createBlob(randomStream(i, 50));
+            a.child("cinline" + i).setProperty("x", b);
+        }
+        s.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        return set;
+    }
     private void deleteFromMongo(String nodeId) {
         DBCollection coll = mongoConnection.getDB().getCollection("nodes");
         BasicDBObject blobNodeObj = new BasicDBObject();
@@ -123,18 +132,30 @@ public class MongoBlobGCTest extends Abs
         gc(set);
     }
 
+    @Test
+    public void gcDirectMongoDeleteWithInlined() throws Exception {
+        HashSet<String> set = setUp(true);
+        addInlined();
+        gc(set);
+    }
+    @Test
+    public void gcVersionDeleteWithInlined() throws Exception {
+        HashSet<String> set = setUp(false);
+        addInlined();
+        gc(set);
+    }
     private void gc(HashSet<String> set) throws Exception {
         DocumentNodeStore store = mk.getNodeStore();
         MarkSweepGarbageCollector gc = new MarkSweepGarbageCollector(
                 new DocumentBlobReferenceRetriever(store),
                 (GarbageCollectableBlobStore) store.getBlobStore(),
                 MoreExecutors.sameThreadExecutor(),
-                "./target", 2048, true, 0);
+                "./target", 5, true, 0);
         gc.collectGarbage();
 
         Set<String> existing = iterate();
         boolean empty = Sets.intersection(set, existing).isEmpty();
-        assertTrue(empty);
+        assertTrue(empty && !existing.isEmpty());
     }
 
     protected Set<String> iterate() throws Exception {