You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2015/03/12 16:22:47 UTC

svn commit: r1666220 - in /jackrabbit/oak/trunk: oak-commons/ oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/d...

Author: chetanm
Date: Thu Mar 12 15:22:46 2015
New Revision: 1666220

URL: http://svn.apache.org/r1666220
Log:
OAK-2557 - VersionGC uses way too much memory if there is a large pile of garbage

Added:
    jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java   (with props)
    jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java   (with props)
Modified:
    jackrabbit/oak/trunk/oak-commons/pom.xml
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java

Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1666220&r1=1666219&r2=1666220&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-commons/pom.xml Thu Mar 12 15:22:46 2015
@@ -93,6 +93,11 @@
       <artifactId>oak-mk-api</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>2.4</version>
+    </dependency>
 
     <!-- Test dependencies -->
     <dependency>

Added: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1666220&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java Thu Mar 12 15:22:46 2015
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.commons.sort;
+
+import java.io.BufferedWriter;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import com.google.common.io.Closer;
+import com.google.common.io.Files;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.LineIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility class to store a list of string and perform sort on that. For small size
+ * the list would be maintained in memory. If the size crosses the required threshold then
+ * the sorting would be performed externally
+ */
+public class StringSort implements Closeable {
+    private final Logger log = LoggerFactory.getLogger(getClass());
+    public static final int BATCH_SIZE = 2048;
+
+    private final int overflowToDiskThreshold;
+    private final Comparator<String> comparator;
+
+    private final List<String> ids = Lists.newArrayList();
+    private long size;
+
+    private final List<String> inMemBatch = Lists.newArrayList();
+
+    private boolean useFile;
+    private PersistentState persistentState;
+
+    public StringSort(int overflowToDiskThreshold, Comparator<String> comparator) {
+        this.overflowToDiskThreshold = overflowToDiskThreshold;
+        this.comparator = comparator;
+    }
+
+    public void add(String id) throws IOException {
+        if (useFile) {
+            addToBatch(id);
+        } else {
+            ids.add(id);
+            if (ids.size() >= overflowToDiskThreshold) {
+                flushToFile(ids);
+                useFile = true;
+                log.debug("In memory buffer crossed the threshold of {}. " +
+                        "Switching to filesystem [{}] to manage the state", overflowToDiskThreshold, persistentState);
+            }
+        }
+        size++;
+    }
+
+    public void sort() throws IOException {
+        if (useFile) {
+            //Flush the last batch
+            flushToFile(inMemBatch);
+            persistentState.sort();
+        } else {
+            Collections.sort(ids, comparator);
+        }
+    }
+
+    public Iterator<String> getIds() throws IOException {
+        if (useFile) {
+            return persistentState.getIterator();
+        } else {
+            return ids.iterator();
+        }
+    }
+
+    public long getSize() {
+        return size;
+    }
+
+    public boolean isEmpty() {
+        return size == 0;
+    }
+
+    public boolean usingFile() {
+        return useFile;
+    }
+
+    @Override
+    public void close() throws IOException {
+        if (persistentState != null) {
+            persistentState.close();
+        }
+    }
+
+    private void addToBatch(String id) throws IOException {
+        inMemBatch.add(id);
+        if (inMemBatch.size() >= BATCH_SIZE) {
+            flushToFile(inMemBatch);
+        }
+    }
+
+    private void flushToFile(List<String> ids) throws IOException {
+        BufferedWriter w = getPersistentState().getWriter();
+        for (String id : ids) {
+            w.write(id);
+            w.newLine();
+        }
+        ids.clear();
+    }
+
+    private PersistentState getPersistentState() {
+        //Lazily initialize the persistent state
+        if (persistentState == null) {
+            persistentState = new PersistentState(comparator);
+        }
+        return persistentState;
+    }
+
+    private static class PersistentState implements Closeable {
+        /**
+         * Maximum loop count when creating temp directories.
+         */
+        private static final int TEMP_DIR_ATTEMPTS = 10000;
+
+        private final Charset charset = Charsets.UTF_8;
+        private final File workDir;
+        private final Comparator<String> comparator;
+        private File idFile;
+        private File sortedFile;
+        private BufferedWriter writer;
+        private List<CloseableIterator> openedIterators = Lists.newArrayList();
+
+        public PersistentState(Comparator<String> comparator) {
+            this(comparator, createTempDir("oak-sorter-"));
+        }
+
+        public PersistentState(Comparator<String> comparator, File workDir) {
+            this.workDir = workDir;
+            this.comparator = comparator;
+        }
+
+        public BufferedWriter getWriter() throws FileNotFoundException {
+            if (idFile == null) {
+                idFile = new File(workDir, "strings.txt");
+                sortedFile = new File(workDir, "strings-sorted.txt");
+                writer = Files.newWriter(idFile, charset);
+            }
+            return writer;
+        }
+
+        public void sort() throws IOException {
+            closeWriter();
+
+            List<File> sortedFiles = ExternalSort.sortInBatch(idFile,
+                    comparator, //Comparator to use
+                    ExternalSort.DEFAULTMAXTEMPFILES,
+                    ExternalSort.DEFAULT_MAX_MEM_BYTES,
+                    charset, //charset
+                    workDir,  //temp directory where intermediate files are created
+                    true //distinct
+            );
+
+            ExternalSort.mergeSortedFiles(sortedFiles,
+                    sortedFile,
+                    comparator,
+                    charset,
+                    true
+            );
+        }
+
+        public Iterator<String> getIterator() throws IOException {
+            CloseableIterator itr = new CloseableIterator(Files.newReader(sortedFile, charset));
+            openedIterators.add(itr);
+            return itr;
+        }
+
+        @Override
+        public String toString() {
+            return "PersistentState : workDir=" + workDir.getAbsolutePath();
+        }
+
+        @Override
+        public void close() throws IOException {
+            Closer closer = Closer.create();
+            try {
+                closer.register(writer);
+                for (CloseableIterator citr : openedIterators) {
+                    closer.register(citr);
+                }
+                closer.register(new Closeable() {
+                    @Override
+                    public void close() throws IOException {
+                        FileUtils.deleteDirectory(workDir);
+                    }
+                });
+            } finally {
+                closer.close();
+            }
+        }
+
+        private void closeWriter() throws IOException {
+            writer.close();
+        }
+
+        /**
+         * Taken from com.google.common.io.Files#createTempDir()
+         * Modified to provide a prefix
+         */
+        private static File createTempDir(String prefix) {
+            File baseDir = new File(System.getProperty("java.io.tmpdir"));
+            String baseName = System.currentTimeMillis() + "-";
+
+            for (int counter = 0; counter < TEMP_DIR_ATTEMPTS; counter++) {
+                File tempDir = new File(baseDir, prefix + baseName + counter);
+                if (tempDir.mkdir()) {
+                    return tempDir;
+                }
+            }
+            throw new IllegalStateException("Failed to create directory within "
+                    + TEMP_DIR_ATTEMPTS + " attempts (tried "
+                    + baseName + "0 to " + baseName + (TEMP_DIR_ATTEMPTS - 1) + ')');
+        }
+    }
+
+    private static class CloseableIterator extends LineIterator implements Closeable {
+        public CloseableIterator(Reader reader) throws IllegalArgumentException {
+            super(reader);
+        }
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java?rev=1666220&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java Thu Mar 12 15:22:46 2015
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.commons.sort;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.ImmutableList;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class StringSortTest {
+    private Comparator<String> comparator = new PathComparator();
+    private StringSort collector;
+
+    @Test
+    public void inMemory() throws Exception{
+        List<String> paths = createTestPaths(5, false);
+        collector = new StringSort(paths.size() + 1,comparator);
+        addPathsToCollector(paths);
+
+        assertConstraints(paths);
+        assertFalse(collector.usingFile());
+        collector.close();
+    }
+
+    @Test
+    public void overflowToDisk() throws Exception{
+        //Create ~50k paths
+        List<String> paths = createTestPaths(10, true);
+        collector = new StringSort(1000, comparator);
+        addPathsToCollector(paths);
+
+        assertTrue(collector.usingFile());
+        assertConstraints(paths);
+
+        collector.close();
+    }
+
+    private void assertConstraints(List<String> paths) throws IOException {
+        assertEquals(paths.size(), collector.getSize());
+
+        Collections.sort(paths, comparator);
+        collector.sort();
+
+        List<String> sortedPaths = ImmutableList.copyOf(collector.getIds());
+        assertEquals(paths.size(), sortedPaths.size());
+        assertEquals(paths, sortedPaths);
+    }
+
+    private void addPathsToCollector(Iterable<String> paths) throws IOException {
+        for (String path : paths){
+            collector.add(path);
+        }
+    }
+
+    private static List<String> createTestPaths(int depth, boolean permutation){
+        List<String> rootPaths = Arrays.asList("a", "b", "c", "d", "e", "f", "g");
+        List<String> paths = new ArrayList<String>();
+
+
+        if (permutation){
+            List<String> newRoots = new ArrayList<String>();
+            for (List<String> permuts : Collections2.orderedPermutations(rootPaths)){
+                newRoots.add(Joiner.on("").join(permuts));
+            }
+            rootPaths = newRoots;
+        }
+
+        for (String root : rootPaths){
+            List<String> pathElements = new ArrayList<String>();
+            pathElements.add(root);
+            paths.add(createId(pathElements));
+            for (int i = 0; i < depth; i++){
+                pathElements.add(root + i);
+                paths.add(createId(pathElements));
+            }
+        }
+
+        Set<String> idSet = new HashSet<String>(paths);
+        assertEquals(paths.size(), idSet.size());
+
+        Collections.shuffle(paths);
+        return paths;
+    }
+
+    private static String createId(Iterable<String> pathElements){
+        return "/" + Joiner.on('/').join(pathElements);
+    }
+
+    private static  class PathComparator implements Comparator<String>, Serializable {
+        @Override
+        public int compare(String o1, String o2) {
+            int d1 = pathDepth(o1);
+            int d2 = pathDepth(o2);
+            if (d1 != d2) {
+                return Integer.signum(d2 - d1);
+            }
+            return o1.compareTo(o2);
+        }
+
+        private static int pathDepth(String path) {
+            if (path.equals("/")) {
+                return 0;
+            }
+            int depth = 0;
+            for (int i = 0; i < path.length(); i++) {
+                if (path.charAt(i) == '/') {
+                    depth++;
+                }
+            }
+            return depth;
+        }
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java?rev=1666220&r1=1666219&r2=1666220&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java Thu Mar 12 15:22:46 2015
@@ -574,7 +574,11 @@ public class DocumentNodeStoreService {
         RevisionGC revisionGC = new RevisionGC(new Runnable() {
             @Override
             public void run() {
-                store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs, TimeUnit.SECONDS);
+                try {
+                    store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs, TimeUnit.SECONDS);
+                } catch (IOException e) {
+                    log.warn("Error occurred while executing the Version Garbage Collector", e);
+                }
             }
         }, executor);
         registrations.add(registerMBean(whiteboard, RevisionGCMBean.class, revisionGC,

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1666220&r1=1666219&r2=1666220&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java Thu Mar 12 15:22:46 2015
@@ -19,9 +19,9 @@
 
 package org.apache.jackrabbit.oak.plugins.document;
 
-import java.util.ArrayList;
-import java.util.Collections;
+import java.io.IOException;
 import java.util.EnumSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
@@ -31,18 +31,23 @@ import com.google.common.base.StandardSy
 import com.google.common.base.Stopwatch;
 import com.google.common.collect.ImmutableList;
 
+import org.apache.jackrabbit.oak.commons.sort.StringSort;
 import org.apache.jackrabbit.oak.plugins.document.util.Utils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static com.google.common.collect.Iterators.partition;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
 import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
 
 public class VersionGarbageCollector {
+    //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid re-partitioning
+    private static final int DELETE_BATCH_SIZE = 450;
     private final DocumentNodeStore nodeStore;
     private final VersionGCSupport versionStore;
+    private int overflowToDiskThreshold = 100000;
 
-    private final Logger log = LoggerFactory.getLogger(getClass());
+    private static final Logger log = LoggerFactory.getLogger(VersionGarbageCollector.class);
 
     /**
      * Split document types which can be safely garbage collected
@@ -56,7 +61,7 @@ public class VersionGarbageCollector {
         this.versionStore = gcSupport;
     }
 
-    public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) {
+    public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) throws IOException {
         long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
         Stopwatch sw = Stopwatch.createStarted();
         VersionGCStats stats = new VersionGCStats();
@@ -85,41 +90,60 @@ public class VersionGarbageCollector {
         return stats;
     }
 
+    public void setOverflowToDiskThreshold(int overflowToDiskThreshold) {
+        this.overflowToDiskThreshold = overflowToDiskThreshold;
+    }
+
     private void collectSplitDocuments(VersionGCStats stats, long oldestRevTimeStamp) {
         versionStore.deleteSplitDocuments(GC_TYPES, oldestRevTimeStamp, stats);
     }
 
-    private void collectDeletedDocuments(VersionGCStats stats, Revision headRevision, long oldestRevTimeStamp) {
-        List<String> docIdsToDelete = new ArrayList<String>();
-        Iterable<NodeDocument> itr = versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
+    private void collectDeletedDocuments(VersionGCStats stats, Revision headRevision, long oldestRevTimeStamp)
+            throws IOException {
+        StringSort docIdsToDelete = new StringSort(overflowToDiskThreshold, NodeDocumentIdComparator.INSTANCE);
         try {
-            for (NodeDocument doc : itr) {
-                //Check if node is actually deleted at current revision
-                //As node is not modified since oldestRevTimeStamp then
-                //this node has not be revived again in past maxRevisionAge
-                //So deleting it is safe
-                if (doc.getNodeAtRevision(nodeStore, headRevision, null) == null) {
-                    docIdsToDelete.add(doc.getId());
-                    //Collect id of all previous docs also
-                    for (NodeDocument prevDoc : ImmutableList.copyOf(doc.getAllPreviousDocs())) {
-                        docIdsToDelete.add(prevDoc.getId());
+            Iterable<NodeDocument> itr = versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
+            try {
+                for (NodeDocument doc : itr) {
+                    //Check if node is actually deleted at current revision
+                    //As node is not modified since oldestRevTimeStamp then
+                    //this node has not be revived again in past maxRevisionAge
+                    //So deleting it is safe
+                    if (doc.getNodeAtRevision(nodeStore, headRevision, null) == null) {
+                        docIdsToDelete.add(doc.getId());
+                        //Collect id of all previous docs also
+                        for (NodeDocument prevDoc : ImmutableList.copyOf(doc.getAllPreviousDocs())) {
+                            docIdsToDelete.add(prevDoc.getId());
+                        }
                     }
                 }
+            } finally {
+                Utils.closeIfCloseable(itr);
+            }
+
+            if (docIdsToDelete.isEmpty()){
+                return;
             }
-        } finally {
-            Utils.closeIfCloseable(itr);
-        }
 
-        Collections.sort(docIdsToDelete, NodeDocumentIdComparator.INSTANCE);
+            docIdsToDelete.sort();
+            log.info("Proceeding to delete [{}] documents", docIdsToDelete.getSize());
 
-        if(log.isDebugEnabled()) {
-            StringBuilder sb = new StringBuilder("Deleted document with following ids were deleted as part of GC \n");
-            Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb, docIdsToDelete);
-            log.debug(sb.toString());
+            if (log.isDebugEnabled() && docIdsToDelete.getSize() < 1000) {
+                StringBuilder sb = new StringBuilder("Deleted document with following ids were deleted as part of GC \n");
+                Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb, docIdsToDelete.getIds());
+                log.debug(sb.toString());
+            }
+
+            Iterator<List<String>> idListItr = partition(docIdsToDelete.getIds(), DELETE_BATCH_SIZE);
+            while (idListItr.hasNext()) {
+                nodeStore.getDocumentStore().remove(Collection.NODES, idListItr.next());
+            }
+
+            nodeStore.invalidateDocChildrenCache();
+            stats.deletedDocGCCount += docIdsToDelete.getSize();
+        } finally {
+            docIdsToDelete.close();
         }
-        nodeStore.getDocumentStore().remove(Collection.NODES, docIdsToDelete);
-        nodeStore.invalidateDocChildrenCache();
-        stats.deletedDocGCCount += docIdsToDelete.size();
     }
 
     public static class VersionGCStats {

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java Thu Mar 12 15:22:46 2015
@@ -36,6 +36,7 @@ import org.junit.Before;
 import org.junit.Test;
 
 import static java.util.concurrent.TimeUnit.HOURS;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.fail;
 
@@ -104,6 +105,53 @@ public class VersionGCDeletionTest {
         assertNull(ts.find(Collection.NODES, "1:/x"));
     }
 
+    @Test
+    public void deleteLargeNumber() throws Exception{
+        int noOfDocsToDelete = 10000;
+        DocumentStore ts = new MemoryDocumentStore();
+        store = new DocumentMK.Builder()
+                .clock(clock)
+                .setDocumentStore(new MemoryDocumentStore())
+                .setAsyncDelay(0)
+                .getNodeStore();
+
+        //Baseline the clock
+        clock.waitUntil(Revision.getCurrentTimestamp());
+
+        NodeBuilder b1 = store.getRoot().builder();
+        NodeBuilder xb = b1.child("x");
+        for (int i = 0; i < noOfDocsToDelete; i++){
+            xb.child("a"+i).child("b"+i);
+        }
+        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        long maxAge = 1; //hours
+        long delta = TimeUnit.MINUTES.toMillis(10);
+
+        //Remove x/y
+        NodeBuilder b2 = store.getRoot().builder();
+        b2.child("x").remove();
+        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+        store.runBackgroundOperations();
+
+        //3. Check that deleted doc does get collected post maxAge
+        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+        VersionGarbageCollector gc = store.getVersionGarbageCollector();
+        gc.setOverflowToDiskThreshold(100);
+
+        VersionGarbageCollector.VersionGCStats stats = gc.gc(maxAge * 2, HOURS);
+        assertEquals(noOfDocsToDelete * 2 + 1, stats.deletedDocGCCount);
+
+
+        assertNull(ts.find(Collection.NODES, "1:/x"));
+
+        for (int i = 0; i < noOfDocsToDelete; i++){
+            assertNull(ts.find(Collection.NODES, "2:/a"+i+"/b"+i));
+            assertNull(ts.find(Collection.NODES, "1:/a"+i));
+        }
+    }
+
     private static class TestDocumentStore extends MemoryDocumentStore {
         boolean throwException;
         @Override

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java Thu Mar 12 15:22:46 2015
@@ -137,7 +137,11 @@ public class VersionGCWithSplitTest {
         Thread t = new Thread(new Runnable() {
             @Override
             public void run() {
-                stats.set(gc.gc(1, HOURS));
+                try {
+                    stats.set(gc.gc(1, HOURS));
+                } catch (IOException e) {
+                    throw new RuntimeException(e);
+                }
             }
         });
 



Re: svn commit: r1666220 - in /jackrabbit/oak/trunk: oak-commons/ oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/d...

Posted by Chetan Mehrotra <ch...@gmail.com>.
Thanks Amit for confirming it!
Chetan Mehrotra


On Fri, Mar 13, 2015 at 11:53 AM, Amit Jain <am...@ieee.org> wrote:
> The tests are passing for me on windows with the latest change.
>
> Thanks
> Amit
>
> On Fri, Mar 13, 2015 at 9:21 AM, Chetan Mehrotra <ch...@gmail.com>
> wrote:
>
>> Looks like Closer closes the closeables in LIFO manner due to which
>> directory containing that file got deleted first. I have change the
>> logic now.
>>
>> Let me know if the test passes for you on Windows
>> Chetan Mehrotra
>>
>>
>> On Thu, Mar 12, 2015 at 10:21 PM, Julian Reschke <ju...@gmx.de>
>> wrote:
>> > With this change, I get a reliable test failure on Windows:
>> >
>> >
>> > Tests in error:
>> >
>> > overflowToDisk(org.apache.jackrabbit.oak.commons.sort.StringSortTest):
>> > Unable to delete file:
>> C:\tmp\oak-sorter-1426178913437-0\strings-sorted.txt
>> >
>> >
>> > Best regards, Julian
>> >
>> >
>> > On 2015-03-12 16:22, chetanm@apache.org wrote:
>> >>
>> >> Author: chetanm
>> >> Date: Thu Mar 12 15:22:46 2015
>> >> New Revision: 1666220
>> >>
>> >> URL: http://svn.apache.org/r1666220
>> >> Log:
>> >> OAK-2557 - VersionGC uses way too much memory if there is a large pile
>> of
>> >> garbage
>> >>
>> >> Added:
>> >>
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> >> (with props)
>> >>
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> >> (with props)
>> >> Modified:
>> >>      jackrabbit/oak/trunk/oak-commons/pom.xml
>> >>
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> >>
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> >>
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> >>
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> >>
>> >> Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1666220&r1=1666219&r2=1666220&view=diff
>> >>
>> >>
>> ==============================================================================
>> >> --- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
>> >> +++ jackrabbit/oak/trunk/oak-commons/pom.xml Thu Mar 12 15:22:46 2015
>> >> @@ -93,6 +93,11 @@
>> >>         <artifactId>oak-mk-api</artifactId>
>> >>         <version>${project.version}</version>
>> >>       </dependency>
>> >> +    <dependency>
>> >> +      <groupId>commons-io</groupId>
>> >> +      <artifactId>commons-io</artifactId>
>> >> +      <version>2.4</version>
>> >> +    </dependency>
>> >>
>> >>       <!-- Test dependencies -->
>> >>       <dependency>
>> >>
>> >> Added:
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1666220&view=auto
>> >>
>> >>
>> ==============================================================================
>> >> ---
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> >> (added)
>> >> +++
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> >> Thu Mar 12 15:22:46 2015
>> >> @@ -0,0 +1,255 @@
>> >> +/*
>> >> + * Licensed to the Apache Software Foundation (ASF) under one
>> >> + * or more contributor license agreements.  See the NOTICE file
>> >> + * distributed with this work for additional information
>> >> + * regarding copyright ownership.  The ASF licenses this file
>> >> + * to you under the Apache License, Version 2.0 (the
>> >> + * "License"); you may not use this file except in compliance
>> >> + * with the License.  You may obtain a copy of the License at
>> >> + *
>> >> + *   http://www.apache.org/licenses/LICENSE-2.0
>> >> + *
>> >> + * Unless required by applicable law or agreed to in writing,
>> >> + * software distributed under the License is distributed on an
>> >> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
>> >> + * KIND, either express or implied.  See the License for the
>> >> + * specific language governing permissions and limitations
>> >> + * under the License.
>> >> + */
>> >> +
>> >> +package org.apache.jackrabbit.oak.commons.sort;
>> >> +
>> >> +import java.io.BufferedWriter;
>> >> +import java.io.Closeable;
>> >> +import java.io.File;
>> >> +import java.io.FileNotFoundException;
>> >> +import java.io.IOException;
>> >> +import java.io.Reader;
>> >> +import java.nio.charset.Charset;
>> >> +import java.util.Collections;
>> >> +import java.util.Comparator;
>> >> +import java.util.Iterator;
>> >> +import java.util.List;
>> >> +
>> >> +import com.google.common.base.Charsets;
>> >> +import com.google.common.collect.Lists;
>> >> +import com.google.common.io.Closer;
>> >> +import com.google.common.io.Files;
>> >> +import org.apache.commons.io.FileUtils;
>> >> +import org.apache.commons.io.LineIterator;
>> >> +import org.slf4j.Logger;
>> >> +import org.slf4j.LoggerFactory;
>> >> +
>> >> +/**
>> >> + * Utility class to store a list of string and perform sort on that.
>> For
>> >> small size
>> >> + * the list would be maintained in memory. If the size crosses the
>> >> required threshold then
>> >> + * the sorting would be performed externally
>> >> + */
>> >> +public class StringSort implements Closeable {
>> >> +    private final Logger log = LoggerFactory.getLogger(getClass());
>> >> +    public static final int BATCH_SIZE = 2048;
>> >> +
>> >> +    private final int overflowToDiskThreshold;
>> >> +    private final Comparator<String> comparator;
>> >> +
>> >> +    private final List<String> ids = Lists.newArrayList();
>> >> +    private long size;
>> >> +
>> >> +    private final List<String> inMemBatch = Lists.newArrayList();
>> >> +
>> >> +    private boolean useFile;
>> >> +    private PersistentState persistentState;
>> >> +
>> >> +    public StringSort(int overflowToDiskThreshold, Comparator<String>
>> >> comparator) {
>> >> +        this.overflowToDiskThreshold = overflowToDiskThreshold;
>> >> +        this.comparator = comparator;
>> >> +    }
>> >> +
>> >> +    public void add(String id) throws IOException {
>> >> +        if (useFile) {
>> >> +            addToBatch(id);
>> >> +        } else {
>> >> +            ids.add(id);
>> >> +            if (ids.size() >= overflowToDiskThreshold) {
>> >> +                flushToFile(ids);
>> >> +                useFile = true;
>> >> +                log.debug("In memory buffer crossed the threshold of
>> {}.
>> >> " +
>> >> +                        "Switching to filesystem [{}] to manage the
>> >> state", overflowToDiskThreshold, persistentState);
>> >> +            }
>> >> +        }
>> >> +        size++;
>> >> +    }
>> >> +
>> >> +    public void sort() throws IOException {
>> >> +        if (useFile) {
>> >> +            //Flush the last batch
>> >> +            flushToFile(inMemBatch);
>> >> +            persistentState.sort();
>> >> +        } else {
>> >> +            Collections.sort(ids, comparator);
>> >> +        }
>> >> +    }
>> >> +
>> >> +    public Iterator<String> getIds() throws IOException {
>> >> +        if (useFile) {
>> >> +            return persistentState.getIterator();
>> >> +        } else {
>> >> +            return ids.iterator();
>> >> +        }
>> >> +    }
>> >> +
>> >> +    public long getSize() {
>> >> +        return size;
>> >> +    }
>> >> +
>> >> +    public boolean isEmpty() {
>> >> +        return size == 0;
>> >> +    }
>> >> +
>> >> +    public boolean usingFile() {
>> >> +        return useFile;
>> >> +    }
>> >> +
>> >> +    @Override
>> >> +    public void close() throws IOException {
>> >> +        if (persistentState != null) {
>> >> +            persistentState.close();
>> >> +        }
>> >> +    }
>> >> +
>> >> +    private void addToBatch(String id) throws IOException {
>> >> +        inMemBatch.add(id);
>> >> +        if (inMemBatch.size() >= BATCH_SIZE) {
>> >> +            flushToFile(inMemBatch);
>> >> +        }
>> >> +    }
>> >> +
>> >> +    private void flushToFile(List<String> ids) throws IOException {
>> >> +        BufferedWriter w = getPersistentState().getWriter();
>> >> +        for (String id : ids) {
>> >> +            w.write(id);
>> >> +            w.newLine();
>> >> +        }
>> >> +        ids.clear();
>> >> +    }
>> >> +
>> >> +    private PersistentState getPersistentState() {
>> >> +        //Lazily initialize the persistent state
>> >> +        if (persistentState == null) {
>> >> +            persistentState = new PersistentState(comparator);
>> >> +        }
>> >> +        return persistentState;
>> >> +    }
>> >> +
>> >> +    private static class PersistentState implements Closeable {
>> >> +        /**
>> >> +         * Maximum loop count when creating temp directories.
>> >> +         */
>> >> +        private static final int TEMP_DIR_ATTEMPTS = 10000;
>> >> +
>> >> +        private final Charset charset = Charsets.UTF_8;
>> >> +        private final File workDir;
>> >> +        private final Comparator<String> comparator;
>> >> +        private File idFile;
>> >> +        private File sortedFile;
>> >> +        private BufferedWriter writer;
>> >> +        private List<CloseableIterator> openedIterators =
>> >> Lists.newArrayList();
>> >> +
>> >> +        public PersistentState(Comparator<String> comparator) {
>> >> +            this(comparator, createTempDir("oak-sorter-"));
>> >> +        }
>> >> +
>> >> +        public PersistentState(Comparator<String> comparator, File
>> >> workDir) {
>> >> +            this.workDir = workDir;
>> >> +            this.comparator = comparator;
>> >> +        }
>> >> +
>> >> +        public BufferedWriter getWriter() throws FileNotFoundException
>> {
>> >> +            if (idFile == null) {
>> >> +                idFile = new File(workDir, "strings.txt");
>> >> +                sortedFile = new File(workDir, "strings-sorted.txt");
>> >> +                writer = Files.newWriter(idFile, charset);
>> >> +            }
>> >> +            return writer;
>> >> +        }
>> >> +
>> >> +        public void sort() throws IOException {
>> >> +            closeWriter();
>> >> +
>> >> +            List<File> sortedFiles = ExternalSort.sortInBatch(idFile,
>> >> +                    comparator, //Comparator to use
>> >> +                    ExternalSort.DEFAULTMAXTEMPFILES,
>> >> +                    ExternalSort.DEFAULT_MAX_MEM_BYTES,
>> >> +                    charset, //charset
>> >> +                    workDir,  //temp directory where intermediate files
>> >> are created
>> >> +                    true //distinct
>> >> +            );
>> >> +
>> >> +            ExternalSort.mergeSortedFiles(sortedFiles,
>> >> +                    sortedFile,
>> >> +                    comparator,
>> >> +                    charset,
>> >> +                    true
>> >> +            );
>> >> +        }
>> >> +
>> >> +        public Iterator<String> getIterator() throws IOException {
>> >> +            CloseableIterator itr = new
>> >> CloseableIterator(Files.newReader(sortedFile, charset));
>> >> +            openedIterators.add(itr);
>> >> +            return itr;
>> >> +        }
>> >> +
>> >> +        @Override
>> >> +        public String toString() {
>> >> +            return "PersistentState : workDir=" +
>> >> workDir.getAbsolutePath();
>> >> +        }
>> >> +
>> >> +        @Override
>> >> +        public void close() throws IOException {
>> >> +            Closer closer = Closer.create();
>> >> +            try {
>> >> +                closer.register(writer);
>> >> +                for (CloseableIterator citr : openedIterators) {
>> >> +                    closer.register(citr);
>> >> +                }
>> >> +                closer.register(new Closeable() {
>> >> +                    @Override
>> >> +                    public void close() throws IOException {
>> >> +                        FileUtils.deleteDirectory(workDir);
>> >> +                    }
>> >> +                });
>> >> +            } finally {
>> >> +                closer.close();
>> >> +            }
>> >> +        }
>> >> +
>> >> +        private void closeWriter() throws IOException {
>> >> +            writer.close();
>> >> +        }
>> >> +
>> >> +        /**
>> >> +         * Taken from com.google.common.io.Files#createTempDir()
>> >> +         * Modified to provide a prefix
>> >> +         */
>> >> +        private static File createTempDir(String prefix) {
>> >> +            File baseDir = new
>> >> File(System.getProperty("java.io.tmpdir"));
>> >> +            String baseName = System.currentTimeMillis() + "-";
>> >> +
>> >> +            for (int counter = 0; counter < TEMP_DIR_ATTEMPTS;
>> counter++)
>> >> {
>> >> +                File tempDir = new File(baseDir, prefix + baseName +
>> >> counter);
>> >> +                if (tempDir.mkdir()) {
>> >> +                    return tempDir;
>> >> +                }
>> >> +            }
>> >> +            throw new IllegalStateException("Failed to create directory
>> >> within "
>> >> +                    + TEMP_DIR_ATTEMPTS + " attempts (tried "
>> >> +                    + baseName + "0 to " + baseName +
>> (TEMP_DIR_ATTEMPTS
>> >> - 1) + ')');
>> >> +        }
>> >> +    }
>> >> +
>> >> +    private static class CloseableIterator extends LineIterator
>> >> implements Closeable {
>> >> +        public CloseableIterator(Reader reader) throws
>> >> IllegalArgumentException {
>> >> +            super(reader);
>> >> +        }
>> >> +    }
>> >> +}
>> >>
>> >> Propchange:
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> >>
>> >>
>> ------------------------------------------------------------------------------
>> >>      svn:eol-style = native
>> >>
>> >> Added:
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java?rev=1666220&view=auto
>> >>
>> >>
>> ==============================================================================
>> >> ---
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> >> (added)
>> >> +++
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> >> Thu Mar 12 15:22:46 2015
>> >> @@ -0,0 +1,144 @@
>> >> +/*
>> >> + * Licensed to the Apache Software Foundation (ASF) under one
>> >> + * or more contributor license agreements.  See the NOTICE file
>> >> + * distributed with this work for additional information
>> >> + * regarding copyright ownership.  The ASF licenses this file
>> >> + * to you under the Apache License, Version 2.0 (the
>> >> + * "License"); you may not use this file except in compliance
>> >> + * with the License.  You may obtain a copy of the License at
>> >> + *
>> >> + *   http://www.apache.org/licenses/LICENSE-2.0
>> >> + *
>> >> + * Unless required by applicable law or agreed to in writing,
>> >> + * software distributed under the License is distributed on an
>> >> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
>> >> + * KIND, either express or implied.  See the License for the
>> >> + * specific language governing permissions and limitations
>> >> + * under the License.
>> >> + */
>> >> +
>> >> +package org.apache.jackrabbit.oak.commons.sort;
>> >> +
>> >> +import java.io.IOException;
>> >> +import java.io.Serializable;
>> >> +import java.util.ArrayList;
>> >> +import java.util.Arrays;
>> >> +import java.util.Collections;
>> >> +import java.util.Comparator;
>> >> +import java.util.HashSet;
>> >> +import java.util.List;
>> >> +import java.util.Set;
>> >> +
>> >> +import com.google.common.base.Joiner;
>> >> +import com.google.common.collect.Collections2;
>> >> +import com.google.common.collect.ImmutableList;
>> >> +import org.junit.Test;
>> >> +
>> >> +import static org.junit.Assert.assertEquals;
>> >> +import static org.junit.Assert.assertFalse;
>> >> +import static org.junit.Assert.assertTrue;
>> >> +
>> >> +public class StringSortTest {
>> >> +    private Comparator<String> comparator = new PathComparator();
>> >> +    private StringSort collector;
>> >> +
>> >> +    @Test
>> >> +    public void inMemory() throws Exception{
>> >> +        List<String> paths = createTestPaths(5, false);
>> >> +        collector = new StringSort(paths.size() + 1,comparator);
>> >> +        addPathsToCollector(paths);
>> >> +
>> >> +        assertConstraints(paths);
>> >> +        assertFalse(collector.usingFile());
>> >> +        collector.close();
>> >> +    }
>> >> +
>> >> +    @Test
>> >> +    public void overflowToDisk() throws Exception{
>> >> +        //Create ~50k paths
>> >> +        List<String> paths = createTestPaths(10, true);
>> >> +        collector = new StringSort(1000, comparator);
>> >> +        addPathsToCollector(paths);
>> >> +
>> >> +        assertTrue(collector.usingFile());
>> >> +        assertConstraints(paths);
>> >> +
>> >> +        collector.close();
>> >> +    }
>> >> +
>> >> +    private void assertConstraints(List<String> paths) throws
>> IOException
>> >> {
>> >> +        assertEquals(paths.size(), collector.getSize());
>> >> +
>> >> +        Collections.sort(paths, comparator);
>> >> +        collector.sort();
>> >> +
>> >> +        List<String> sortedPaths =
>> >> ImmutableList.copyOf(collector.getIds());
>> >> +        assertEquals(paths.size(), sortedPaths.size());
>> >> +        assertEquals(paths, sortedPaths);
>> >> +    }
>> >> +
>> >> +    private void addPathsToCollector(Iterable<String> paths) throws
>> >> IOException {
>> >> +        for (String path : paths){
>> >> +            collector.add(path);
>> >> +        }
>> >> +    }
>> >> +
>> >> +    private static List<String> createTestPaths(int depth, boolean
>> >> permutation){
>> >> +        List<String> rootPaths = Arrays.asList("a", "b", "c", "d", "e",
>> >> "f", "g");
>> >> +        List<String> paths = new ArrayList<String>();
>> >> +
>> >> +
>> >> +        if (permutation){
>> >> +            List<String> newRoots = new ArrayList<String>();
>> >> +            for (List<String> permuts :
>> >> Collections2.orderedPermutations(rootPaths)){
>> >> +                newRoots.add(Joiner.on("").join(permuts));
>> >> +            }
>> >> +            rootPaths = newRoots;
>> >> +        }
>> >> +
>> >> +        for (String root : rootPaths){
>> >> +            List<String> pathElements = new ArrayList<String>();
>> >> +            pathElements.add(root);
>> >> +            paths.add(createId(pathElements));
>> >> +            for (int i = 0; i < depth; i++){
>> >> +                pathElements.add(root + i);
>> >> +                paths.add(createId(pathElements));
>> >> +            }
>> >> +        }
>> >> +
>> >> +        Set<String> idSet = new HashSet<String>(paths);
>> >> +        assertEquals(paths.size(), idSet.size());
>> >> +
>> >> +        Collections.shuffle(paths);
>> >> +        return paths;
>> >> +    }
>> >> +
>> >> +    private static String createId(Iterable<String> pathElements){
>> >> +        return "/" + Joiner.on('/').join(pathElements);
>> >> +    }
>> >> +
>> >> +    private static  class PathComparator implements Comparator<String>,
>> >> Serializable {
>> >> +        @Override
>> >> +        public int compare(String o1, String o2) {
>> >> +            int d1 = pathDepth(o1);
>> >> +            int d2 = pathDepth(o2);
>> >> +            if (d1 != d2) {
>> >> +                return Integer.signum(d2 - d1);
>> >> +            }
>> >> +            return o1.compareTo(o2);
>> >> +        }
>> >> +
>> >> +        private static int pathDepth(String path) {
>> >> +            if (path.equals("/")) {
>> >> +                return 0;
>> >> +            }
>> >> +            int depth = 0;
>> >> +            for (int i = 0; i < path.length(); i++) {
>> >> +                if (path.charAt(i) == '/') {
>> >> +                    depth++;
>> >> +                }
>> >> +            }
>> >> +            return depth;
>> >> +        }
>> >> +    }
>> >> +}
>> >>
>> >> Propchange:
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> >>
>> >>
>> ------------------------------------------------------------------------------
>> >>      svn:eol-style = native
>> >>
>> >> Modified:
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>> >>
>> >>
>> ==============================================================================
>> >> ---
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> >> (original)
>> >> +++
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> >> Thu Mar 12 15:22:46 2015
>> >> @@ -574,7 +574,11 @@ public class DocumentNodeStoreService {
>> >>           RevisionGC revisionGC = new RevisionGC(new Runnable() {
>> >>               @Override
>> >>               public void run() {
>> >> -
>> >> store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs,
>> >> TimeUnit.SECONDS);
>> >> +                try {
>> >> +
>> >> store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs,
>> >> TimeUnit.SECONDS);
>> >> +                } catch (IOException e) {
>> >> +                    log.warn("Error occurred while executing the
>> Version
>> >> Garbage Collector", e);
>> >> +                }
>> >>               }
>> >>           }, executor);
>> >>           registrations.add(registerMBean(whiteboard,
>> >> RevisionGCMBean.class, revisionGC,
>> >>
>> >> Modified:
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>> >>
>> >>
>> ==============================================================================
>> >> ---
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> >> (original)
>> >> +++
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> >> Thu Mar 12 15:22:46 2015
>> >> @@ -19,9 +19,9 @@
>> >>
>> >>   package org.apache.jackrabbit.oak.plugins.document;
>> >>
>> >> -import java.util.ArrayList;
>> >> -import java.util.Collections;
>> >> +import java.io.IOException;
>> >>   import java.util.EnumSet;
>> >> +import java.util.Iterator;
>> >>   import java.util.List;
>> >>   import java.util.Set;
>> >>   import java.util.concurrent.TimeUnit;
>> >> @@ -31,18 +31,23 @@ import com.google.common.base.StandardSy
>> >>   import com.google.common.base.Stopwatch;
>> >>   import com.google.common.collect.ImmutableList;
>> >>
>> >> +import org.apache.jackrabbit.oak.commons.sort.StringSort;
>> >>   import org.apache.jackrabbit.oak.plugins.document.util.Utils;
>> >>   import org.slf4j.Logger;
>> >>   import org.slf4j.LoggerFactory;
>> >>
>> >> +import static com.google.common.collect.Iterators.partition;
>> >>   import static
>> >>
>> org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
>> >>   import static
>> >>
>> org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
>> >>
>> >>   public class VersionGarbageCollector {
>> >> +    //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid
>> >> re-partitioning
>> >> +    private static final int DELETE_BATCH_SIZE = 450;
>> >>       private final DocumentNodeStore nodeStore;
>> >>       private final VersionGCSupport versionStore;
>> >> +    private int overflowToDiskThreshold = 100000;
>> >>
>> >> -    private final Logger log = LoggerFactory.getLogger(getClass());
>> >> +    private static final Logger log =
>> >> LoggerFactory.getLogger(VersionGarbageCollector.class);
>> >>
>> >>       /**
>> >>        * Split document types which can be safely garbage collected
>> >> @@ -56,7 +61,7 @@ public class VersionGarbageCollector {
>> >>           this.versionStore = gcSupport;
>> >>       }
>> >>
>> >> -    public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) {
>> >> +    public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) throws
>> >> IOException {
>> >>           long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
>> >>           Stopwatch sw = Stopwatch.createStarted();
>> >>           VersionGCStats stats = new VersionGCStats();
>> >> @@ -85,41 +90,60 @@ public class VersionGarbageCollector {
>> >>           return stats;
>> >>       }
>> >>
>> >> +    public void setOverflowToDiskThreshold(int
>> overflowToDiskThreshold) {
>> >> +        this.overflowToDiskThreshold = overflowToDiskThreshold;
>> >> +    }
>> >> +
>> >>       private void collectSplitDocuments(VersionGCStats stats, long
>> >> oldestRevTimeStamp) {
>> >>           versionStore.deleteSplitDocuments(GC_TYPES,
>> oldestRevTimeStamp,
>> >> stats);
>> >>       }
>> >>
>> >> -    private void collectDeletedDocuments(VersionGCStats stats, Revision
>> >> headRevision, long oldestRevTimeStamp) {
>> >> -        List<String> docIdsToDelete = new ArrayList<String>();
>> >> -        Iterable<NodeDocument> itr =
>> >> versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
>> >> +    private void collectDeletedDocuments(VersionGCStats stats, Revision
>> >> headRevision, long oldestRevTimeStamp)
>> >> +            throws IOException {
>> >> +        StringSort docIdsToDelete = new
>> >> StringSort(overflowToDiskThreshold, NodeDocumentIdComparator.INSTANCE);
>> >>           try {
>> >> -            for (NodeDocument doc : itr) {
>> >> -                //Check if node is actually deleted at current revision
>> >> -                //As node is not modified since oldestRevTimeStamp then
>> >> -                //this node has not be revived again in past
>> >> maxRevisionAge
>> >> -                //So deleting it is safe
>> >> -                if (doc.getNodeAtRevision(nodeStore, headRevision,
>> null)
>> >> == null) {
>> >> -                    docIdsToDelete.add(doc.getId());
>> >> -                    //Collect id of all previous docs also
>> >> -                    for (NodeDocument prevDoc :
>> >> ImmutableList.copyOf(doc.getAllPreviousDocs())) {
>> >> -                        docIdsToDelete.add(prevDoc.getId());
>> >> +            Iterable<NodeDocument> itr =
>> >> versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
>> >> +            try {
>> >> +                for (NodeDocument doc : itr) {
>> >> +                    //Check if node is actually deleted at current
>> >> revision
>> >> +                    //As node is not modified since oldestRevTimeStamp
>> >> then
>> >> +                    //this node has not be revived again in past
>> >> maxRevisionAge
>> >> +                    //So deleting it is safe
>> >> +                    if (doc.getNodeAtRevision(nodeStore, headRevision,
>> >> null) == null) {
>> >> +                        docIdsToDelete.add(doc.getId());
>> >> +                        //Collect id of all previous docs also
>> >> +                        for (NodeDocument prevDoc :
>> >> ImmutableList.copyOf(doc.getAllPreviousDocs())) {
>> >> +                            docIdsToDelete.add(prevDoc.getId());
>> >> +                        }
>> >>                       }
>> >>                   }
>> >> +            } finally {
>> >> +                Utils.closeIfCloseable(itr);
>> >> +            }
>> >> +
>> >> +            if (docIdsToDelete.isEmpty()){
>> >> +                return;
>> >>               }
>> >> -        } finally {
>> >> -            Utils.closeIfCloseable(itr);
>> >> -        }
>> >>
>> >> -        Collections.sort(docIdsToDelete,
>> >> NodeDocumentIdComparator.INSTANCE);
>> >> +            docIdsToDelete.sort();
>> >> +            log.info("Proceeding to delete [{}] documents",
>> >> docIdsToDelete.getSize());
>> >>
>> >> -        if(log.isDebugEnabled()) {
>> >> -            StringBuilder sb = new StringBuilder("Deleted document with
>> >> following ids were deleted as part of GC \n");
>> >> -
>> >> Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb,
>> >> docIdsToDelete);
>> >> -            log.debug(sb.toString());
>> >> +            if (log.isDebugEnabled() && docIdsToDelete.getSize() <
>> 1000)
>> >> {
>> >> +                StringBuilder sb = new StringBuilder("Deleted document
>> >> with following ids were deleted as part of GC \n");
>> >> +
>> >> Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb,
>> >> docIdsToDelete.getIds());
>> >> +                log.debug(sb.toString());
>> >> +            }
>> >> +
>> >> +            Iterator<List<String>> idListItr =
>> >> partition(docIdsToDelete.getIds(), DELETE_BATCH_SIZE);
>> >> +            while (idListItr.hasNext()) {
>> >> +                nodeStore.getDocumentStore().remove(Collection.NODES,
>> >> idListItr.next());
>> >> +            }
>> >> +
>> >> +            nodeStore.invalidateDocChildrenCache();
>> >> +            stats.deletedDocGCCount += docIdsToDelete.getSize();
>> >> +        } finally {
>> >> +            docIdsToDelete.close();
>> >>           }
>> >> -        nodeStore.getDocumentStore().remove(Collection.NODES,
>> >> docIdsToDelete);
>> >> -        nodeStore.invalidateDocChildrenCache();
>> >> -        stats.deletedDocGCCount += docIdsToDelete.size();
>> >>       }
>> >>
>> >>       public static class VersionGCStats {
>> >>
>> >> Modified:
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>> >>
>> >>
>> ==============================================================================
>> >> ---
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> >> (original)
>> >> +++
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> >> Thu Mar 12 15:22:46 2015
>> >> @@ -36,6 +36,7 @@ import org.junit.Before;
>> >>   import org.junit.Test;
>> >>
>> >>   import static java.util.concurrent.TimeUnit.HOURS;
>> >> +import static org.junit.Assert.assertEquals;
>> >>   import static org.junit.Assert.assertNull;
>> >>   import static org.junit.Assert.fail;
>> >>
>> >> @@ -104,6 +105,53 @@ public class VersionGCDeletionTest {
>> >>           assertNull(ts.find(Collection.NODES, "1:/x"));
>> >>       }
>> >>
>> >> +    @Test
>> >> +    public void deleteLargeNumber() throws Exception{
>> >> +        int noOfDocsToDelete = 10000;
>> >> +        DocumentStore ts = new MemoryDocumentStore();
>> >> +        store = new DocumentMK.Builder()
>> >> +                .clock(clock)
>> >> +                .setDocumentStore(new MemoryDocumentStore())
>> >> +                .setAsyncDelay(0)
>> >> +                .getNodeStore();
>> >> +
>> >> +        //Baseline the clock
>> >> +        clock.waitUntil(Revision.getCurrentTimestamp());
>> >> +
>> >> +        NodeBuilder b1 = store.getRoot().builder();
>> >> +        NodeBuilder xb = b1.child("x");
>> >> +        for (int i = 0; i < noOfDocsToDelete; i++){
>> >> +            xb.child("a"+i).child("b"+i);
>> >> +        }
>> >> +        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
>> >> +
>> >> +        long maxAge = 1; //hours
>> >> +        long delta = TimeUnit.MINUTES.toMillis(10);
>> >> +
>> >> +        //Remove x/y
>> >> +        NodeBuilder b2 = store.getRoot().builder();
>> >> +        b2.child("x").remove();
>> >> +        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
>> >> +
>> >> +        store.runBackgroundOperations();
>> >> +
>> >> +        //3. Check that deleted doc does get collected post maxAge
>> >> +        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) +
>> >> delta);
>> >> +        VersionGarbageCollector gc =
>> store.getVersionGarbageCollector();
>> >> +        gc.setOverflowToDiskThreshold(100);
>> >> +
>> >> +        VersionGarbageCollector.VersionGCStats stats = gc.gc(maxAge *
>> 2,
>> >> HOURS);
>> >> +        assertEquals(noOfDocsToDelete * 2 + 1,
>> stats.deletedDocGCCount);
>> >> +
>> >> +
>> >> +        assertNull(ts.find(Collection.NODES, "1:/x"));
>> >> +
>> >> +        for (int i = 0; i < noOfDocsToDelete; i++){
>> >> +            assertNull(ts.find(Collection.NODES, "2:/a"+i+"/b"+i));
>> >> +            assertNull(ts.find(Collection.NODES, "1:/a"+i));
>> >> +        }
>> >> +    }
>> >> +
>> >>       private static class TestDocumentStore extends
>> MemoryDocumentStore {
>> >>           boolean throwException;
>> >>           @Override
>> >>
>> >> Modified:
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>> >>
>> >>
>> ==============================================================================
>> >> ---
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> >> (original)
>> >> +++
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> >> Thu Mar 12 15:22:46 2015
>> >> @@ -137,7 +137,11 @@ public class VersionGCWithSplitTest {
>> >>           Thread t = new Thread(new Runnable() {
>> >>               @Override
>> >>               public void run() {
>> >> -                stats.set(gc.gc(1, HOURS));
>> >> +                try {
>> >> +                    stats.set(gc.gc(1, HOURS));
>> >> +                } catch (IOException e) {
>> >> +                    throw new RuntimeException(e);
>> >> +                }
>> >>               }
>> >>           });
>> >>
>> >>
>> >>
>> >>
>> >
>>

Re: svn commit: r1666220 - in /jackrabbit/oak/trunk: oak-commons/ oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/d...

Posted by Amit Jain <am...@ieee.org>.
The tests are passing for me on windows with the latest change.

Thanks
Amit

On Fri, Mar 13, 2015 at 9:21 AM, Chetan Mehrotra <ch...@gmail.com>
wrote:

> Looks like Closer closes the closeables in LIFO manner due to which
> directory containing that file got deleted first. I have change the
> logic now.
>
> Let me know if the test passes for you on Windows
> Chetan Mehrotra
>
>
> On Thu, Mar 12, 2015 at 10:21 PM, Julian Reschke <ju...@gmx.de>
> wrote:
> > With this change, I get a reliable test failure on Windows:
> >
> >
> > Tests in error:
> >
> > overflowToDisk(org.apache.jackrabbit.oak.commons.sort.StringSortTest):
> > Unable to delete file:
> C:\tmp\oak-sorter-1426178913437-0\strings-sorted.txt
> >
> >
> > Best regards, Julian
> >
> >
> > On 2015-03-12 16:22, chetanm@apache.org wrote:
> >>
> >> Author: chetanm
> >> Date: Thu Mar 12 15:22:46 2015
> >> New Revision: 1666220
> >>
> >> URL: http://svn.apache.org/r1666220
> >> Log:
> >> OAK-2557 - VersionGC uses way too much memory if there is a large pile
> of
> >> garbage
> >>
> >> Added:
> >>
> >>
> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> >> (with props)
> >>
> >>
> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> >> (with props)
> >> Modified:
> >>      jackrabbit/oak/trunk/oak-commons/pom.xml
> >>
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
> >>
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
> >>
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
> >>
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
> >>
> >> Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1666220&r1=1666219&r2=1666220&view=diff
> >>
> >>
> ==============================================================================
> >> --- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
> >> +++ jackrabbit/oak/trunk/oak-commons/pom.xml Thu Mar 12 15:22:46 2015
> >> @@ -93,6 +93,11 @@
> >>         <artifactId>oak-mk-api</artifactId>
> >>         <version>${project.version}</version>
> >>       </dependency>
> >> +    <dependency>
> >> +      <groupId>commons-io</groupId>
> >> +      <artifactId>commons-io</artifactId>
> >> +      <version>2.4</version>
> >> +    </dependency>
> >>
> >>       <!-- Test dependencies -->
> >>       <dependency>
> >>
> >> Added:
> >>
> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1666220&view=auto
> >>
> >>
> ==============================================================================
> >> ---
> >>
> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> >> (added)
> >> +++
> >>
> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> >> Thu Mar 12 15:22:46 2015
> >> @@ -0,0 +1,255 @@
> >> +/*
> >> + * Licensed to the Apache Software Foundation (ASF) under one
> >> + * or more contributor license agreements.  See the NOTICE file
> >> + * distributed with this work for additional information
> >> + * regarding copyright ownership.  The ASF licenses this file
> >> + * to you under the Apache License, Version 2.0 (the
> >> + * "License"); you may not use this file except in compliance
> >> + * with the License.  You may obtain a copy of the License at
> >> + *
> >> + *   http://www.apache.org/licenses/LICENSE-2.0
> >> + *
> >> + * Unless required by applicable law or agreed to in writing,
> >> + * software distributed under the License is distributed on an
> >> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> >> + * KIND, either express or implied.  See the License for the
> >> + * specific language governing permissions and limitations
> >> + * under the License.
> >> + */
> >> +
> >> +package org.apache.jackrabbit.oak.commons.sort;
> >> +
> >> +import java.io.BufferedWriter;
> >> +import java.io.Closeable;
> >> +import java.io.File;
> >> +import java.io.FileNotFoundException;
> >> +import java.io.IOException;
> >> +import java.io.Reader;
> >> +import java.nio.charset.Charset;
> >> +import java.util.Collections;
> >> +import java.util.Comparator;
> >> +import java.util.Iterator;
> >> +import java.util.List;
> >> +
> >> +import com.google.common.base.Charsets;
> >> +import com.google.common.collect.Lists;
> >> +import com.google.common.io.Closer;
> >> +import com.google.common.io.Files;
> >> +import org.apache.commons.io.FileUtils;
> >> +import org.apache.commons.io.LineIterator;
> >> +import org.slf4j.Logger;
> >> +import org.slf4j.LoggerFactory;
> >> +
> >> +/**
> >> + * Utility class to store a list of string and perform sort on that.
> For
> >> small size
> >> + * the list would be maintained in memory. If the size crosses the
> >> required threshold then
> >> + * the sorting would be performed externally
> >> + */
> >> +public class StringSort implements Closeable {
> >> +    private final Logger log = LoggerFactory.getLogger(getClass());
> >> +    public static final int BATCH_SIZE = 2048;
> >> +
> >> +    private final int overflowToDiskThreshold;
> >> +    private final Comparator<String> comparator;
> >> +
> >> +    private final List<String> ids = Lists.newArrayList();
> >> +    private long size;
> >> +
> >> +    private final List<String> inMemBatch = Lists.newArrayList();
> >> +
> >> +    private boolean useFile;
> >> +    private PersistentState persistentState;
> >> +
> >> +    public StringSort(int overflowToDiskThreshold, Comparator<String>
> >> comparator) {
> >> +        this.overflowToDiskThreshold = overflowToDiskThreshold;
> >> +        this.comparator = comparator;
> >> +    }
> >> +
> >> +    public void add(String id) throws IOException {
> >> +        if (useFile) {
> >> +            addToBatch(id);
> >> +        } else {
> >> +            ids.add(id);
> >> +            if (ids.size() >= overflowToDiskThreshold) {
> >> +                flushToFile(ids);
> >> +                useFile = true;
> >> +                log.debug("In memory buffer crossed the threshold of
> {}.
> >> " +
> >> +                        "Switching to filesystem [{}] to manage the
> >> state", overflowToDiskThreshold, persistentState);
> >> +            }
> >> +        }
> >> +        size++;
> >> +    }
> >> +
> >> +    public void sort() throws IOException {
> >> +        if (useFile) {
> >> +            //Flush the last batch
> >> +            flushToFile(inMemBatch);
> >> +            persistentState.sort();
> >> +        } else {
> >> +            Collections.sort(ids, comparator);
> >> +        }
> >> +    }
> >> +
> >> +    public Iterator<String> getIds() throws IOException {
> >> +        if (useFile) {
> >> +            return persistentState.getIterator();
> >> +        } else {
> >> +            return ids.iterator();
> >> +        }
> >> +    }
> >> +
> >> +    public long getSize() {
> >> +        return size;
> >> +    }
> >> +
> >> +    public boolean isEmpty() {
> >> +        return size == 0;
> >> +    }
> >> +
> >> +    public boolean usingFile() {
> >> +        return useFile;
> >> +    }
> >> +
> >> +    @Override
> >> +    public void close() throws IOException {
> >> +        if (persistentState != null) {
> >> +            persistentState.close();
> >> +        }
> >> +    }
> >> +
> >> +    private void addToBatch(String id) throws IOException {
> >> +        inMemBatch.add(id);
> >> +        if (inMemBatch.size() >= BATCH_SIZE) {
> >> +            flushToFile(inMemBatch);
> >> +        }
> >> +    }
> >> +
> >> +    private void flushToFile(List<String> ids) throws IOException {
> >> +        BufferedWriter w = getPersistentState().getWriter();
> >> +        for (String id : ids) {
> >> +            w.write(id);
> >> +            w.newLine();
> >> +        }
> >> +        ids.clear();
> >> +    }
> >> +
> >> +    private PersistentState getPersistentState() {
> >> +        //Lazily initialize the persistent state
> >> +        if (persistentState == null) {
> >> +            persistentState = new PersistentState(comparator);
> >> +        }
> >> +        return persistentState;
> >> +    }
> >> +
> >> +    private static class PersistentState implements Closeable {
> >> +        /**
> >> +         * Maximum loop count when creating temp directories.
> >> +         */
> >> +        private static final int TEMP_DIR_ATTEMPTS = 10000;
> >> +
> >> +        private final Charset charset = Charsets.UTF_8;
> >> +        private final File workDir;
> >> +        private final Comparator<String> comparator;
> >> +        private File idFile;
> >> +        private File sortedFile;
> >> +        private BufferedWriter writer;
> >> +        private List<CloseableIterator> openedIterators =
> >> Lists.newArrayList();
> >> +
> >> +        public PersistentState(Comparator<String> comparator) {
> >> +            this(comparator, createTempDir("oak-sorter-"));
> >> +        }
> >> +
> >> +        public PersistentState(Comparator<String> comparator, File
> >> workDir) {
> >> +            this.workDir = workDir;
> >> +            this.comparator = comparator;
> >> +        }
> >> +
> >> +        public BufferedWriter getWriter() throws FileNotFoundException
> {
> >> +            if (idFile == null) {
> >> +                idFile = new File(workDir, "strings.txt");
> >> +                sortedFile = new File(workDir, "strings-sorted.txt");
> >> +                writer = Files.newWriter(idFile, charset);
> >> +            }
> >> +            return writer;
> >> +        }
> >> +
> >> +        public void sort() throws IOException {
> >> +            closeWriter();
> >> +
> >> +            List<File> sortedFiles = ExternalSort.sortInBatch(idFile,
> >> +                    comparator, //Comparator to use
> >> +                    ExternalSort.DEFAULTMAXTEMPFILES,
> >> +                    ExternalSort.DEFAULT_MAX_MEM_BYTES,
> >> +                    charset, //charset
> >> +                    workDir,  //temp directory where intermediate files
> >> are created
> >> +                    true //distinct
> >> +            );
> >> +
> >> +            ExternalSort.mergeSortedFiles(sortedFiles,
> >> +                    sortedFile,
> >> +                    comparator,
> >> +                    charset,
> >> +                    true
> >> +            );
> >> +        }
> >> +
> >> +        public Iterator<String> getIterator() throws IOException {
> >> +            CloseableIterator itr = new
> >> CloseableIterator(Files.newReader(sortedFile, charset));
> >> +            openedIterators.add(itr);
> >> +            return itr;
> >> +        }
> >> +
> >> +        @Override
> >> +        public String toString() {
> >> +            return "PersistentState : workDir=" +
> >> workDir.getAbsolutePath();
> >> +        }
> >> +
> >> +        @Override
> >> +        public void close() throws IOException {
> >> +            Closer closer = Closer.create();
> >> +            try {
> >> +                closer.register(writer);
> >> +                for (CloseableIterator citr : openedIterators) {
> >> +                    closer.register(citr);
> >> +                }
> >> +                closer.register(new Closeable() {
> >> +                    @Override
> >> +                    public void close() throws IOException {
> >> +                        FileUtils.deleteDirectory(workDir);
> >> +                    }
> >> +                });
> >> +            } finally {
> >> +                closer.close();
> >> +            }
> >> +        }
> >> +
> >> +        private void closeWriter() throws IOException {
> >> +            writer.close();
> >> +        }
> >> +
> >> +        /**
> >> +         * Taken from com.google.common.io.Files#createTempDir()
> >> +         * Modified to provide a prefix
> >> +         */
> >> +        private static File createTempDir(String prefix) {
> >> +            File baseDir = new
> >> File(System.getProperty("java.io.tmpdir"));
> >> +            String baseName = System.currentTimeMillis() + "-";
> >> +
> >> +            for (int counter = 0; counter < TEMP_DIR_ATTEMPTS;
> counter++)
> >> {
> >> +                File tempDir = new File(baseDir, prefix + baseName +
> >> counter);
> >> +                if (tempDir.mkdir()) {
> >> +                    return tempDir;
> >> +                }
> >> +            }
> >> +            throw new IllegalStateException("Failed to create directory
> >> within "
> >> +                    + TEMP_DIR_ATTEMPTS + " attempts (tried "
> >> +                    + baseName + "0 to " + baseName +
> (TEMP_DIR_ATTEMPTS
> >> - 1) + ')');
> >> +        }
> >> +    }
> >> +
> >> +    private static class CloseableIterator extends LineIterator
> >> implements Closeable {
> >> +        public CloseableIterator(Reader reader) throws
> >> IllegalArgumentException {
> >> +            super(reader);
> >> +        }
> >> +    }
> >> +}
> >>
> >> Propchange:
> >>
> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> >>
> >>
> ------------------------------------------------------------------------------
> >>      svn:eol-style = native
> >>
> >> Added:
> >>
> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java?rev=1666220&view=auto
> >>
> >>
> ==============================================================================
> >> ---
> >>
> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> >> (added)
> >> +++
> >>
> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> >> Thu Mar 12 15:22:46 2015
> >> @@ -0,0 +1,144 @@
> >> +/*
> >> + * Licensed to the Apache Software Foundation (ASF) under one
> >> + * or more contributor license agreements.  See the NOTICE file
> >> + * distributed with this work for additional information
> >> + * regarding copyright ownership.  The ASF licenses this file
> >> + * to you under the Apache License, Version 2.0 (the
> >> + * "License"); you may not use this file except in compliance
> >> + * with the License.  You may obtain a copy of the License at
> >> + *
> >> + *   http://www.apache.org/licenses/LICENSE-2.0
> >> + *
> >> + * Unless required by applicable law or agreed to in writing,
> >> + * software distributed under the License is distributed on an
> >> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> >> + * KIND, either express or implied.  See the License for the
> >> + * specific language governing permissions and limitations
> >> + * under the License.
> >> + */
> >> +
> >> +package org.apache.jackrabbit.oak.commons.sort;
> >> +
> >> +import java.io.IOException;
> >> +import java.io.Serializable;
> >> +import java.util.ArrayList;
> >> +import java.util.Arrays;
> >> +import java.util.Collections;
> >> +import java.util.Comparator;
> >> +import java.util.HashSet;
> >> +import java.util.List;
> >> +import java.util.Set;
> >> +
> >> +import com.google.common.base.Joiner;
> >> +import com.google.common.collect.Collections2;
> >> +import com.google.common.collect.ImmutableList;
> >> +import org.junit.Test;
> >> +
> >> +import static org.junit.Assert.assertEquals;
> >> +import static org.junit.Assert.assertFalse;
> >> +import static org.junit.Assert.assertTrue;
> >> +
> >> +public class StringSortTest {
> >> +    private Comparator<String> comparator = new PathComparator();
> >> +    private StringSort collector;
> >> +
> >> +    @Test
> >> +    public void inMemory() throws Exception{
> >> +        List<String> paths = createTestPaths(5, false);
> >> +        collector = new StringSort(paths.size() + 1,comparator);
> >> +        addPathsToCollector(paths);
> >> +
> >> +        assertConstraints(paths);
> >> +        assertFalse(collector.usingFile());
> >> +        collector.close();
> >> +    }
> >> +
> >> +    @Test
> >> +    public void overflowToDisk() throws Exception{
> >> +        //Create ~50k paths
> >> +        List<String> paths = createTestPaths(10, true);
> >> +        collector = new StringSort(1000, comparator);
> >> +        addPathsToCollector(paths);
> >> +
> >> +        assertTrue(collector.usingFile());
> >> +        assertConstraints(paths);
> >> +
> >> +        collector.close();
> >> +    }
> >> +
> >> +    private void assertConstraints(List<String> paths) throws
> IOException
> >> {
> >> +        assertEquals(paths.size(), collector.getSize());
> >> +
> >> +        Collections.sort(paths, comparator);
> >> +        collector.sort();
> >> +
> >> +        List<String> sortedPaths =
> >> ImmutableList.copyOf(collector.getIds());
> >> +        assertEquals(paths.size(), sortedPaths.size());
> >> +        assertEquals(paths, sortedPaths);
> >> +    }
> >> +
> >> +    private void addPathsToCollector(Iterable<String> paths) throws
> >> IOException {
> >> +        for (String path : paths){
> >> +            collector.add(path);
> >> +        }
> >> +    }
> >> +
> >> +    private static List<String> createTestPaths(int depth, boolean
> >> permutation){
> >> +        List<String> rootPaths = Arrays.asList("a", "b", "c", "d", "e",
> >> "f", "g");
> >> +        List<String> paths = new ArrayList<String>();
> >> +
> >> +
> >> +        if (permutation){
> >> +            List<String> newRoots = new ArrayList<String>();
> >> +            for (List<String> permuts :
> >> Collections2.orderedPermutations(rootPaths)){
> >> +                newRoots.add(Joiner.on("").join(permuts));
> >> +            }
> >> +            rootPaths = newRoots;
> >> +        }
> >> +
> >> +        for (String root : rootPaths){
> >> +            List<String> pathElements = new ArrayList<String>();
> >> +            pathElements.add(root);
> >> +            paths.add(createId(pathElements));
> >> +            for (int i = 0; i < depth; i++){
> >> +                pathElements.add(root + i);
> >> +                paths.add(createId(pathElements));
> >> +            }
> >> +        }
> >> +
> >> +        Set<String> idSet = new HashSet<String>(paths);
> >> +        assertEquals(paths.size(), idSet.size());
> >> +
> >> +        Collections.shuffle(paths);
> >> +        return paths;
> >> +    }
> >> +
> >> +    private static String createId(Iterable<String> pathElements){
> >> +        return "/" + Joiner.on('/').join(pathElements);
> >> +    }
> >> +
> >> +    private static  class PathComparator implements Comparator<String>,
> >> Serializable {
> >> +        @Override
> >> +        public int compare(String o1, String o2) {
> >> +            int d1 = pathDepth(o1);
> >> +            int d2 = pathDepth(o2);
> >> +            if (d1 != d2) {
> >> +                return Integer.signum(d2 - d1);
> >> +            }
> >> +            return o1.compareTo(o2);
> >> +        }
> >> +
> >> +        private static int pathDepth(String path) {
> >> +            if (path.equals("/")) {
> >> +                return 0;
> >> +            }
> >> +            int depth = 0;
> >> +            for (int i = 0; i < path.length(); i++) {
> >> +                if (path.charAt(i) == '/') {
> >> +                    depth++;
> >> +                }
> >> +            }
> >> +            return depth;
> >> +        }
> >> +    }
> >> +}
> >>
> >> Propchange:
> >>
> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> >>
> >>
> ------------------------------------------------------------------------------
> >>      svn:eol-style = native
> >>
> >> Modified:
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> >>
> >>
> ==============================================================================
> >> ---
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
> >> (original)
> >> +++
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
> >> Thu Mar 12 15:22:46 2015
> >> @@ -574,7 +574,11 @@ public class DocumentNodeStoreService {
> >>           RevisionGC revisionGC = new RevisionGC(new Runnable() {
> >>               @Override
> >>               public void run() {
> >> -
> >> store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs,
> >> TimeUnit.SECONDS);
> >> +                try {
> >> +
> >> store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs,
> >> TimeUnit.SECONDS);
> >> +                } catch (IOException e) {
> >> +                    log.warn("Error occurred while executing the
> Version
> >> Garbage Collector", e);
> >> +                }
> >>               }
> >>           }, executor);
> >>           registrations.add(registerMBean(whiteboard,
> >> RevisionGCMBean.class, revisionGC,
> >>
> >> Modified:
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> >>
> >>
> ==============================================================================
> >> ---
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
> >> (original)
> >> +++
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
> >> Thu Mar 12 15:22:46 2015
> >> @@ -19,9 +19,9 @@
> >>
> >>   package org.apache.jackrabbit.oak.plugins.document;
> >>
> >> -import java.util.ArrayList;
> >> -import java.util.Collections;
> >> +import java.io.IOException;
> >>   import java.util.EnumSet;
> >> +import java.util.Iterator;
> >>   import java.util.List;
> >>   import java.util.Set;
> >>   import java.util.concurrent.TimeUnit;
> >> @@ -31,18 +31,23 @@ import com.google.common.base.StandardSy
> >>   import com.google.common.base.Stopwatch;
> >>   import com.google.common.collect.ImmutableList;
> >>
> >> +import org.apache.jackrabbit.oak.commons.sort.StringSort;
> >>   import org.apache.jackrabbit.oak.plugins.document.util.Utils;
> >>   import org.slf4j.Logger;
> >>   import org.slf4j.LoggerFactory;
> >>
> >> +import static com.google.common.collect.Iterators.partition;
> >>   import static
> >>
> org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
> >>   import static
> >>
> org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
> >>
> >>   public class VersionGarbageCollector {
> >> +    //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid
> >> re-partitioning
> >> +    private static final int DELETE_BATCH_SIZE = 450;
> >>       private final DocumentNodeStore nodeStore;
> >>       private final VersionGCSupport versionStore;
> >> +    private int overflowToDiskThreshold = 100000;
> >>
> >> -    private final Logger log = LoggerFactory.getLogger(getClass());
> >> +    private static final Logger log =
> >> LoggerFactory.getLogger(VersionGarbageCollector.class);
> >>
> >>       /**
> >>        * Split document types which can be safely garbage collected
> >> @@ -56,7 +61,7 @@ public class VersionGarbageCollector {
> >>           this.versionStore = gcSupport;
> >>       }
> >>
> >> -    public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) {
> >> +    public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) throws
> >> IOException {
> >>           long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
> >>           Stopwatch sw = Stopwatch.createStarted();
> >>           VersionGCStats stats = new VersionGCStats();
> >> @@ -85,41 +90,60 @@ public class VersionGarbageCollector {
> >>           return stats;
> >>       }
> >>
> >> +    public void setOverflowToDiskThreshold(int
> overflowToDiskThreshold) {
> >> +        this.overflowToDiskThreshold = overflowToDiskThreshold;
> >> +    }
> >> +
> >>       private void collectSplitDocuments(VersionGCStats stats, long
> >> oldestRevTimeStamp) {
> >>           versionStore.deleteSplitDocuments(GC_TYPES,
> oldestRevTimeStamp,
> >> stats);
> >>       }
> >>
> >> -    private void collectDeletedDocuments(VersionGCStats stats, Revision
> >> headRevision, long oldestRevTimeStamp) {
> >> -        List<String> docIdsToDelete = new ArrayList<String>();
> >> -        Iterable<NodeDocument> itr =
> >> versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
> >> +    private void collectDeletedDocuments(VersionGCStats stats, Revision
> >> headRevision, long oldestRevTimeStamp)
> >> +            throws IOException {
> >> +        StringSort docIdsToDelete = new
> >> StringSort(overflowToDiskThreshold, NodeDocumentIdComparator.INSTANCE);
> >>           try {
> >> -            for (NodeDocument doc : itr) {
> >> -                //Check if node is actually deleted at current revision
> >> -                //As node is not modified since oldestRevTimeStamp then
> >> -                //this node has not be revived again in past
> >> maxRevisionAge
> >> -                //So deleting it is safe
> >> -                if (doc.getNodeAtRevision(nodeStore, headRevision,
> null)
> >> == null) {
> >> -                    docIdsToDelete.add(doc.getId());
> >> -                    //Collect id of all previous docs also
> >> -                    for (NodeDocument prevDoc :
> >> ImmutableList.copyOf(doc.getAllPreviousDocs())) {
> >> -                        docIdsToDelete.add(prevDoc.getId());
> >> +            Iterable<NodeDocument> itr =
> >> versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
> >> +            try {
> >> +                for (NodeDocument doc : itr) {
> >> +                    //Check if node is actually deleted at current
> >> revision
> >> +                    //As node is not modified since oldestRevTimeStamp
> >> then
> >> +                    //this node has not be revived again in past
> >> maxRevisionAge
> >> +                    //So deleting it is safe
> >> +                    if (doc.getNodeAtRevision(nodeStore, headRevision,
> >> null) == null) {
> >> +                        docIdsToDelete.add(doc.getId());
> >> +                        //Collect id of all previous docs also
> >> +                        for (NodeDocument prevDoc :
> >> ImmutableList.copyOf(doc.getAllPreviousDocs())) {
> >> +                            docIdsToDelete.add(prevDoc.getId());
> >> +                        }
> >>                       }
> >>                   }
> >> +            } finally {
> >> +                Utils.closeIfCloseable(itr);
> >> +            }
> >> +
> >> +            if (docIdsToDelete.isEmpty()){
> >> +                return;
> >>               }
> >> -        } finally {
> >> -            Utils.closeIfCloseable(itr);
> >> -        }
> >>
> >> -        Collections.sort(docIdsToDelete,
> >> NodeDocumentIdComparator.INSTANCE);
> >> +            docIdsToDelete.sort();
> >> +            log.info("Proceeding to delete [{}] documents",
> >> docIdsToDelete.getSize());
> >>
> >> -        if(log.isDebugEnabled()) {
> >> -            StringBuilder sb = new StringBuilder("Deleted document with
> >> following ids were deleted as part of GC \n");
> >> -
> >> Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb,
> >> docIdsToDelete);
> >> -            log.debug(sb.toString());
> >> +            if (log.isDebugEnabled() && docIdsToDelete.getSize() <
> 1000)
> >> {
> >> +                StringBuilder sb = new StringBuilder("Deleted document
> >> with following ids were deleted as part of GC \n");
> >> +
> >> Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb,
> >> docIdsToDelete.getIds());
> >> +                log.debug(sb.toString());
> >> +            }
> >> +
> >> +            Iterator<List<String>> idListItr =
> >> partition(docIdsToDelete.getIds(), DELETE_BATCH_SIZE);
> >> +            while (idListItr.hasNext()) {
> >> +                nodeStore.getDocumentStore().remove(Collection.NODES,
> >> idListItr.next());
> >> +            }
> >> +
> >> +            nodeStore.invalidateDocChildrenCache();
> >> +            stats.deletedDocGCCount += docIdsToDelete.getSize();
> >> +        } finally {
> >> +            docIdsToDelete.close();
> >>           }
> >> -        nodeStore.getDocumentStore().remove(Collection.NODES,
> >> docIdsToDelete);
> >> -        nodeStore.invalidateDocChildrenCache();
> >> -        stats.deletedDocGCCount += docIdsToDelete.size();
> >>       }
> >>
> >>       public static class VersionGCStats {
> >>
> >> Modified:
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> >>
> >>
> ==============================================================================
> >> ---
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
> >> (original)
> >> +++
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
> >> Thu Mar 12 15:22:46 2015
> >> @@ -36,6 +36,7 @@ import org.junit.Before;
> >>   import org.junit.Test;
> >>
> >>   import static java.util.concurrent.TimeUnit.HOURS;
> >> +import static org.junit.Assert.assertEquals;
> >>   import static org.junit.Assert.assertNull;
> >>   import static org.junit.Assert.fail;
> >>
> >> @@ -104,6 +105,53 @@ public class VersionGCDeletionTest {
> >>           assertNull(ts.find(Collection.NODES, "1:/x"));
> >>       }
> >>
> >> +    @Test
> >> +    public void deleteLargeNumber() throws Exception{
> >> +        int noOfDocsToDelete = 10000;
> >> +        DocumentStore ts = new MemoryDocumentStore();
> >> +        store = new DocumentMK.Builder()
> >> +                .clock(clock)
> >> +                .setDocumentStore(new MemoryDocumentStore())
> >> +                .setAsyncDelay(0)
> >> +                .getNodeStore();
> >> +
> >> +        //Baseline the clock
> >> +        clock.waitUntil(Revision.getCurrentTimestamp());
> >> +
> >> +        NodeBuilder b1 = store.getRoot().builder();
> >> +        NodeBuilder xb = b1.child("x");
> >> +        for (int i = 0; i < noOfDocsToDelete; i++){
> >> +            xb.child("a"+i).child("b"+i);
> >> +        }
> >> +        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
> >> +
> >> +        long maxAge = 1; //hours
> >> +        long delta = TimeUnit.MINUTES.toMillis(10);
> >> +
> >> +        //Remove x/y
> >> +        NodeBuilder b2 = store.getRoot().builder();
> >> +        b2.child("x").remove();
> >> +        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
> >> +
> >> +        store.runBackgroundOperations();
> >> +
> >> +        //3. Check that deleted doc does get collected post maxAge
> >> +        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) +
> >> delta);
> >> +        VersionGarbageCollector gc =
> store.getVersionGarbageCollector();
> >> +        gc.setOverflowToDiskThreshold(100);
> >> +
> >> +        VersionGarbageCollector.VersionGCStats stats = gc.gc(maxAge *
> 2,
> >> HOURS);
> >> +        assertEquals(noOfDocsToDelete * 2 + 1,
> stats.deletedDocGCCount);
> >> +
> >> +
> >> +        assertNull(ts.find(Collection.NODES, "1:/x"));
> >> +
> >> +        for (int i = 0; i < noOfDocsToDelete; i++){
> >> +            assertNull(ts.find(Collection.NODES, "2:/a"+i+"/b"+i));
> >> +            assertNull(ts.find(Collection.NODES, "1:/a"+i));
> >> +        }
> >> +    }
> >> +
> >>       private static class TestDocumentStore extends
> MemoryDocumentStore {
> >>           boolean throwException;
> >>           @Override
> >>
> >> Modified:
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> >>
> >>
> ==============================================================================
> >> ---
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
> >> (original)
> >> +++
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
> >> Thu Mar 12 15:22:46 2015
> >> @@ -137,7 +137,11 @@ public class VersionGCWithSplitTest {
> >>           Thread t = new Thread(new Runnable() {
> >>               @Override
> >>               public void run() {
> >> -                stats.set(gc.gc(1, HOURS));
> >> +                try {
> >> +                    stats.set(gc.gc(1, HOURS));
> >> +                } catch (IOException e) {
> >> +                    throw new RuntimeException(e);
> >> +                }
> >>               }
> >>           });
> >>
> >>
> >>
> >>
> >
>

Re: svn commit: r1666220 - in /jackrabbit/oak/trunk: oak-commons/ oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/d...

Posted by Chetan Mehrotra <ch...@gmail.com>.
Looks like Closer closes the closeables in LIFO manner due to which
directory containing that file got deleted first. I have change the
logic now.

Let me know if the test passes for you on Windows
Chetan Mehrotra


On Thu, Mar 12, 2015 at 10:21 PM, Julian Reschke <ju...@gmx.de> wrote:
> With this change, I get a reliable test failure on Windows:
>
>
> Tests in error:
>
> overflowToDisk(org.apache.jackrabbit.oak.commons.sort.StringSortTest):
> Unable to delete file: C:\tmp\oak-sorter-1426178913437-0\strings-sorted.txt
>
>
> Best regards, Julian
>
>
> On 2015-03-12 16:22, chetanm@apache.org wrote:
>>
>> Author: chetanm
>> Date: Thu Mar 12 15:22:46 2015
>> New Revision: 1666220
>>
>> URL: http://svn.apache.org/r1666220
>> Log:
>> OAK-2557 - VersionGC uses way too much memory if there is a large pile of
>> garbage
>>
>> Added:
>>
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> (with props)
>>
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> (with props)
>> Modified:
>>      jackrabbit/oak/trunk/oak-commons/pom.xml
>>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>>
>> Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1666220&r1=1666219&r2=1666220&view=diff
>>
>> ==============================================================================
>> --- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
>> +++ jackrabbit/oak/trunk/oak-commons/pom.xml Thu Mar 12 15:22:46 2015
>> @@ -93,6 +93,11 @@
>>         <artifactId>oak-mk-api</artifactId>
>>         <version>${project.version}</version>
>>       </dependency>
>> +    <dependency>
>> +      <groupId>commons-io</groupId>
>> +      <artifactId>commons-io</artifactId>
>> +      <version>2.4</version>
>> +    </dependency>
>>
>>       <!-- Test dependencies -->
>>       <dependency>
>>
>> Added:
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1666220&view=auto
>>
>> ==============================================================================
>> ---
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> (added)
>> +++
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> Thu Mar 12 15:22:46 2015
>> @@ -0,0 +1,255 @@
>> +/*
>> + * Licensed to the Apache Software Foundation (ASF) under one
>> + * or more contributor license agreements.  See the NOTICE file
>> + * distributed with this work for additional information
>> + * regarding copyright ownership.  The ASF licenses this file
>> + * to you under the Apache License, Version 2.0 (the
>> + * "License"); you may not use this file except in compliance
>> + * with the License.  You may obtain a copy of the License at
>> + *
>> + *   http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing,
>> + * software distributed under the License is distributed on an
>> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
>> + * KIND, either express or implied.  See the License for the
>> + * specific language governing permissions and limitations
>> + * under the License.
>> + */
>> +
>> +package org.apache.jackrabbit.oak.commons.sort;
>> +
>> +import java.io.BufferedWriter;
>> +import java.io.Closeable;
>> +import java.io.File;
>> +import java.io.FileNotFoundException;
>> +import java.io.IOException;
>> +import java.io.Reader;
>> +import java.nio.charset.Charset;
>> +import java.util.Collections;
>> +import java.util.Comparator;
>> +import java.util.Iterator;
>> +import java.util.List;
>> +
>> +import com.google.common.base.Charsets;
>> +import com.google.common.collect.Lists;
>> +import com.google.common.io.Closer;
>> +import com.google.common.io.Files;
>> +import org.apache.commons.io.FileUtils;
>> +import org.apache.commons.io.LineIterator;
>> +import org.slf4j.Logger;
>> +import org.slf4j.LoggerFactory;
>> +
>> +/**
>> + * Utility class to store a list of string and perform sort on that. For
>> small size
>> + * the list would be maintained in memory. If the size crosses the
>> required threshold then
>> + * the sorting would be performed externally
>> + */
>> +public class StringSort implements Closeable {
>> +    private final Logger log = LoggerFactory.getLogger(getClass());
>> +    public static final int BATCH_SIZE = 2048;
>> +
>> +    private final int overflowToDiskThreshold;
>> +    private final Comparator<String> comparator;
>> +
>> +    private final List<String> ids = Lists.newArrayList();
>> +    private long size;
>> +
>> +    private final List<String> inMemBatch = Lists.newArrayList();
>> +
>> +    private boolean useFile;
>> +    private PersistentState persistentState;
>> +
>> +    public StringSort(int overflowToDiskThreshold, Comparator<String>
>> comparator) {
>> +        this.overflowToDiskThreshold = overflowToDiskThreshold;
>> +        this.comparator = comparator;
>> +    }
>> +
>> +    public void add(String id) throws IOException {
>> +        if (useFile) {
>> +            addToBatch(id);
>> +        } else {
>> +            ids.add(id);
>> +            if (ids.size() >= overflowToDiskThreshold) {
>> +                flushToFile(ids);
>> +                useFile = true;
>> +                log.debug("In memory buffer crossed the threshold of {}.
>> " +
>> +                        "Switching to filesystem [{}] to manage the
>> state", overflowToDiskThreshold, persistentState);
>> +            }
>> +        }
>> +        size++;
>> +    }
>> +
>> +    public void sort() throws IOException {
>> +        if (useFile) {
>> +            //Flush the last batch
>> +            flushToFile(inMemBatch);
>> +            persistentState.sort();
>> +        } else {
>> +            Collections.sort(ids, comparator);
>> +        }
>> +    }
>> +
>> +    public Iterator<String> getIds() throws IOException {
>> +        if (useFile) {
>> +            return persistentState.getIterator();
>> +        } else {
>> +            return ids.iterator();
>> +        }
>> +    }
>> +
>> +    public long getSize() {
>> +        return size;
>> +    }
>> +
>> +    public boolean isEmpty() {
>> +        return size == 0;
>> +    }
>> +
>> +    public boolean usingFile() {
>> +        return useFile;
>> +    }
>> +
>> +    @Override
>> +    public void close() throws IOException {
>> +        if (persistentState != null) {
>> +            persistentState.close();
>> +        }
>> +    }
>> +
>> +    private void addToBatch(String id) throws IOException {
>> +        inMemBatch.add(id);
>> +        if (inMemBatch.size() >= BATCH_SIZE) {
>> +            flushToFile(inMemBatch);
>> +        }
>> +    }
>> +
>> +    private void flushToFile(List<String> ids) throws IOException {
>> +        BufferedWriter w = getPersistentState().getWriter();
>> +        for (String id : ids) {
>> +            w.write(id);
>> +            w.newLine();
>> +        }
>> +        ids.clear();
>> +    }
>> +
>> +    private PersistentState getPersistentState() {
>> +        //Lazily initialize the persistent state
>> +        if (persistentState == null) {
>> +            persistentState = new PersistentState(comparator);
>> +        }
>> +        return persistentState;
>> +    }
>> +
>> +    private static class PersistentState implements Closeable {
>> +        /**
>> +         * Maximum loop count when creating temp directories.
>> +         */
>> +        private static final int TEMP_DIR_ATTEMPTS = 10000;
>> +
>> +        private final Charset charset = Charsets.UTF_8;
>> +        private final File workDir;
>> +        private final Comparator<String> comparator;
>> +        private File idFile;
>> +        private File sortedFile;
>> +        private BufferedWriter writer;
>> +        private List<CloseableIterator> openedIterators =
>> Lists.newArrayList();
>> +
>> +        public PersistentState(Comparator<String> comparator) {
>> +            this(comparator, createTempDir("oak-sorter-"));
>> +        }
>> +
>> +        public PersistentState(Comparator<String> comparator, File
>> workDir) {
>> +            this.workDir = workDir;
>> +            this.comparator = comparator;
>> +        }
>> +
>> +        public BufferedWriter getWriter() throws FileNotFoundException {
>> +            if (idFile == null) {
>> +                idFile = new File(workDir, "strings.txt");
>> +                sortedFile = new File(workDir, "strings-sorted.txt");
>> +                writer = Files.newWriter(idFile, charset);
>> +            }
>> +            return writer;
>> +        }
>> +
>> +        public void sort() throws IOException {
>> +            closeWriter();
>> +
>> +            List<File> sortedFiles = ExternalSort.sortInBatch(idFile,
>> +                    comparator, //Comparator to use
>> +                    ExternalSort.DEFAULTMAXTEMPFILES,
>> +                    ExternalSort.DEFAULT_MAX_MEM_BYTES,
>> +                    charset, //charset
>> +                    workDir,  //temp directory where intermediate files
>> are created
>> +                    true //distinct
>> +            );
>> +
>> +            ExternalSort.mergeSortedFiles(sortedFiles,
>> +                    sortedFile,
>> +                    comparator,
>> +                    charset,
>> +                    true
>> +            );
>> +        }
>> +
>> +        public Iterator<String> getIterator() throws IOException {
>> +            CloseableIterator itr = new
>> CloseableIterator(Files.newReader(sortedFile, charset));
>> +            openedIterators.add(itr);
>> +            return itr;
>> +        }
>> +
>> +        @Override
>> +        public String toString() {
>> +            return "PersistentState : workDir=" +
>> workDir.getAbsolutePath();
>> +        }
>> +
>> +        @Override
>> +        public void close() throws IOException {
>> +            Closer closer = Closer.create();
>> +            try {
>> +                closer.register(writer);
>> +                for (CloseableIterator citr : openedIterators) {
>> +                    closer.register(citr);
>> +                }
>> +                closer.register(new Closeable() {
>> +                    @Override
>> +                    public void close() throws IOException {
>> +                        FileUtils.deleteDirectory(workDir);
>> +                    }
>> +                });
>> +            } finally {
>> +                closer.close();
>> +            }
>> +        }
>> +
>> +        private void closeWriter() throws IOException {
>> +            writer.close();
>> +        }
>> +
>> +        /**
>> +         * Taken from com.google.common.io.Files#createTempDir()
>> +         * Modified to provide a prefix
>> +         */
>> +        private static File createTempDir(String prefix) {
>> +            File baseDir = new
>> File(System.getProperty("java.io.tmpdir"));
>> +            String baseName = System.currentTimeMillis() + "-";
>> +
>> +            for (int counter = 0; counter < TEMP_DIR_ATTEMPTS; counter++)
>> {
>> +                File tempDir = new File(baseDir, prefix + baseName +
>> counter);
>> +                if (tempDir.mkdir()) {
>> +                    return tempDir;
>> +                }
>> +            }
>> +            throw new IllegalStateException("Failed to create directory
>> within "
>> +                    + TEMP_DIR_ATTEMPTS + " attempts (tried "
>> +                    + baseName + "0 to " + baseName + (TEMP_DIR_ATTEMPTS
>> - 1) + ')');
>> +        }
>> +    }
>> +
>> +    private static class CloseableIterator extends LineIterator
>> implements Closeable {
>> +        public CloseableIterator(Reader reader) throws
>> IllegalArgumentException {
>> +            super(reader);
>> +        }
>> +    }
>> +}
>>
>> Propchange:
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>>
>> ------------------------------------------------------------------------------
>>      svn:eol-style = native
>>
>> Added:
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java?rev=1666220&view=auto
>>
>> ==============================================================================
>> ---
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> (added)
>> +++
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> Thu Mar 12 15:22:46 2015
>> @@ -0,0 +1,144 @@
>> +/*
>> + * Licensed to the Apache Software Foundation (ASF) under one
>> + * or more contributor license agreements.  See the NOTICE file
>> + * distributed with this work for additional information
>> + * regarding copyright ownership.  The ASF licenses this file
>> + * to you under the Apache License, Version 2.0 (the
>> + * "License"); you may not use this file except in compliance
>> + * with the License.  You may obtain a copy of the License at
>> + *
>> + *   http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing,
>> + * software distributed under the License is distributed on an
>> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
>> + * KIND, either express or implied.  See the License for the
>> + * specific language governing permissions and limitations
>> + * under the License.
>> + */
>> +
>> +package org.apache.jackrabbit.oak.commons.sort;
>> +
>> +import java.io.IOException;
>> +import java.io.Serializable;
>> +import java.util.ArrayList;
>> +import java.util.Arrays;
>> +import java.util.Collections;
>> +import java.util.Comparator;
>> +import java.util.HashSet;
>> +import java.util.List;
>> +import java.util.Set;
>> +
>> +import com.google.common.base.Joiner;
>> +import com.google.common.collect.Collections2;
>> +import com.google.common.collect.ImmutableList;
>> +import org.junit.Test;
>> +
>> +import static org.junit.Assert.assertEquals;
>> +import static org.junit.Assert.assertFalse;
>> +import static org.junit.Assert.assertTrue;
>> +
>> +public class StringSortTest {
>> +    private Comparator<String> comparator = new PathComparator();
>> +    private StringSort collector;
>> +
>> +    @Test
>> +    public void inMemory() throws Exception{
>> +        List<String> paths = createTestPaths(5, false);
>> +        collector = new StringSort(paths.size() + 1,comparator);
>> +        addPathsToCollector(paths);
>> +
>> +        assertConstraints(paths);
>> +        assertFalse(collector.usingFile());
>> +        collector.close();
>> +    }
>> +
>> +    @Test
>> +    public void overflowToDisk() throws Exception{
>> +        //Create ~50k paths
>> +        List<String> paths = createTestPaths(10, true);
>> +        collector = new StringSort(1000, comparator);
>> +        addPathsToCollector(paths);
>> +
>> +        assertTrue(collector.usingFile());
>> +        assertConstraints(paths);
>> +
>> +        collector.close();
>> +    }
>> +
>> +    private void assertConstraints(List<String> paths) throws IOException
>> {
>> +        assertEquals(paths.size(), collector.getSize());
>> +
>> +        Collections.sort(paths, comparator);
>> +        collector.sort();
>> +
>> +        List<String> sortedPaths =
>> ImmutableList.copyOf(collector.getIds());
>> +        assertEquals(paths.size(), sortedPaths.size());
>> +        assertEquals(paths, sortedPaths);
>> +    }
>> +
>> +    private void addPathsToCollector(Iterable<String> paths) throws
>> IOException {
>> +        for (String path : paths){
>> +            collector.add(path);
>> +        }
>> +    }
>> +
>> +    private static List<String> createTestPaths(int depth, boolean
>> permutation){
>> +        List<String> rootPaths = Arrays.asList("a", "b", "c", "d", "e",
>> "f", "g");
>> +        List<String> paths = new ArrayList<String>();
>> +
>> +
>> +        if (permutation){
>> +            List<String> newRoots = new ArrayList<String>();
>> +            for (List<String> permuts :
>> Collections2.orderedPermutations(rootPaths)){
>> +                newRoots.add(Joiner.on("").join(permuts));
>> +            }
>> +            rootPaths = newRoots;
>> +        }
>> +
>> +        for (String root : rootPaths){
>> +            List<String> pathElements = new ArrayList<String>();
>> +            pathElements.add(root);
>> +            paths.add(createId(pathElements));
>> +            for (int i = 0; i < depth; i++){
>> +                pathElements.add(root + i);
>> +                paths.add(createId(pathElements));
>> +            }
>> +        }
>> +
>> +        Set<String> idSet = new HashSet<String>(paths);
>> +        assertEquals(paths.size(), idSet.size());
>> +
>> +        Collections.shuffle(paths);
>> +        return paths;
>> +    }
>> +
>> +    private static String createId(Iterable<String> pathElements){
>> +        return "/" + Joiner.on('/').join(pathElements);
>> +    }
>> +
>> +    private static  class PathComparator implements Comparator<String>,
>> Serializable {
>> +        @Override
>> +        public int compare(String o1, String o2) {
>> +            int d1 = pathDepth(o1);
>> +            int d2 = pathDepth(o2);
>> +            if (d1 != d2) {
>> +                return Integer.signum(d2 - d1);
>> +            }
>> +            return o1.compareTo(o2);
>> +        }
>> +
>> +        private static int pathDepth(String path) {
>> +            if (path.equals("/")) {
>> +                return 0;
>> +            }
>> +            int depth = 0;
>> +            for (int i = 0; i < path.length(); i++) {
>> +                if (path.charAt(i) == '/') {
>> +                    depth++;
>> +                }
>> +            }
>> +            return depth;
>> +        }
>> +    }
>> +}
>>
>> Propchange:
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>>
>> ------------------------------------------------------------------------------
>>      svn:eol-style = native
>>
>> Modified:
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>>
>> ==============================================================================
>> ---
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> (original)
>> +++
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> Thu Mar 12 15:22:46 2015
>> @@ -574,7 +574,11 @@ public class DocumentNodeStoreService {
>>           RevisionGC revisionGC = new RevisionGC(new Runnable() {
>>               @Override
>>               public void run() {
>> -
>> store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs,
>> TimeUnit.SECONDS);
>> +                try {
>> +
>> store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs,
>> TimeUnit.SECONDS);
>> +                } catch (IOException e) {
>> +                    log.warn("Error occurred while executing the Version
>> Garbage Collector", e);
>> +                }
>>               }
>>           }, executor);
>>           registrations.add(registerMBean(whiteboard,
>> RevisionGCMBean.class, revisionGC,
>>
>> Modified:
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>>
>> ==============================================================================
>> ---
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> (original)
>> +++
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> Thu Mar 12 15:22:46 2015
>> @@ -19,9 +19,9 @@
>>
>>   package org.apache.jackrabbit.oak.plugins.document;
>>
>> -import java.util.ArrayList;
>> -import java.util.Collections;
>> +import java.io.IOException;
>>   import java.util.EnumSet;
>> +import java.util.Iterator;
>>   import java.util.List;
>>   import java.util.Set;
>>   import java.util.concurrent.TimeUnit;
>> @@ -31,18 +31,23 @@ import com.google.common.base.StandardSy
>>   import com.google.common.base.Stopwatch;
>>   import com.google.common.collect.ImmutableList;
>>
>> +import org.apache.jackrabbit.oak.commons.sort.StringSort;
>>   import org.apache.jackrabbit.oak.plugins.document.util.Utils;
>>   import org.slf4j.Logger;
>>   import org.slf4j.LoggerFactory;
>>
>> +import static com.google.common.collect.Iterators.partition;
>>   import static
>> org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
>>   import static
>> org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
>>
>>   public class VersionGarbageCollector {
>> +    //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid
>> re-partitioning
>> +    private static final int DELETE_BATCH_SIZE = 450;
>>       private final DocumentNodeStore nodeStore;
>>       private final VersionGCSupport versionStore;
>> +    private int overflowToDiskThreshold = 100000;
>>
>> -    private final Logger log = LoggerFactory.getLogger(getClass());
>> +    private static final Logger log =
>> LoggerFactory.getLogger(VersionGarbageCollector.class);
>>
>>       /**
>>        * Split document types which can be safely garbage collected
>> @@ -56,7 +61,7 @@ public class VersionGarbageCollector {
>>           this.versionStore = gcSupport;
>>       }
>>
>> -    public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) {
>> +    public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) throws
>> IOException {
>>           long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
>>           Stopwatch sw = Stopwatch.createStarted();
>>           VersionGCStats stats = new VersionGCStats();
>> @@ -85,41 +90,60 @@ public class VersionGarbageCollector {
>>           return stats;
>>       }
>>
>> +    public void setOverflowToDiskThreshold(int overflowToDiskThreshold) {
>> +        this.overflowToDiskThreshold = overflowToDiskThreshold;
>> +    }
>> +
>>       private void collectSplitDocuments(VersionGCStats stats, long
>> oldestRevTimeStamp) {
>>           versionStore.deleteSplitDocuments(GC_TYPES, oldestRevTimeStamp,
>> stats);
>>       }
>>
>> -    private void collectDeletedDocuments(VersionGCStats stats, Revision
>> headRevision, long oldestRevTimeStamp) {
>> -        List<String> docIdsToDelete = new ArrayList<String>();
>> -        Iterable<NodeDocument> itr =
>> versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
>> +    private void collectDeletedDocuments(VersionGCStats stats, Revision
>> headRevision, long oldestRevTimeStamp)
>> +            throws IOException {
>> +        StringSort docIdsToDelete = new
>> StringSort(overflowToDiskThreshold, NodeDocumentIdComparator.INSTANCE);
>>           try {
>> -            for (NodeDocument doc : itr) {
>> -                //Check if node is actually deleted at current revision
>> -                //As node is not modified since oldestRevTimeStamp then
>> -                //this node has not be revived again in past
>> maxRevisionAge
>> -                //So deleting it is safe
>> -                if (doc.getNodeAtRevision(nodeStore, headRevision, null)
>> == null) {
>> -                    docIdsToDelete.add(doc.getId());
>> -                    //Collect id of all previous docs also
>> -                    for (NodeDocument prevDoc :
>> ImmutableList.copyOf(doc.getAllPreviousDocs())) {
>> -                        docIdsToDelete.add(prevDoc.getId());
>> +            Iterable<NodeDocument> itr =
>> versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
>> +            try {
>> +                for (NodeDocument doc : itr) {
>> +                    //Check if node is actually deleted at current
>> revision
>> +                    //As node is not modified since oldestRevTimeStamp
>> then
>> +                    //this node has not be revived again in past
>> maxRevisionAge
>> +                    //So deleting it is safe
>> +                    if (doc.getNodeAtRevision(nodeStore, headRevision,
>> null) == null) {
>> +                        docIdsToDelete.add(doc.getId());
>> +                        //Collect id of all previous docs also
>> +                        for (NodeDocument prevDoc :
>> ImmutableList.copyOf(doc.getAllPreviousDocs())) {
>> +                            docIdsToDelete.add(prevDoc.getId());
>> +                        }
>>                       }
>>                   }
>> +            } finally {
>> +                Utils.closeIfCloseable(itr);
>> +            }
>> +
>> +            if (docIdsToDelete.isEmpty()){
>> +                return;
>>               }
>> -        } finally {
>> -            Utils.closeIfCloseable(itr);
>> -        }
>>
>> -        Collections.sort(docIdsToDelete,
>> NodeDocumentIdComparator.INSTANCE);
>> +            docIdsToDelete.sort();
>> +            log.info("Proceeding to delete [{}] documents",
>> docIdsToDelete.getSize());
>>
>> -        if(log.isDebugEnabled()) {
>> -            StringBuilder sb = new StringBuilder("Deleted document with
>> following ids were deleted as part of GC \n");
>> -
>> Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb,
>> docIdsToDelete);
>> -            log.debug(sb.toString());
>> +            if (log.isDebugEnabled() && docIdsToDelete.getSize() < 1000)
>> {
>> +                StringBuilder sb = new StringBuilder("Deleted document
>> with following ids were deleted as part of GC \n");
>> +
>> Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb,
>> docIdsToDelete.getIds());
>> +                log.debug(sb.toString());
>> +            }
>> +
>> +            Iterator<List<String>> idListItr =
>> partition(docIdsToDelete.getIds(), DELETE_BATCH_SIZE);
>> +            while (idListItr.hasNext()) {
>> +                nodeStore.getDocumentStore().remove(Collection.NODES,
>> idListItr.next());
>> +            }
>> +
>> +            nodeStore.invalidateDocChildrenCache();
>> +            stats.deletedDocGCCount += docIdsToDelete.getSize();
>> +        } finally {
>> +            docIdsToDelete.close();
>>           }
>> -        nodeStore.getDocumentStore().remove(Collection.NODES,
>> docIdsToDelete);
>> -        nodeStore.invalidateDocChildrenCache();
>> -        stats.deletedDocGCCount += docIdsToDelete.size();
>>       }
>>
>>       public static class VersionGCStats {
>>
>> Modified:
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>>
>> ==============================================================================
>> ---
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> (original)
>> +++
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> Thu Mar 12 15:22:46 2015
>> @@ -36,6 +36,7 @@ import org.junit.Before;
>>   import org.junit.Test;
>>
>>   import static java.util.concurrent.TimeUnit.HOURS;
>> +import static org.junit.Assert.assertEquals;
>>   import static org.junit.Assert.assertNull;
>>   import static org.junit.Assert.fail;
>>
>> @@ -104,6 +105,53 @@ public class VersionGCDeletionTest {
>>           assertNull(ts.find(Collection.NODES, "1:/x"));
>>       }
>>
>> +    @Test
>> +    public void deleteLargeNumber() throws Exception{
>> +        int noOfDocsToDelete = 10000;
>> +        DocumentStore ts = new MemoryDocumentStore();
>> +        store = new DocumentMK.Builder()
>> +                .clock(clock)
>> +                .setDocumentStore(new MemoryDocumentStore())
>> +                .setAsyncDelay(0)
>> +                .getNodeStore();
>> +
>> +        //Baseline the clock
>> +        clock.waitUntil(Revision.getCurrentTimestamp());
>> +
>> +        NodeBuilder b1 = store.getRoot().builder();
>> +        NodeBuilder xb = b1.child("x");
>> +        for (int i = 0; i < noOfDocsToDelete; i++){
>> +            xb.child("a"+i).child("b"+i);
>> +        }
>> +        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
>> +
>> +        long maxAge = 1; //hours
>> +        long delta = TimeUnit.MINUTES.toMillis(10);
>> +
>> +        //Remove x/y
>> +        NodeBuilder b2 = store.getRoot().builder();
>> +        b2.child("x").remove();
>> +        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
>> +
>> +        store.runBackgroundOperations();
>> +
>> +        //3. Check that deleted doc does get collected post maxAge
>> +        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) +
>> delta);
>> +        VersionGarbageCollector gc = store.getVersionGarbageCollector();
>> +        gc.setOverflowToDiskThreshold(100);
>> +
>> +        VersionGarbageCollector.VersionGCStats stats = gc.gc(maxAge * 2,
>> HOURS);
>> +        assertEquals(noOfDocsToDelete * 2 + 1, stats.deletedDocGCCount);
>> +
>> +
>> +        assertNull(ts.find(Collection.NODES, "1:/x"));
>> +
>> +        for (int i = 0; i < noOfDocsToDelete; i++){
>> +            assertNull(ts.find(Collection.NODES, "2:/a"+i+"/b"+i));
>> +            assertNull(ts.find(Collection.NODES, "1:/a"+i));
>> +        }
>> +    }
>> +
>>       private static class TestDocumentStore extends MemoryDocumentStore {
>>           boolean throwException;
>>           @Override
>>
>> Modified:
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>>
>> ==============================================================================
>> ---
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> (original)
>> +++
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> Thu Mar 12 15:22:46 2015
>> @@ -137,7 +137,11 @@ public class VersionGCWithSplitTest {
>>           Thread t = new Thread(new Runnable() {
>>               @Override
>>               public void run() {
>> -                stats.set(gc.gc(1, HOURS));
>> +                try {
>> +                    stats.set(gc.gc(1, HOURS));
>> +                } catch (IOException e) {
>> +                    throw new RuntimeException(e);
>> +                }
>>               }
>>           });
>>
>>
>>
>>
>

Re: svn commit: r1666220 - in /jackrabbit/oak/trunk: oak-commons/ oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/d...

Posted by Julian Reschke <ju...@gmx.de>.
With this change, I get a reliable test failure on Windows:


Tests in error:
 
overflowToDisk(org.apache.jackrabbit.oak.commons.sort.StringSortTest): 
Unable to delete file: C:\tmp\oak-sorter-1426178913437-0\strings-sorted.txt


Best regards, Julian

On 2015-03-12 16:22, chetanm@apache.org wrote:
> Author: chetanm
> Date: Thu Mar 12 15:22:46 2015
> New Revision: 1666220
>
> URL: http://svn.apache.org/r1666220
> Log:
> OAK-2557 - VersionGC uses way too much memory if there is a large pile of garbage
>
> Added:
>      jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java   (with props)
>      jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java   (with props)
> Modified:
>      jackrabbit/oak/trunk/oak-commons/pom.xml
>      jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>      jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>      jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>      jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>
> Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1666220&r1=1666219&r2=1666220&view=diff
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
> +++ jackrabbit/oak/trunk/oak-commons/pom.xml Thu Mar 12 15:22:46 2015
> @@ -93,6 +93,11 @@
>         <artifactId>oak-mk-api</artifactId>
>         <version>${project.version}</version>
>       </dependency>
> +    <dependency>
> +      <groupId>commons-io</groupId>
> +      <artifactId>commons-io</artifactId>
> +      <version>2.4</version>
> +    </dependency>
>
>       <!-- Test dependencies -->
>       <dependency>
>
> Added: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1666220&view=auto
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java (added)
> +++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java Thu Mar 12 15:22:46 2015
> @@ -0,0 +1,255 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one
> + * or more contributor license agreements.  See the NOTICE file
> + * distributed with this work for additional information
> + * regarding copyright ownership.  The ASF licenses this file
> + * to you under the Apache License, Version 2.0 (the
> + * "License"); you may not use this file except in compliance
> + * with the License.  You may obtain a copy of the License at
> + *
> + *   http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing,
> + * software distributed under the License is distributed on an
> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> + * KIND, either express or implied.  See the License for the
> + * specific language governing permissions and limitations
> + * under the License.
> + */
> +
> +package org.apache.jackrabbit.oak.commons.sort;
> +
> +import java.io.BufferedWriter;
> +import java.io.Closeable;
> +import java.io.File;
> +import java.io.FileNotFoundException;
> +import java.io.IOException;
> +import java.io.Reader;
> +import java.nio.charset.Charset;
> +import java.util.Collections;
> +import java.util.Comparator;
> +import java.util.Iterator;
> +import java.util.List;
> +
> +import com.google.common.base.Charsets;
> +import com.google.common.collect.Lists;
> +import com.google.common.io.Closer;
> +import com.google.common.io.Files;
> +import org.apache.commons.io.FileUtils;
> +import org.apache.commons.io.LineIterator;
> +import org.slf4j.Logger;
> +import org.slf4j.LoggerFactory;
> +
> +/**
> + * Utility class to store a list of string and perform sort on that. For small size
> + * the list would be maintained in memory. If the size crosses the required threshold then
> + * the sorting would be performed externally
> + */
> +public class StringSort implements Closeable {
> +    private final Logger log = LoggerFactory.getLogger(getClass());
> +    public static final int BATCH_SIZE = 2048;
> +
> +    private final int overflowToDiskThreshold;
> +    private final Comparator<String> comparator;
> +
> +    private final List<String> ids = Lists.newArrayList();
> +    private long size;
> +
> +    private final List<String> inMemBatch = Lists.newArrayList();
> +
> +    private boolean useFile;
> +    private PersistentState persistentState;
> +
> +    public StringSort(int overflowToDiskThreshold, Comparator<String> comparator) {
> +        this.overflowToDiskThreshold = overflowToDiskThreshold;
> +        this.comparator = comparator;
> +    }
> +
> +    public void add(String id) throws IOException {
> +        if (useFile) {
> +            addToBatch(id);
> +        } else {
> +            ids.add(id);
> +            if (ids.size() >= overflowToDiskThreshold) {
> +                flushToFile(ids);
> +                useFile = true;
> +                log.debug("In memory buffer crossed the threshold of {}. " +
> +                        "Switching to filesystem [{}] to manage the state", overflowToDiskThreshold, persistentState);
> +            }
> +        }
> +        size++;
> +    }
> +
> +    public void sort() throws IOException {
> +        if (useFile) {
> +            //Flush the last batch
> +            flushToFile(inMemBatch);
> +            persistentState.sort();
> +        } else {
> +            Collections.sort(ids, comparator);
> +        }
> +    }
> +
> +    public Iterator<String> getIds() throws IOException {
> +        if (useFile) {
> +            return persistentState.getIterator();
> +        } else {
> +            return ids.iterator();
> +        }
> +    }
> +
> +    public long getSize() {
> +        return size;
> +    }
> +
> +    public boolean isEmpty() {
> +        return size == 0;
> +    }
> +
> +    public boolean usingFile() {
> +        return useFile;
> +    }
> +
> +    @Override
> +    public void close() throws IOException {
> +        if (persistentState != null) {
> +            persistentState.close();
> +        }
> +    }
> +
> +    private void addToBatch(String id) throws IOException {
> +        inMemBatch.add(id);
> +        if (inMemBatch.size() >= BATCH_SIZE) {
> +            flushToFile(inMemBatch);
> +        }
> +    }
> +
> +    private void flushToFile(List<String> ids) throws IOException {
> +        BufferedWriter w = getPersistentState().getWriter();
> +        for (String id : ids) {
> +            w.write(id);
> +            w.newLine();
> +        }
> +        ids.clear();
> +    }
> +
> +    private PersistentState getPersistentState() {
> +        //Lazily initialize the persistent state
> +        if (persistentState == null) {
> +            persistentState = new PersistentState(comparator);
> +        }
> +        return persistentState;
> +    }
> +
> +    private static class PersistentState implements Closeable {
> +        /**
> +         * Maximum loop count when creating temp directories.
> +         */
> +        private static final int TEMP_DIR_ATTEMPTS = 10000;
> +
> +        private final Charset charset = Charsets.UTF_8;
> +        private final File workDir;
> +        private final Comparator<String> comparator;
> +        private File idFile;
> +        private File sortedFile;
> +        private BufferedWriter writer;
> +        private List<CloseableIterator> openedIterators = Lists.newArrayList();
> +
> +        public PersistentState(Comparator<String> comparator) {
> +            this(comparator, createTempDir("oak-sorter-"));
> +        }
> +
> +        public PersistentState(Comparator<String> comparator, File workDir) {
> +            this.workDir = workDir;
> +            this.comparator = comparator;
> +        }
> +
> +        public BufferedWriter getWriter() throws FileNotFoundException {
> +            if (idFile == null) {
> +                idFile = new File(workDir, "strings.txt");
> +                sortedFile = new File(workDir, "strings-sorted.txt");
> +                writer = Files.newWriter(idFile, charset);
> +            }
> +            return writer;
> +        }
> +
> +        public void sort() throws IOException {
> +            closeWriter();
> +
> +            List<File> sortedFiles = ExternalSort.sortInBatch(idFile,
> +                    comparator, //Comparator to use
> +                    ExternalSort.DEFAULTMAXTEMPFILES,
> +                    ExternalSort.DEFAULT_MAX_MEM_BYTES,
> +                    charset, //charset
> +                    workDir,  //temp directory where intermediate files are created
> +                    true //distinct
> +            );
> +
> +            ExternalSort.mergeSortedFiles(sortedFiles,
> +                    sortedFile,
> +                    comparator,
> +                    charset,
> +                    true
> +            );
> +        }
> +
> +        public Iterator<String> getIterator() throws IOException {
> +            CloseableIterator itr = new CloseableIterator(Files.newReader(sortedFile, charset));
> +            openedIterators.add(itr);
> +            return itr;
> +        }
> +
> +        @Override
> +        public String toString() {
> +            return "PersistentState : workDir=" + workDir.getAbsolutePath();
> +        }
> +
> +        @Override
> +        public void close() throws IOException {
> +            Closer closer = Closer.create();
> +            try {
> +                closer.register(writer);
> +                for (CloseableIterator citr : openedIterators) {
> +                    closer.register(citr);
> +                }
> +                closer.register(new Closeable() {
> +                    @Override
> +                    public void close() throws IOException {
> +                        FileUtils.deleteDirectory(workDir);
> +                    }
> +                });
> +            } finally {
> +                closer.close();
> +            }
> +        }
> +
> +        private void closeWriter() throws IOException {
> +            writer.close();
> +        }
> +
> +        /**
> +         * Taken from com.google.common.io.Files#createTempDir()
> +         * Modified to provide a prefix
> +         */
> +        private static File createTempDir(String prefix) {
> +            File baseDir = new File(System.getProperty("java.io.tmpdir"));
> +            String baseName = System.currentTimeMillis() + "-";
> +
> +            for (int counter = 0; counter < TEMP_DIR_ATTEMPTS; counter++) {
> +                File tempDir = new File(baseDir, prefix + baseName + counter);
> +                if (tempDir.mkdir()) {
> +                    return tempDir;
> +                }
> +            }
> +            throw new IllegalStateException("Failed to create directory within "
> +                    + TEMP_DIR_ATTEMPTS + " attempts (tried "
> +                    + baseName + "0 to " + baseName + (TEMP_DIR_ATTEMPTS - 1) + ')');
> +        }
> +    }
> +
> +    private static class CloseableIterator extends LineIterator implements Closeable {
> +        public CloseableIterator(Reader reader) throws IllegalArgumentException {
> +            super(reader);
> +        }
> +    }
> +}
>
> Propchange: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> ------------------------------------------------------------------------------
>      svn:eol-style = native
>
> Added: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java?rev=1666220&view=auto
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java (added)
> +++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java Thu Mar 12 15:22:46 2015
> @@ -0,0 +1,144 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one
> + * or more contributor license agreements.  See the NOTICE file
> + * distributed with this work for additional information
> + * regarding copyright ownership.  The ASF licenses this file
> + * to you under the Apache License, Version 2.0 (the
> + * "License"); you may not use this file except in compliance
> + * with the License.  You may obtain a copy of the License at
> + *
> + *   http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing,
> + * software distributed under the License is distributed on an
> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> + * KIND, either express or implied.  See the License for the
> + * specific language governing permissions and limitations
> + * under the License.
> + */
> +
> +package org.apache.jackrabbit.oak.commons.sort;
> +
> +import java.io.IOException;
> +import java.io.Serializable;
> +import java.util.ArrayList;
> +import java.util.Arrays;
> +import java.util.Collections;
> +import java.util.Comparator;
> +import java.util.HashSet;
> +import java.util.List;
> +import java.util.Set;
> +
> +import com.google.common.base.Joiner;
> +import com.google.common.collect.Collections2;
> +import com.google.common.collect.ImmutableList;
> +import org.junit.Test;
> +
> +import static org.junit.Assert.assertEquals;
> +import static org.junit.Assert.assertFalse;
> +import static org.junit.Assert.assertTrue;
> +
> +public class StringSortTest {
> +    private Comparator<String> comparator = new PathComparator();
> +    private StringSort collector;
> +
> +    @Test
> +    public void inMemory() throws Exception{
> +        List<String> paths = createTestPaths(5, false);
> +        collector = new StringSort(paths.size() + 1,comparator);
> +        addPathsToCollector(paths);
> +
> +        assertConstraints(paths);
> +        assertFalse(collector.usingFile());
> +        collector.close();
> +    }
> +
> +    @Test
> +    public void overflowToDisk() throws Exception{
> +        //Create ~50k paths
> +        List<String> paths = createTestPaths(10, true);
> +        collector = new StringSort(1000, comparator);
> +        addPathsToCollector(paths);
> +
> +        assertTrue(collector.usingFile());
> +        assertConstraints(paths);
> +
> +        collector.close();
> +    }
> +
> +    private void assertConstraints(List<String> paths) throws IOException {
> +        assertEquals(paths.size(), collector.getSize());
> +
> +        Collections.sort(paths, comparator);
> +        collector.sort();
> +
> +        List<String> sortedPaths = ImmutableList.copyOf(collector.getIds());
> +        assertEquals(paths.size(), sortedPaths.size());
> +        assertEquals(paths, sortedPaths);
> +    }
> +
> +    private void addPathsToCollector(Iterable<String> paths) throws IOException {
> +        for (String path : paths){
> +            collector.add(path);
> +        }
> +    }
> +
> +    private static List<String> createTestPaths(int depth, boolean permutation){
> +        List<String> rootPaths = Arrays.asList("a", "b", "c", "d", "e", "f", "g");
> +        List<String> paths = new ArrayList<String>();
> +
> +
> +        if (permutation){
> +            List<String> newRoots = new ArrayList<String>();
> +            for (List<String> permuts : Collections2.orderedPermutations(rootPaths)){
> +                newRoots.add(Joiner.on("").join(permuts));
> +            }
> +            rootPaths = newRoots;
> +        }
> +
> +        for (String root : rootPaths){
> +            List<String> pathElements = new ArrayList<String>();
> +            pathElements.add(root);
> +            paths.add(createId(pathElements));
> +            for (int i = 0; i < depth; i++){
> +                pathElements.add(root + i);
> +                paths.add(createId(pathElements));
> +            }
> +        }
> +
> +        Set<String> idSet = new HashSet<String>(paths);
> +        assertEquals(paths.size(), idSet.size());
> +
> +        Collections.shuffle(paths);
> +        return paths;
> +    }
> +
> +    private static String createId(Iterable<String> pathElements){
> +        return "/" + Joiner.on('/').join(pathElements);
> +    }
> +
> +    private static  class PathComparator implements Comparator<String>, Serializable {
> +        @Override
> +        public int compare(String o1, String o2) {
> +            int d1 = pathDepth(o1);
> +            int d2 = pathDepth(o2);
> +            if (d1 != d2) {
> +                return Integer.signum(d2 - d1);
> +            }
> +            return o1.compareTo(o2);
> +        }
> +
> +        private static int pathDepth(String path) {
> +            if (path.equals("/")) {
> +                return 0;
> +            }
> +            int depth = 0;
> +            for (int i = 0; i < path.length(); i++) {
> +                if (path.charAt(i) == '/') {
> +                    depth++;
> +                }
> +            }
> +            return depth;
> +        }
> +    }
> +}
>
> Propchange: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> ------------------------------------------------------------------------------
>      svn:eol-style = native
>
> Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java (original)
> +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java Thu Mar 12 15:22:46 2015
> @@ -574,7 +574,11 @@ public class DocumentNodeStoreService {
>           RevisionGC revisionGC = new RevisionGC(new Runnable() {
>               @Override
>               public void run() {
> -                store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs, TimeUnit.SECONDS);
> +                try {
> +                    store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs, TimeUnit.SECONDS);
> +                } catch (IOException e) {
> +                    log.warn("Error occurred while executing the Version Garbage Collector", e);
> +                }
>               }
>           }, executor);
>           registrations.add(registerMBean(whiteboard, RevisionGCMBean.class, revisionGC,
>
> Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java (original)
> +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java Thu Mar 12 15:22:46 2015
> @@ -19,9 +19,9 @@
>
>   package org.apache.jackrabbit.oak.plugins.document;
>
> -import java.util.ArrayList;
> -import java.util.Collections;
> +import java.io.IOException;
>   import java.util.EnumSet;
> +import java.util.Iterator;
>   import java.util.List;
>   import java.util.Set;
>   import java.util.concurrent.TimeUnit;
> @@ -31,18 +31,23 @@ import com.google.common.base.StandardSy
>   import com.google.common.base.Stopwatch;
>   import com.google.common.collect.ImmutableList;
>
> +import org.apache.jackrabbit.oak.commons.sort.StringSort;
>   import org.apache.jackrabbit.oak.plugins.document.util.Utils;
>   import org.slf4j.Logger;
>   import org.slf4j.LoggerFactory;
>
> +import static com.google.common.collect.Iterators.partition;
>   import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
>   import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
>
>   public class VersionGarbageCollector {
> +    //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid re-partitioning
> +    private static final int DELETE_BATCH_SIZE = 450;
>       private final DocumentNodeStore nodeStore;
>       private final VersionGCSupport versionStore;
> +    private int overflowToDiskThreshold = 100000;
>
> -    private final Logger log = LoggerFactory.getLogger(getClass());
> +    private static final Logger log = LoggerFactory.getLogger(VersionGarbageCollector.class);
>
>       /**
>        * Split document types which can be safely garbage collected
> @@ -56,7 +61,7 @@ public class VersionGarbageCollector {
>           this.versionStore = gcSupport;
>       }
>
> -    public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) {
> +    public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) throws IOException {
>           long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
>           Stopwatch sw = Stopwatch.createStarted();
>           VersionGCStats stats = new VersionGCStats();
> @@ -85,41 +90,60 @@ public class VersionGarbageCollector {
>           return stats;
>       }
>
> +    public void setOverflowToDiskThreshold(int overflowToDiskThreshold) {
> +        this.overflowToDiskThreshold = overflowToDiskThreshold;
> +    }
> +
>       private void collectSplitDocuments(VersionGCStats stats, long oldestRevTimeStamp) {
>           versionStore.deleteSplitDocuments(GC_TYPES, oldestRevTimeStamp, stats);
>       }
>
> -    private void collectDeletedDocuments(VersionGCStats stats, Revision headRevision, long oldestRevTimeStamp) {
> -        List<String> docIdsToDelete = new ArrayList<String>();
> -        Iterable<NodeDocument> itr = versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
> +    private void collectDeletedDocuments(VersionGCStats stats, Revision headRevision, long oldestRevTimeStamp)
> +            throws IOException {
> +        StringSort docIdsToDelete = new StringSort(overflowToDiskThreshold, NodeDocumentIdComparator.INSTANCE);
>           try {
> -            for (NodeDocument doc : itr) {
> -                //Check if node is actually deleted at current revision
> -                //As node is not modified since oldestRevTimeStamp then
> -                //this node has not be revived again in past maxRevisionAge
> -                //So deleting it is safe
> -                if (doc.getNodeAtRevision(nodeStore, headRevision, null) == null) {
> -                    docIdsToDelete.add(doc.getId());
> -                    //Collect id of all previous docs also
> -                    for (NodeDocument prevDoc : ImmutableList.copyOf(doc.getAllPreviousDocs())) {
> -                        docIdsToDelete.add(prevDoc.getId());
> +            Iterable<NodeDocument> itr = versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
> +            try {
> +                for (NodeDocument doc : itr) {
> +                    //Check if node is actually deleted at current revision
> +                    //As node is not modified since oldestRevTimeStamp then
> +                    //this node has not be revived again in past maxRevisionAge
> +                    //So deleting it is safe
> +                    if (doc.getNodeAtRevision(nodeStore, headRevision, null) == null) {
> +                        docIdsToDelete.add(doc.getId());
> +                        //Collect id of all previous docs also
> +                        for (NodeDocument prevDoc : ImmutableList.copyOf(doc.getAllPreviousDocs())) {
> +                            docIdsToDelete.add(prevDoc.getId());
> +                        }
>                       }
>                   }
> +            } finally {
> +                Utils.closeIfCloseable(itr);
> +            }
> +
> +            if (docIdsToDelete.isEmpty()){
> +                return;
>               }
> -        } finally {
> -            Utils.closeIfCloseable(itr);
> -        }
>
> -        Collections.sort(docIdsToDelete, NodeDocumentIdComparator.INSTANCE);
> +            docIdsToDelete.sort();
> +            log.info("Proceeding to delete [{}] documents", docIdsToDelete.getSize());
>
> -        if(log.isDebugEnabled()) {
> -            StringBuilder sb = new StringBuilder("Deleted document with following ids were deleted as part of GC \n");
> -            Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb, docIdsToDelete);
> -            log.debug(sb.toString());
> +            if (log.isDebugEnabled() && docIdsToDelete.getSize() < 1000) {
> +                StringBuilder sb = new StringBuilder("Deleted document with following ids were deleted as part of GC \n");
> +                Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb, docIdsToDelete.getIds());
> +                log.debug(sb.toString());
> +            }
> +
> +            Iterator<List<String>> idListItr = partition(docIdsToDelete.getIds(), DELETE_BATCH_SIZE);
> +            while (idListItr.hasNext()) {
> +                nodeStore.getDocumentStore().remove(Collection.NODES, idListItr.next());
> +            }
> +
> +            nodeStore.invalidateDocChildrenCache();
> +            stats.deletedDocGCCount += docIdsToDelete.getSize();
> +        } finally {
> +            docIdsToDelete.close();
>           }
> -        nodeStore.getDocumentStore().remove(Collection.NODES, docIdsToDelete);
> -        nodeStore.invalidateDocChildrenCache();
> -        stats.deletedDocGCCount += docIdsToDelete.size();
>       }
>
>       public static class VersionGCStats {
>
> Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java (original)
> +++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java Thu Mar 12 15:22:46 2015
> @@ -36,6 +36,7 @@ import org.junit.Before;
>   import org.junit.Test;
>
>   import static java.util.concurrent.TimeUnit.HOURS;
> +import static org.junit.Assert.assertEquals;
>   import static org.junit.Assert.assertNull;
>   import static org.junit.Assert.fail;
>
> @@ -104,6 +105,53 @@ public class VersionGCDeletionTest {
>           assertNull(ts.find(Collection.NODES, "1:/x"));
>       }
>
> +    @Test
> +    public void deleteLargeNumber() throws Exception{
> +        int noOfDocsToDelete = 10000;
> +        DocumentStore ts = new MemoryDocumentStore();
> +        store = new DocumentMK.Builder()
> +                .clock(clock)
> +                .setDocumentStore(new MemoryDocumentStore())
> +                .setAsyncDelay(0)
> +                .getNodeStore();
> +
> +        //Baseline the clock
> +        clock.waitUntil(Revision.getCurrentTimestamp());
> +
> +        NodeBuilder b1 = store.getRoot().builder();
> +        NodeBuilder xb = b1.child("x");
> +        for (int i = 0; i < noOfDocsToDelete; i++){
> +            xb.child("a"+i).child("b"+i);
> +        }
> +        store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
> +
> +        long maxAge = 1; //hours
> +        long delta = TimeUnit.MINUTES.toMillis(10);
> +
> +        //Remove x/y
> +        NodeBuilder b2 = store.getRoot().builder();
> +        b2.child("x").remove();
> +        store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
> +
> +        store.runBackgroundOperations();
> +
> +        //3. Check that deleted doc does get collected post maxAge
> +        clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
> +        VersionGarbageCollector gc = store.getVersionGarbageCollector();
> +        gc.setOverflowToDiskThreshold(100);
> +
> +        VersionGarbageCollector.VersionGCStats stats = gc.gc(maxAge * 2, HOURS);
> +        assertEquals(noOfDocsToDelete * 2 + 1, stats.deletedDocGCCount);
> +
> +
> +        assertNull(ts.find(Collection.NODES, "1:/x"));
> +
> +        for (int i = 0; i < noOfDocsToDelete; i++){
> +            assertNull(ts.find(Collection.NODES, "2:/a"+i+"/b"+i));
> +            assertNull(ts.find(Collection.NODES, "1:/a"+i));
> +        }
> +    }
> +
>       private static class TestDocumentStore extends MemoryDocumentStore {
>           boolean throwException;
>           @Override
>
> Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java (original)
> +++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java Thu Mar 12 15:22:46 2015
> @@ -137,7 +137,11 @@ public class VersionGCWithSplitTest {
>           Thread t = new Thread(new Runnable() {
>               @Override
>               public void run() {
> -                stats.set(gc.gc(1, HOURS));
> +                try {
> +                    stats.set(gc.gc(1, HOURS));
> +                } catch (IOException e) {
> +                    throw new RuntimeException(e);
> +                }
>               }
>           });
>
>
>
>