You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2015/03/12 16:22:47 UTC
svn commit: r1666220 - in /jackrabbit/oak/trunk: oak-commons/
oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/
oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/
oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/d...
Author: chetanm
Date: Thu Mar 12 15:22:46 2015
New Revision: 1666220
URL: http://svn.apache.org/r1666220
Log:
OAK-2557 - VersionGC uses way too much memory if there is a large pile of garbage
Added:
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java (with props)
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java (with props)
Modified:
jackrabbit/oak/trunk/oak-commons/pom.xml
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1666220&r1=1666219&r2=1666220&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-commons/pom.xml Thu Mar 12 15:22:46 2015
@@ -93,6 +93,11 @@
<artifactId>oak-mk-api</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>2.4</version>
+ </dependency>
<!-- Test dependencies -->
<dependency>
Added: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1666220&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java Thu Mar 12 15:22:46 2015
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.commons.sort;
+
+import java.io.BufferedWriter;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import com.google.common.io.Closer;
+import com.google.common.io.Files;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.io.LineIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility class to store a list of string and perform sort on that. For small size
+ * the list would be maintained in memory. If the size crosses the required threshold then
+ * the sorting would be performed externally
+ */
+public class StringSort implements Closeable {
+ private final Logger log = LoggerFactory.getLogger(getClass());
+ public static final int BATCH_SIZE = 2048;
+
+ private final int overflowToDiskThreshold;
+ private final Comparator<String> comparator;
+
+ private final List<String> ids = Lists.newArrayList();
+ private long size;
+
+ private final List<String> inMemBatch = Lists.newArrayList();
+
+ private boolean useFile;
+ private PersistentState persistentState;
+
+ public StringSort(int overflowToDiskThreshold, Comparator<String> comparator) {
+ this.overflowToDiskThreshold = overflowToDiskThreshold;
+ this.comparator = comparator;
+ }
+
+ public void add(String id) throws IOException {
+ if (useFile) {
+ addToBatch(id);
+ } else {
+ ids.add(id);
+ if (ids.size() >= overflowToDiskThreshold) {
+ flushToFile(ids);
+ useFile = true;
+ log.debug("In memory buffer crossed the threshold of {}. " +
+ "Switching to filesystem [{}] to manage the state", overflowToDiskThreshold, persistentState);
+ }
+ }
+ size++;
+ }
+
+ public void sort() throws IOException {
+ if (useFile) {
+ //Flush the last batch
+ flushToFile(inMemBatch);
+ persistentState.sort();
+ } else {
+ Collections.sort(ids, comparator);
+ }
+ }
+
+ public Iterator<String> getIds() throws IOException {
+ if (useFile) {
+ return persistentState.getIterator();
+ } else {
+ return ids.iterator();
+ }
+ }
+
+ public long getSize() {
+ return size;
+ }
+
+ public boolean isEmpty() {
+ return size == 0;
+ }
+
+ public boolean usingFile() {
+ return useFile;
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (persistentState != null) {
+ persistentState.close();
+ }
+ }
+
+ private void addToBatch(String id) throws IOException {
+ inMemBatch.add(id);
+ if (inMemBatch.size() >= BATCH_SIZE) {
+ flushToFile(inMemBatch);
+ }
+ }
+
+ private void flushToFile(List<String> ids) throws IOException {
+ BufferedWriter w = getPersistentState().getWriter();
+ for (String id : ids) {
+ w.write(id);
+ w.newLine();
+ }
+ ids.clear();
+ }
+
+ private PersistentState getPersistentState() {
+ //Lazily initialize the persistent state
+ if (persistentState == null) {
+ persistentState = new PersistentState(comparator);
+ }
+ return persistentState;
+ }
+
+ private static class PersistentState implements Closeable {
+ /**
+ * Maximum loop count when creating temp directories.
+ */
+ private static final int TEMP_DIR_ATTEMPTS = 10000;
+
+ private final Charset charset = Charsets.UTF_8;
+ private final File workDir;
+ private final Comparator<String> comparator;
+ private File idFile;
+ private File sortedFile;
+ private BufferedWriter writer;
+ private List<CloseableIterator> openedIterators = Lists.newArrayList();
+
+ public PersistentState(Comparator<String> comparator) {
+ this(comparator, createTempDir("oak-sorter-"));
+ }
+
+ public PersistentState(Comparator<String> comparator, File workDir) {
+ this.workDir = workDir;
+ this.comparator = comparator;
+ }
+
+ public BufferedWriter getWriter() throws FileNotFoundException {
+ if (idFile == null) {
+ idFile = new File(workDir, "strings.txt");
+ sortedFile = new File(workDir, "strings-sorted.txt");
+ writer = Files.newWriter(idFile, charset);
+ }
+ return writer;
+ }
+
+ public void sort() throws IOException {
+ closeWriter();
+
+ List<File> sortedFiles = ExternalSort.sortInBatch(idFile,
+ comparator, //Comparator to use
+ ExternalSort.DEFAULTMAXTEMPFILES,
+ ExternalSort.DEFAULT_MAX_MEM_BYTES,
+ charset, //charset
+ workDir, //temp directory where intermediate files are created
+ true //distinct
+ );
+
+ ExternalSort.mergeSortedFiles(sortedFiles,
+ sortedFile,
+ comparator,
+ charset,
+ true
+ );
+ }
+
+ public Iterator<String> getIterator() throws IOException {
+ CloseableIterator itr = new CloseableIterator(Files.newReader(sortedFile, charset));
+ openedIterators.add(itr);
+ return itr;
+ }
+
+ @Override
+ public String toString() {
+ return "PersistentState : workDir=" + workDir.getAbsolutePath();
+ }
+
+ @Override
+ public void close() throws IOException {
+ Closer closer = Closer.create();
+ try {
+ closer.register(writer);
+ for (CloseableIterator citr : openedIterators) {
+ closer.register(citr);
+ }
+ closer.register(new Closeable() {
+ @Override
+ public void close() throws IOException {
+ FileUtils.deleteDirectory(workDir);
+ }
+ });
+ } finally {
+ closer.close();
+ }
+ }
+
+ private void closeWriter() throws IOException {
+ writer.close();
+ }
+
+ /**
+ * Taken from com.google.common.io.Files#createTempDir()
+ * Modified to provide a prefix
+ */
+ private static File createTempDir(String prefix) {
+ File baseDir = new File(System.getProperty("java.io.tmpdir"));
+ String baseName = System.currentTimeMillis() + "-";
+
+ for (int counter = 0; counter < TEMP_DIR_ATTEMPTS; counter++) {
+ File tempDir = new File(baseDir, prefix + baseName + counter);
+ if (tempDir.mkdir()) {
+ return tempDir;
+ }
+ }
+ throw new IllegalStateException("Failed to create directory within "
+ + TEMP_DIR_ATTEMPTS + " attempts (tried "
+ + baseName + "0 to " + baseName + (TEMP_DIR_ATTEMPTS - 1) + ')');
+ }
+ }
+
+ private static class CloseableIterator extends LineIterator implements Closeable {
+ public CloseableIterator(Reader reader) throws IllegalArgumentException {
+ super(reader);
+ }
+ }
+}
Propchange: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java?rev=1666220&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java Thu Mar 12 15:22:46 2015
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.commons.sort;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.ImmutableList;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class StringSortTest {
+ private Comparator<String> comparator = new PathComparator();
+ private StringSort collector;
+
+ @Test
+ public void inMemory() throws Exception{
+ List<String> paths = createTestPaths(5, false);
+ collector = new StringSort(paths.size() + 1,comparator);
+ addPathsToCollector(paths);
+
+ assertConstraints(paths);
+ assertFalse(collector.usingFile());
+ collector.close();
+ }
+
+ @Test
+ public void overflowToDisk() throws Exception{
+ //Create ~50k paths
+ List<String> paths = createTestPaths(10, true);
+ collector = new StringSort(1000, comparator);
+ addPathsToCollector(paths);
+
+ assertTrue(collector.usingFile());
+ assertConstraints(paths);
+
+ collector.close();
+ }
+
+ private void assertConstraints(List<String> paths) throws IOException {
+ assertEquals(paths.size(), collector.getSize());
+
+ Collections.sort(paths, comparator);
+ collector.sort();
+
+ List<String> sortedPaths = ImmutableList.copyOf(collector.getIds());
+ assertEquals(paths.size(), sortedPaths.size());
+ assertEquals(paths, sortedPaths);
+ }
+
+ private void addPathsToCollector(Iterable<String> paths) throws IOException {
+ for (String path : paths){
+ collector.add(path);
+ }
+ }
+
+ private static List<String> createTestPaths(int depth, boolean permutation){
+ List<String> rootPaths = Arrays.asList("a", "b", "c", "d", "e", "f", "g");
+ List<String> paths = new ArrayList<String>();
+
+
+ if (permutation){
+ List<String> newRoots = new ArrayList<String>();
+ for (List<String> permuts : Collections2.orderedPermutations(rootPaths)){
+ newRoots.add(Joiner.on("").join(permuts));
+ }
+ rootPaths = newRoots;
+ }
+
+ for (String root : rootPaths){
+ List<String> pathElements = new ArrayList<String>();
+ pathElements.add(root);
+ paths.add(createId(pathElements));
+ for (int i = 0; i < depth; i++){
+ pathElements.add(root + i);
+ paths.add(createId(pathElements));
+ }
+ }
+
+ Set<String> idSet = new HashSet<String>(paths);
+ assertEquals(paths.size(), idSet.size());
+
+ Collections.shuffle(paths);
+ return paths;
+ }
+
+ private static String createId(Iterable<String> pathElements){
+ return "/" + Joiner.on('/').join(pathElements);
+ }
+
+ private static class PathComparator implements Comparator<String>, Serializable {
+ @Override
+ public int compare(String o1, String o2) {
+ int d1 = pathDepth(o1);
+ int d2 = pathDepth(o2);
+ if (d1 != d2) {
+ return Integer.signum(d2 - d1);
+ }
+ return o1.compareTo(o2);
+ }
+
+ private static int pathDepth(String path) {
+ if (path.equals("/")) {
+ return 0;
+ }
+ int depth = 0;
+ for (int i = 0; i < path.length(); i++) {
+ if (path.charAt(i) == '/') {
+ depth++;
+ }
+ }
+ return depth;
+ }
+ }
+}
Propchange: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java?rev=1666220&r1=1666219&r2=1666220&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java Thu Mar 12 15:22:46 2015
@@ -574,7 +574,11 @@ public class DocumentNodeStoreService {
RevisionGC revisionGC = new RevisionGC(new Runnable() {
@Override
public void run() {
- store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs, TimeUnit.SECONDS);
+ try {
+ store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs, TimeUnit.SECONDS);
+ } catch (IOException e) {
+ log.warn("Error occurred while executing the Version Garbage Collector", e);
+ }
}
}, executor);
registrations.add(registerMBean(whiteboard, RevisionGCMBean.class, revisionGC,
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1666220&r1=1666219&r2=1666220&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java Thu Mar 12 15:22:46 2015
@@ -19,9 +19,9 @@
package org.apache.jackrabbit.oak.plugins.document;
-import java.util.ArrayList;
-import java.util.Collections;
+import java.io.IOException;
import java.util.EnumSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;
@@ -31,18 +31,23 @@ import com.google.common.base.StandardSy
import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableList;
+import org.apache.jackrabbit.oak.commons.sort.StringSort;
import org.apache.jackrabbit.oak.plugins.document.util.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static com.google.common.collect.Iterators.partition;
import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
public class VersionGarbageCollector {
+ //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid re-partitioning
+ private static final int DELETE_BATCH_SIZE = 450;
private final DocumentNodeStore nodeStore;
private final VersionGCSupport versionStore;
+ private int overflowToDiskThreshold = 100000;
- private final Logger log = LoggerFactory.getLogger(getClass());
+ private static final Logger log = LoggerFactory.getLogger(VersionGarbageCollector.class);
/**
* Split document types which can be safely garbage collected
@@ -56,7 +61,7 @@ public class VersionGarbageCollector {
this.versionStore = gcSupport;
}
- public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) {
+ public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) throws IOException {
long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
Stopwatch sw = Stopwatch.createStarted();
VersionGCStats stats = new VersionGCStats();
@@ -85,41 +90,60 @@ public class VersionGarbageCollector {
return stats;
}
+ public void setOverflowToDiskThreshold(int overflowToDiskThreshold) {
+ this.overflowToDiskThreshold = overflowToDiskThreshold;
+ }
+
private void collectSplitDocuments(VersionGCStats stats, long oldestRevTimeStamp) {
versionStore.deleteSplitDocuments(GC_TYPES, oldestRevTimeStamp, stats);
}
- private void collectDeletedDocuments(VersionGCStats stats, Revision headRevision, long oldestRevTimeStamp) {
- List<String> docIdsToDelete = new ArrayList<String>();
- Iterable<NodeDocument> itr = versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
+ private void collectDeletedDocuments(VersionGCStats stats, Revision headRevision, long oldestRevTimeStamp)
+ throws IOException {
+ StringSort docIdsToDelete = new StringSort(overflowToDiskThreshold, NodeDocumentIdComparator.INSTANCE);
try {
- for (NodeDocument doc : itr) {
- //Check if node is actually deleted at current revision
- //As node is not modified since oldestRevTimeStamp then
- //this node has not be revived again in past maxRevisionAge
- //So deleting it is safe
- if (doc.getNodeAtRevision(nodeStore, headRevision, null) == null) {
- docIdsToDelete.add(doc.getId());
- //Collect id of all previous docs also
- for (NodeDocument prevDoc : ImmutableList.copyOf(doc.getAllPreviousDocs())) {
- docIdsToDelete.add(prevDoc.getId());
+ Iterable<NodeDocument> itr = versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
+ try {
+ for (NodeDocument doc : itr) {
+ //Check if node is actually deleted at current revision
+ //As node is not modified since oldestRevTimeStamp then
+ //this node has not be revived again in past maxRevisionAge
+ //So deleting it is safe
+ if (doc.getNodeAtRevision(nodeStore, headRevision, null) == null) {
+ docIdsToDelete.add(doc.getId());
+ //Collect id of all previous docs also
+ for (NodeDocument prevDoc : ImmutableList.copyOf(doc.getAllPreviousDocs())) {
+ docIdsToDelete.add(prevDoc.getId());
+ }
}
}
+ } finally {
+ Utils.closeIfCloseable(itr);
+ }
+
+ if (docIdsToDelete.isEmpty()){
+ return;
}
- } finally {
- Utils.closeIfCloseable(itr);
- }
- Collections.sort(docIdsToDelete, NodeDocumentIdComparator.INSTANCE);
+ docIdsToDelete.sort();
+ log.info("Proceeding to delete [{}] documents", docIdsToDelete.getSize());
- if(log.isDebugEnabled()) {
- StringBuilder sb = new StringBuilder("Deleted document with following ids were deleted as part of GC \n");
- Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb, docIdsToDelete);
- log.debug(sb.toString());
+ if (log.isDebugEnabled() && docIdsToDelete.getSize() < 1000) {
+ StringBuilder sb = new StringBuilder("Deleted document with following ids were deleted as part of GC \n");
+ Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb, docIdsToDelete.getIds());
+ log.debug(sb.toString());
+ }
+
+ Iterator<List<String>> idListItr = partition(docIdsToDelete.getIds(), DELETE_BATCH_SIZE);
+ while (idListItr.hasNext()) {
+ nodeStore.getDocumentStore().remove(Collection.NODES, idListItr.next());
+ }
+
+ nodeStore.invalidateDocChildrenCache();
+ stats.deletedDocGCCount += docIdsToDelete.getSize();
+ } finally {
+ docIdsToDelete.close();
}
- nodeStore.getDocumentStore().remove(Collection.NODES, docIdsToDelete);
- nodeStore.invalidateDocChildrenCache();
- stats.deletedDocGCCount += docIdsToDelete.size();
}
public static class VersionGCStats {
Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java Thu Mar 12 15:22:46 2015
@@ -36,6 +36,7 @@ import org.junit.Before;
import org.junit.Test;
import static java.util.concurrent.TimeUnit.HOURS;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.fail;
@@ -104,6 +105,53 @@ public class VersionGCDeletionTest {
assertNull(ts.find(Collection.NODES, "1:/x"));
}
+ @Test
+ public void deleteLargeNumber() throws Exception{
+ int noOfDocsToDelete = 10000;
+ DocumentStore ts = new MemoryDocumentStore();
+ store = new DocumentMK.Builder()
+ .clock(clock)
+ .setDocumentStore(new MemoryDocumentStore())
+ .setAsyncDelay(0)
+ .getNodeStore();
+
+ //Baseline the clock
+ clock.waitUntil(Revision.getCurrentTimestamp());
+
+ NodeBuilder b1 = store.getRoot().builder();
+ NodeBuilder xb = b1.child("x");
+ for (int i = 0; i < noOfDocsToDelete; i++){
+ xb.child("a"+i).child("b"+i);
+ }
+ store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+ long maxAge = 1; //hours
+ long delta = TimeUnit.MINUTES.toMillis(10);
+
+ //Remove x/y
+ NodeBuilder b2 = store.getRoot().builder();
+ b2.child("x").remove();
+ store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+
+ store.runBackgroundOperations();
+
+ //3. Check that deleted doc does get collected post maxAge
+ clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
+ VersionGarbageCollector gc = store.getVersionGarbageCollector();
+ gc.setOverflowToDiskThreshold(100);
+
+ VersionGarbageCollector.VersionGCStats stats = gc.gc(maxAge * 2, HOURS);
+ assertEquals(noOfDocsToDelete * 2 + 1, stats.deletedDocGCCount);
+
+
+ assertNull(ts.find(Collection.NODES, "1:/x"));
+
+ for (int i = 0; i < noOfDocsToDelete; i++){
+ assertNull(ts.find(Collection.NODES, "2:/a"+i+"/b"+i));
+ assertNull(ts.find(Collection.NODES, "1:/a"+i));
+ }
+ }
+
private static class TestDocumentStore extends MemoryDocumentStore {
boolean throwException;
@Override
Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java Thu Mar 12 15:22:46 2015
@@ -137,7 +137,11 @@ public class VersionGCWithSplitTest {
Thread t = new Thread(new Runnable() {
@Override
public void run() {
- stats.set(gc.gc(1, HOURS));
+ try {
+ stats.set(gc.gc(1, HOURS));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
}
});
Re: svn commit: r1666220 - in /jackrabbit/oak/trunk: oak-commons/
oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/
oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/d...
Posted by Chetan Mehrotra <ch...@gmail.com>.
Thanks Amit for confirming it!
Chetan Mehrotra
On Fri, Mar 13, 2015 at 11:53 AM, Amit Jain <am...@ieee.org> wrote:
> The tests are passing for me on windows with the latest change.
>
> Thanks
> Amit
>
> On Fri, Mar 13, 2015 at 9:21 AM, Chetan Mehrotra <ch...@gmail.com>
> wrote:
>
>> Looks like Closer closes the closeables in LIFO manner due to which
>> directory containing that file got deleted first. I have change the
>> logic now.
>>
>> Let me know if the test passes for you on Windows
>> Chetan Mehrotra
>>
>>
>> On Thu, Mar 12, 2015 at 10:21 PM, Julian Reschke <ju...@gmx.de>
>> wrote:
>> > With this change, I get a reliable test failure on Windows:
>> >
>> >
>> > Tests in error:
>> >
>> > overflowToDisk(org.apache.jackrabbit.oak.commons.sort.StringSortTest):
>> > Unable to delete file:
>> C:\tmp\oak-sorter-1426178913437-0\strings-sorted.txt
>> >
>> >
>> > Best regards, Julian
>> >
>> >
>> > On 2015-03-12 16:22, chetanm@apache.org wrote:
>> >>
>> >> Author: chetanm
>> >> Date: Thu Mar 12 15:22:46 2015
>> >> New Revision: 1666220
>> >>
>> >> URL: http://svn.apache.org/r1666220
>> >> Log:
>> >> OAK-2557 - VersionGC uses way too much memory if there is a large pile
>> of
>> >> garbage
>> >>
>> >> Added:
>> >>
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> >> (with props)
>> >>
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> >> (with props)
>> >> Modified:
>> >> jackrabbit/oak/trunk/oak-commons/pom.xml
>> >>
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> >>
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> >>
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> >>
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> >>
>> >> Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1666220&r1=1666219&r2=1666220&view=diff
>> >>
>> >>
>> ==============================================================================
>> >> --- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
>> >> +++ jackrabbit/oak/trunk/oak-commons/pom.xml Thu Mar 12 15:22:46 2015
>> >> @@ -93,6 +93,11 @@
>> >> <artifactId>oak-mk-api</artifactId>
>> >> <version>${project.version}</version>
>> >> </dependency>
>> >> + <dependency>
>> >> + <groupId>commons-io</groupId>
>> >> + <artifactId>commons-io</artifactId>
>> >> + <version>2.4</version>
>> >> + </dependency>
>> >>
>> >> <!-- Test dependencies -->
>> >> <dependency>
>> >>
>> >> Added:
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1666220&view=auto
>> >>
>> >>
>> ==============================================================================
>> >> ---
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> >> (added)
>> >> +++
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> >> Thu Mar 12 15:22:46 2015
>> >> @@ -0,0 +1,255 @@
>> >> +/*
>> >> + * Licensed to the Apache Software Foundation (ASF) under one
>> >> + * or more contributor license agreements. See the NOTICE file
>> >> + * distributed with this work for additional information
>> >> + * regarding copyright ownership. The ASF licenses this file
>> >> + * to you under the Apache License, Version 2.0 (the
>> >> + * "License"); you may not use this file except in compliance
>> >> + * with the License. You may obtain a copy of the License at
>> >> + *
>> >> + * http://www.apache.org/licenses/LICENSE-2.0
>> >> + *
>> >> + * Unless required by applicable law or agreed to in writing,
>> >> + * software distributed under the License is distributed on an
>> >> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
>> >> + * KIND, either express or implied. See the License for the
>> >> + * specific language governing permissions and limitations
>> >> + * under the License.
>> >> + */
>> >> +
>> >> +package org.apache.jackrabbit.oak.commons.sort;
>> >> +
>> >> +import java.io.BufferedWriter;
>> >> +import java.io.Closeable;
>> >> +import java.io.File;
>> >> +import java.io.FileNotFoundException;
>> >> +import java.io.IOException;
>> >> +import java.io.Reader;
>> >> +import java.nio.charset.Charset;
>> >> +import java.util.Collections;
>> >> +import java.util.Comparator;
>> >> +import java.util.Iterator;
>> >> +import java.util.List;
>> >> +
>> >> +import com.google.common.base.Charsets;
>> >> +import com.google.common.collect.Lists;
>> >> +import com.google.common.io.Closer;
>> >> +import com.google.common.io.Files;
>> >> +import org.apache.commons.io.FileUtils;
>> >> +import org.apache.commons.io.LineIterator;
>> >> +import org.slf4j.Logger;
>> >> +import org.slf4j.LoggerFactory;
>> >> +
>> >> +/**
>> >> + * Utility class to store a list of string and perform sort on that.
>> For
>> >> small size
>> >> + * the list would be maintained in memory. If the size crosses the
>> >> required threshold then
>> >> + * the sorting would be performed externally
>> >> + */
>> >> +public class StringSort implements Closeable {
>> >> + private final Logger log = LoggerFactory.getLogger(getClass());
>> >> + public static final int BATCH_SIZE = 2048;
>> >> +
>> >> + private final int overflowToDiskThreshold;
>> >> + private final Comparator<String> comparator;
>> >> +
>> >> + private final List<String> ids = Lists.newArrayList();
>> >> + private long size;
>> >> +
>> >> + private final List<String> inMemBatch = Lists.newArrayList();
>> >> +
>> >> + private boolean useFile;
>> >> + private PersistentState persistentState;
>> >> +
>> >> + public StringSort(int overflowToDiskThreshold, Comparator<String>
>> >> comparator) {
>> >> + this.overflowToDiskThreshold = overflowToDiskThreshold;
>> >> + this.comparator = comparator;
>> >> + }
>> >> +
>> >> + public void add(String id) throws IOException {
>> >> + if (useFile) {
>> >> + addToBatch(id);
>> >> + } else {
>> >> + ids.add(id);
>> >> + if (ids.size() >= overflowToDiskThreshold) {
>> >> + flushToFile(ids);
>> >> + useFile = true;
>> >> + log.debug("In memory buffer crossed the threshold of
>> {}.
>> >> " +
>> >> + "Switching to filesystem [{}] to manage the
>> >> state", overflowToDiskThreshold, persistentState);
>> >> + }
>> >> + }
>> >> + size++;
>> >> + }
>> >> +
>> >> + public void sort() throws IOException {
>> >> + if (useFile) {
>> >> + //Flush the last batch
>> >> + flushToFile(inMemBatch);
>> >> + persistentState.sort();
>> >> + } else {
>> >> + Collections.sort(ids, comparator);
>> >> + }
>> >> + }
>> >> +
>> >> + public Iterator<String> getIds() throws IOException {
>> >> + if (useFile) {
>> >> + return persistentState.getIterator();
>> >> + } else {
>> >> + return ids.iterator();
>> >> + }
>> >> + }
>> >> +
>> >> + public long getSize() {
>> >> + return size;
>> >> + }
>> >> +
>> >> + public boolean isEmpty() {
>> >> + return size == 0;
>> >> + }
>> >> +
>> >> + public boolean usingFile() {
>> >> + return useFile;
>> >> + }
>> >> +
>> >> + @Override
>> >> + public void close() throws IOException {
>> >> + if (persistentState != null) {
>> >> + persistentState.close();
>> >> + }
>> >> + }
>> >> +
>> >> + private void addToBatch(String id) throws IOException {
>> >> + inMemBatch.add(id);
>> >> + if (inMemBatch.size() >= BATCH_SIZE) {
>> >> + flushToFile(inMemBatch);
>> >> + }
>> >> + }
>> >> +
>> >> + private void flushToFile(List<String> ids) throws IOException {
>> >> + BufferedWriter w = getPersistentState().getWriter();
>> >> + for (String id : ids) {
>> >> + w.write(id);
>> >> + w.newLine();
>> >> + }
>> >> + ids.clear();
>> >> + }
>> >> +
>> >> + private PersistentState getPersistentState() {
>> >> + //Lazily initialize the persistent state
>> >> + if (persistentState == null) {
>> >> + persistentState = new PersistentState(comparator);
>> >> + }
>> >> + return persistentState;
>> >> + }
>> >> +
>> >> + private static class PersistentState implements Closeable {
>> >> + /**
>> >> + * Maximum loop count when creating temp directories.
>> >> + */
>> >> + private static final int TEMP_DIR_ATTEMPTS = 10000;
>> >> +
>> >> + private final Charset charset = Charsets.UTF_8;
>> >> + private final File workDir;
>> >> + private final Comparator<String> comparator;
>> >> + private File idFile;
>> >> + private File sortedFile;
>> >> + private BufferedWriter writer;
>> >> + private List<CloseableIterator> openedIterators =
>> >> Lists.newArrayList();
>> >> +
>> >> + public PersistentState(Comparator<String> comparator) {
>> >> + this(comparator, createTempDir("oak-sorter-"));
>> >> + }
>> >> +
>> >> + public PersistentState(Comparator<String> comparator, File
>> >> workDir) {
>> >> + this.workDir = workDir;
>> >> + this.comparator = comparator;
>> >> + }
>> >> +
>> >> + public BufferedWriter getWriter() throws FileNotFoundException
>> {
>> >> + if (idFile == null) {
>> >> + idFile = new File(workDir, "strings.txt");
>> >> + sortedFile = new File(workDir, "strings-sorted.txt");
>> >> + writer = Files.newWriter(idFile, charset);
>> >> + }
>> >> + return writer;
>> >> + }
>> >> +
>> >> + public void sort() throws IOException {
>> >> + closeWriter();
>> >> +
>> >> + List<File> sortedFiles = ExternalSort.sortInBatch(idFile,
>> >> + comparator, //Comparator to use
>> >> + ExternalSort.DEFAULTMAXTEMPFILES,
>> >> + ExternalSort.DEFAULT_MAX_MEM_BYTES,
>> >> + charset, //charset
>> >> + workDir, //temp directory where intermediate files
>> >> are created
>> >> + true //distinct
>> >> + );
>> >> +
>> >> + ExternalSort.mergeSortedFiles(sortedFiles,
>> >> + sortedFile,
>> >> + comparator,
>> >> + charset,
>> >> + true
>> >> + );
>> >> + }
>> >> +
>> >> + public Iterator<String> getIterator() throws IOException {
>> >> + CloseableIterator itr = new
>> >> CloseableIterator(Files.newReader(sortedFile, charset));
>> >> + openedIterators.add(itr);
>> >> + return itr;
>> >> + }
>> >> +
>> >> + @Override
>> >> + public String toString() {
>> >> + return "PersistentState : workDir=" +
>> >> workDir.getAbsolutePath();
>> >> + }
>> >> +
>> >> + @Override
>> >> + public void close() throws IOException {
>> >> + Closer closer = Closer.create();
>> >> + try {
>> >> + closer.register(writer);
>> >> + for (CloseableIterator citr : openedIterators) {
>> >> + closer.register(citr);
>> >> + }
>> >> + closer.register(new Closeable() {
>> >> + @Override
>> >> + public void close() throws IOException {
>> >> + FileUtils.deleteDirectory(workDir);
>> >> + }
>> >> + });
>> >> + } finally {
>> >> + closer.close();
>> >> + }
>> >> + }
>> >> +
>> >> + private void closeWriter() throws IOException {
>> >> + writer.close();
>> >> + }
>> >> +
>> >> + /**
>> >> + * Taken from com.google.common.io.Files#createTempDir()
>> >> + * Modified to provide a prefix
>> >> + */
>> >> + private static File createTempDir(String prefix) {
>> >> + File baseDir = new
>> >> File(System.getProperty("java.io.tmpdir"));
>> >> + String baseName = System.currentTimeMillis() + "-";
>> >> +
>> >> + for (int counter = 0; counter < TEMP_DIR_ATTEMPTS;
>> counter++)
>> >> {
>> >> + File tempDir = new File(baseDir, prefix + baseName +
>> >> counter);
>> >> + if (tempDir.mkdir()) {
>> >> + return tempDir;
>> >> + }
>> >> + }
>> >> + throw new IllegalStateException("Failed to create directory
>> >> within "
>> >> + + TEMP_DIR_ATTEMPTS + " attempts (tried "
>> >> + + baseName + "0 to " + baseName +
>> (TEMP_DIR_ATTEMPTS
>> >> - 1) + ')');
>> >> + }
>> >> + }
>> >> +
>> >> + private static class CloseableIterator extends LineIterator
>> >> implements Closeable {
>> >> + public CloseableIterator(Reader reader) throws
>> >> IllegalArgumentException {
>> >> + super(reader);
>> >> + }
>> >> + }
>> >> +}
>> >>
>> >> Propchange:
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> >>
>> >>
>> ------------------------------------------------------------------------------
>> >> svn:eol-style = native
>> >>
>> >> Added:
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java?rev=1666220&view=auto
>> >>
>> >>
>> ==============================================================================
>> >> ---
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> >> (added)
>> >> +++
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> >> Thu Mar 12 15:22:46 2015
>> >> @@ -0,0 +1,144 @@
>> >> +/*
>> >> + * Licensed to the Apache Software Foundation (ASF) under one
>> >> + * or more contributor license agreements. See the NOTICE file
>> >> + * distributed with this work for additional information
>> >> + * regarding copyright ownership. The ASF licenses this file
>> >> + * to you under the Apache License, Version 2.0 (the
>> >> + * "License"); you may not use this file except in compliance
>> >> + * with the License. You may obtain a copy of the License at
>> >> + *
>> >> + * http://www.apache.org/licenses/LICENSE-2.0
>> >> + *
>> >> + * Unless required by applicable law or agreed to in writing,
>> >> + * software distributed under the License is distributed on an
>> >> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
>> >> + * KIND, either express or implied. See the License for the
>> >> + * specific language governing permissions and limitations
>> >> + * under the License.
>> >> + */
>> >> +
>> >> +package org.apache.jackrabbit.oak.commons.sort;
>> >> +
>> >> +import java.io.IOException;
>> >> +import java.io.Serializable;
>> >> +import java.util.ArrayList;
>> >> +import java.util.Arrays;
>> >> +import java.util.Collections;
>> >> +import java.util.Comparator;
>> >> +import java.util.HashSet;
>> >> +import java.util.List;
>> >> +import java.util.Set;
>> >> +
>> >> +import com.google.common.base.Joiner;
>> >> +import com.google.common.collect.Collections2;
>> >> +import com.google.common.collect.ImmutableList;
>> >> +import org.junit.Test;
>> >> +
>> >> +import static org.junit.Assert.assertEquals;
>> >> +import static org.junit.Assert.assertFalse;
>> >> +import static org.junit.Assert.assertTrue;
>> >> +
>> >> +public class StringSortTest {
>> >> + private Comparator<String> comparator = new PathComparator();
>> >> + private StringSort collector;
>> >> +
>> >> + @Test
>> >> + public void inMemory() throws Exception{
>> >> + List<String> paths = createTestPaths(5, false);
>> >> + collector = new StringSort(paths.size() + 1,comparator);
>> >> + addPathsToCollector(paths);
>> >> +
>> >> + assertConstraints(paths);
>> >> + assertFalse(collector.usingFile());
>> >> + collector.close();
>> >> + }
>> >> +
>> >> + @Test
>> >> + public void overflowToDisk() throws Exception{
>> >> + //Create ~50k paths
>> >> + List<String> paths = createTestPaths(10, true);
>> >> + collector = new StringSort(1000, comparator);
>> >> + addPathsToCollector(paths);
>> >> +
>> >> + assertTrue(collector.usingFile());
>> >> + assertConstraints(paths);
>> >> +
>> >> + collector.close();
>> >> + }
>> >> +
>> >> + private void assertConstraints(List<String> paths) throws
>> IOException
>> >> {
>> >> + assertEquals(paths.size(), collector.getSize());
>> >> +
>> >> + Collections.sort(paths, comparator);
>> >> + collector.sort();
>> >> +
>> >> + List<String> sortedPaths =
>> >> ImmutableList.copyOf(collector.getIds());
>> >> + assertEquals(paths.size(), sortedPaths.size());
>> >> + assertEquals(paths, sortedPaths);
>> >> + }
>> >> +
>> >> + private void addPathsToCollector(Iterable<String> paths) throws
>> >> IOException {
>> >> + for (String path : paths){
>> >> + collector.add(path);
>> >> + }
>> >> + }
>> >> +
>> >> + private static List<String> createTestPaths(int depth, boolean
>> >> permutation){
>> >> + List<String> rootPaths = Arrays.asList("a", "b", "c", "d", "e",
>> >> "f", "g");
>> >> + List<String> paths = new ArrayList<String>();
>> >> +
>> >> +
>> >> + if (permutation){
>> >> + List<String> newRoots = new ArrayList<String>();
>> >> + for (List<String> permuts :
>> >> Collections2.orderedPermutations(rootPaths)){
>> >> + newRoots.add(Joiner.on("").join(permuts));
>> >> + }
>> >> + rootPaths = newRoots;
>> >> + }
>> >> +
>> >> + for (String root : rootPaths){
>> >> + List<String> pathElements = new ArrayList<String>();
>> >> + pathElements.add(root);
>> >> + paths.add(createId(pathElements));
>> >> + for (int i = 0; i < depth; i++){
>> >> + pathElements.add(root + i);
>> >> + paths.add(createId(pathElements));
>> >> + }
>> >> + }
>> >> +
>> >> + Set<String> idSet = new HashSet<String>(paths);
>> >> + assertEquals(paths.size(), idSet.size());
>> >> +
>> >> + Collections.shuffle(paths);
>> >> + return paths;
>> >> + }
>> >> +
>> >> + private static String createId(Iterable<String> pathElements){
>> >> + return "/" + Joiner.on('/').join(pathElements);
>> >> + }
>> >> +
>> >> + private static class PathComparator implements Comparator<String>,
>> >> Serializable {
>> >> + @Override
>> >> + public int compare(String o1, String o2) {
>> >> + int d1 = pathDepth(o1);
>> >> + int d2 = pathDepth(o2);
>> >> + if (d1 != d2) {
>> >> + return Integer.signum(d2 - d1);
>> >> + }
>> >> + return o1.compareTo(o2);
>> >> + }
>> >> +
>> >> + private static int pathDepth(String path) {
>> >> + if (path.equals("/")) {
>> >> + return 0;
>> >> + }
>> >> + int depth = 0;
>> >> + for (int i = 0; i < path.length(); i++) {
>> >> + if (path.charAt(i) == '/') {
>> >> + depth++;
>> >> + }
>> >> + }
>> >> + return depth;
>> >> + }
>> >> + }
>> >> +}
>> >>
>> >> Propchange:
>> >>
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> >>
>> >>
>> ------------------------------------------------------------------------------
>> >> svn:eol-style = native
>> >>
>> >> Modified:
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>> >>
>> >>
>> ==============================================================================
>> >> ---
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> >> (original)
>> >> +++
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> >> Thu Mar 12 15:22:46 2015
>> >> @@ -574,7 +574,11 @@ public class DocumentNodeStoreService {
>> >> RevisionGC revisionGC = new RevisionGC(new Runnable() {
>> >> @Override
>> >> public void run() {
>> >> -
>> >> store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs,
>> >> TimeUnit.SECONDS);
>> >> + try {
>> >> +
>> >> store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs,
>> >> TimeUnit.SECONDS);
>> >> + } catch (IOException e) {
>> >> + log.warn("Error occurred while executing the
>> Version
>> >> Garbage Collector", e);
>> >> + }
>> >> }
>> >> }, executor);
>> >> registrations.add(registerMBean(whiteboard,
>> >> RevisionGCMBean.class, revisionGC,
>> >>
>> >> Modified:
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>> >>
>> >>
>> ==============================================================================
>> >> ---
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> >> (original)
>> >> +++
>> >>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> >> Thu Mar 12 15:22:46 2015
>> >> @@ -19,9 +19,9 @@
>> >>
>> >> package org.apache.jackrabbit.oak.plugins.document;
>> >>
>> >> -import java.util.ArrayList;
>> >> -import java.util.Collections;
>> >> +import java.io.IOException;
>> >> import java.util.EnumSet;
>> >> +import java.util.Iterator;
>> >> import java.util.List;
>> >> import java.util.Set;
>> >> import java.util.concurrent.TimeUnit;
>> >> @@ -31,18 +31,23 @@ import com.google.common.base.StandardSy
>> >> import com.google.common.base.Stopwatch;
>> >> import com.google.common.collect.ImmutableList;
>> >>
>> >> +import org.apache.jackrabbit.oak.commons.sort.StringSort;
>> >> import org.apache.jackrabbit.oak.plugins.document.util.Utils;
>> >> import org.slf4j.Logger;
>> >> import org.slf4j.LoggerFactory;
>> >>
>> >> +import static com.google.common.collect.Iterators.partition;
>> >> import static
>> >>
>> org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
>> >> import static
>> >>
>> org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
>> >>
>> >> public class VersionGarbageCollector {
>> >> + //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid
>> >> re-partitioning
>> >> + private static final int DELETE_BATCH_SIZE = 450;
>> >> private final DocumentNodeStore nodeStore;
>> >> private final VersionGCSupport versionStore;
>> >> + private int overflowToDiskThreshold = 100000;
>> >>
>> >> - private final Logger log = LoggerFactory.getLogger(getClass());
>> >> + private static final Logger log =
>> >> LoggerFactory.getLogger(VersionGarbageCollector.class);
>> >>
>> >> /**
>> >> * Split document types which can be safely garbage collected
>> >> @@ -56,7 +61,7 @@ public class VersionGarbageCollector {
>> >> this.versionStore = gcSupport;
>> >> }
>> >>
>> >> - public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) {
>> >> + public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) throws
>> >> IOException {
>> >> long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
>> >> Stopwatch sw = Stopwatch.createStarted();
>> >> VersionGCStats stats = new VersionGCStats();
>> >> @@ -85,41 +90,60 @@ public class VersionGarbageCollector {
>> >> return stats;
>> >> }
>> >>
>> >> + public void setOverflowToDiskThreshold(int
>> overflowToDiskThreshold) {
>> >> + this.overflowToDiskThreshold = overflowToDiskThreshold;
>> >> + }
>> >> +
>> >> private void collectSplitDocuments(VersionGCStats stats, long
>> >> oldestRevTimeStamp) {
>> >> versionStore.deleteSplitDocuments(GC_TYPES,
>> oldestRevTimeStamp,
>> >> stats);
>> >> }
>> >>
>> >> - private void collectDeletedDocuments(VersionGCStats stats, Revision
>> >> headRevision, long oldestRevTimeStamp) {
>> >> - List<String> docIdsToDelete = new ArrayList<String>();
>> >> - Iterable<NodeDocument> itr =
>> >> versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
>> >> + private void collectDeletedDocuments(VersionGCStats stats, Revision
>> >> headRevision, long oldestRevTimeStamp)
>> >> + throws IOException {
>> >> + StringSort docIdsToDelete = new
>> >> StringSort(overflowToDiskThreshold, NodeDocumentIdComparator.INSTANCE);
>> >> try {
>> >> - for (NodeDocument doc : itr) {
>> >> - //Check if node is actually deleted at current revision
>> >> - //As node is not modified since oldestRevTimeStamp then
>> >> - //this node has not be revived again in past
>> >> maxRevisionAge
>> >> - //So deleting it is safe
>> >> - if (doc.getNodeAtRevision(nodeStore, headRevision,
>> null)
>> >> == null) {
>> >> - docIdsToDelete.add(doc.getId());
>> >> - //Collect id of all previous docs also
>> >> - for (NodeDocument prevDoc :
>> >> ImmutableList.copyOf(doc.getAllPreviousDocs())) {
>> >> - docIdsToDelete.add(prevDoc.getId());
>> >> + Iterable<NodeDocument> itr =
>> >> versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
>> >> + try {
>> >> + for (NodeDocument doc : itr) {
>> >> + //Check if node is actually deleted at current
>> >> revision
>> >> + //As node is not modified since oldestRevTimeStamp
>> >> then
>> >> + //this node has not be revived again in past
>> >> maxRevisionAge
>> >> + //So deleting it is safe
>> >> + if (doc.getNodeAtRevision(nodeStore, headRevision,
>> >> null) == null) {
>> >> + docIdsToDelete.add(doc.getId());
>> >> + //Collect id of all previous docs also
>> >> + for (NodeDocument prevDoc :
>> >> ImmutableList.copyOf(doc.getAllPreviousDocs())) {
>> >> + docIdsToDelete.add(prevDoc.getId());
>> >> + }
>> >> }
>> >> }
>> >> + } finally {
>> >> + Utils.closeIfCloseable(itr);
>> >> + }
>> >> +
>> >> + if (docIdsToDelete.isEmpty()){
>> >> + return;
>> >> }
>> >> - } finally {
>> >> - Utils.closeIfCloseable(itr);
>> >> - }
>> >>
>> >> - Collections.sort(docIdsToDelete,
>> >> NodeDocumentIdComparator.INSTANCE);
>> >> + docIdsToDelete.sort();
>> >> + log.info("Proceeding to delete [{}] documents",
>> >> docIdsToDelete.getSize());
>> >>
>> >> - if(log.isDebugEnabled()) {
>> >> - StringBuilder sb = new StringBuilder("Deleted document with
>> >> following ids were deleted as part of GC \n");
>> >> -
>> >> Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb,
>> >> docIdsToDelete);
>> >> - log.debug(sb.toString());
>> >> + if (log.isDebugEnabled() && docIdsToDelete.getSize() <
>> 1000)
>> >> {
>> >> + StringBuilder sb = new StringBuilder("Deleted document
>> >> with following ids were deleted as part of GC \n");
>> >> +
>> >> Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb,
>> >> docIdsToDelete.getIds());
>> >> + log.debug(sb.toString());
>> >> + }
>> >> +
>> >> + Iterator<List<String>> idListItr =
>> >> partition(docIdsToDelete.getIds(), DELETE_BATCH_SIZE);
>> >> + while (idListItr.hasNext()) {
>> >> + nodeStore.getDocumentStore().remove(Collection.NODES,
>> >> idListItr.next());
>> >> + }
>> >> +
>> >> + nodeStore.invalidateDocChildrenCache();
>> >> + stats.deletedDocGCCount += docIdsToDelete.getSize();
>> >> + } finally {
>> >> + docIdsToDelete.close();
>> >> }
>> >> - nodeStore.getDocumentStore().remove(Collection.NODES,
>> >> docIdsToDelete);
>> >> - nodeStore.invalidateDocChildrenCache();
>> >> - stats.deletedDocGCCount += docIdsToDelete.size();
>> >> }
>> >>
>> >> public static class VersionGCStats {
>> >>
>> >> Modified:
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>> >>
>> >>
>> ==============================================================================
>> >> ---
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> >> (original)
>> >> +++
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> >> Thu Mar 12 15:22:46 2015
>> >> @@ -36,6 +36,7 @@ import org.junit.Before;
>> >> import org.junit.Test;
>> >>
>> >> import static java.util.concurrent.TimeUnit.HOURS;
>> >> +import static org.junit.Assert.assertEquals;
>> >> import static org.junit.Assert.assertNull;
>> >> import static org.junit.Assert.fail;
>> >>
>> >> @@ -104,6 +105,53 @@ public class VersionGCDeletionTest {
>> >> assertNull(ts.find(Collection.NODES, "1:/x"));
>> >> }
>> >>
>> >> + @Test
>> >> + public void deleteLargeNumber() throws Exception{
>> >> + int noOfDocsToDelete = 10000;
>> >> + DocumentStore ts = new MemoryDocumentStore();
>> >> + store = new DocumentMK.Builder()
>> >> + .clock(clock)
>> >> + .setDocumentStore(new MemoryDocumentStore())
>> >> + .setAsyncDelay(0)
>> >> + .getNodeStore();
>> >> +
>> >> + //Baseline the clock
>> >> + clock.waitUntil(Revision.getCurrentTimestamp());
>> >> +
>> >> + NodeBuilder b1 = store.getRoot().builder();
>> >> + NodeBuilder xb = b1.child("x");
>> >> + for (int i = 0; i < noOfDocsToDelete; i++){
>> >> + xb.child("a"+i).child("b"+i);
>> >> + }
>> >> + store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
>> >> +
>> >> + long maxAge = 1; //hours
>> >> + long delta = TimeUnit.MINUTES.toMillis(10);
>> >> +
>> >> + //Remove x/y
>> >> + NodeBuilder b2 = store.getRoot().builder();
>> >> + b2.child("x").remove();
>> >> + store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
>> >> +
>> >> + store.runBackgroundOperations();
>> >> +
>> >> + //3. Check that deleted doc does get collected post maxAge
>> >> + clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) +
>> >> delta);
>> >> + VersionGarbageCollector gc =
>> store.getVersionGarbageCollector();
>> >> + gc.setOverflowToDiskThreshold(100);
>> >> +
>> >> + VersionGarbageCollector.VersionGCStats stats = gc.gc(maxAge *
>> 2,
>> >> HOURS);
>> >> + assertEquals(noOfDocsToDelete * 2 + 1,
>> stats.deletedDocGCCount);
>> >> +
>> >> +
>> >> + assertNull(ts.find(Collection.NODES, "1:/x"));
>> >> +
>> >> + for (int i = 0; i < noOfDocsToDelete; i++){
>> >> + assertNull(ts.find(Collection.NODES, "2:/a"+i+"/b"+i));
>> >> + assertNull(ts.find(Collection.NODES, "1:/a"+i));
>> >> + }
>> >> + }
>> >> +
>> >> private static class TestDocumentStore extends
>> MemoryDocumentStore {
>> >> boolean throwException;
>> >> @Override
>> >>
>> >> Modified:
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> >> URL:
>> >>
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>> >>
>> >>
>> ==============================================================================
>> >> ---
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> >> (original)
>> >> +++
>> >>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> >> Thu Mar 12 15:22:46 2015
>> >> @@ -137,7 +137,11 @@ public class VersionGCWithSplitTest {
>> >> Thread t = new Thread(new Runnable() {
>> >> @Override
>> >> public void run() {
>> >> - stats.set(gc.gc(1, HOURS));
>> >> + try {
>> >> + stats.set(gc.gc(1, HOURS));
>> >> + } catch (IOException e) {
>> >> + throw new RuntimeException(e);
>> >> + }
>> >> }
>> >> });
>> >>
>> >>
>> >>
>> >>
>> >
>>
Re: svn commit: r1666220 - in /jackrabbit/oak/trunk: oak-commons/
oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/
oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/d...
Posted by Amit Jain <am...@ieee.org>.
The tests are passing for me on windows with the latest change.
Thanks
Amit
On Fri, Mar 13, 2015 at 9:21 AM, Chetan Mehrotra <ch...@gmail.com>
wrote:
> Looks like Closer closes the closeables in LIFO manner due to which
> directory containing that file got deleted first. I have change the
> logic now.
>
> Let me know if the test passes for you on Windows
> Chetan Mehrotra
>
>
> On Thu, Mar 12, 2015 at 10:21 PM, Julian Reschke <ju...@gmx.de>
> wrote:
> > With this change, I get a reliable test failure on Windows:
> >
> >
> > Tests in error:
> >
> > overflowToDisk(org.apache.jackrabbit.oak.commons.sort.StringSortTest):
> > Unable to delete file:
> C:\tmp\oak-sorter-1426178913437-0\strings-sorted.txt
> >
> >
> > Best regards, Julian
> >
> >
> > On 2015-03-12 16:22, chetanm@apache.org wrote:
> >>
> >> Author: chetanm
> >> Date: Thu Mar 12 15:22:46 2015
> >> New Revision: 1666220
> >>
> >> URL: http://svn.apache.org/r1666220
> >> Log:
> >> OAK-2557 - VersionGC uses way too much memory if there is a large pile
> of
> >> garbage
> >>
> >> Added:
> >>
> >>
> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> >> (with props)
> >>
> >>
> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> >> (with props)
> >> Modified:
> >> jackrabbit/oak/trunk/oak-commons/pom.xml
> >>
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
> >>
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
> >>
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
> >>
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
> >>
> >> Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1666220&r1=1666219&r2=1666220&view=diff
> >>
> >>
> ==============================================================================
> >> --- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
> >> +++ jackrabbit/oak/trunk/oak-commons/pom.xml Thu Mar 12 15:22:46 2015
> >> @@ -93,6 +93,11 @@
> >> <artifactId>oak-mk-api</artifactId>
> >> <version>${project.version}</version>
> >> </dependency>
> >> + <dependency>
> >> + <groupId>commons-io</groupId>
> >> + <artifactId>commons-io</artifactId>
> >> + <version>2.4</version>
> >> + </dependency>
> >>
> >> <!-- Test dependencies -->
> >> <dependency>
> >>
> >> Added:
> >>
> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1666220&view=auto
> >>
> >>
> ==============================================================================
> >> ---
> >>
> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> >> (added)
> >> +++
> >>
> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> >> Thu Mar 12 15:22:46 2015
> >> @@ -0,0 +1,255 @@
> >> +/*
> >> + * Licensed to the Apache Software Foundation (ASF) under one
> >> + * or more contributor license agreements. See the NOTICE file
> >> + * distributed with this work for additional information
> >> + * regarding copyright ownership. The ASF licenses this file
> >> + * to you under the Apache License, Version 2.0 (the
> >> + * "License"); you may not use this file except in compliance
> >> + * with the License. You may obtain a copy of the License at
> >> + *
> >> + * http://www.apache.org/licenses/LICENSE-2.0
> >> + *
> >> + * Unless required by applicable law or agreed to in writing,
> >> + * software distributed under the License is distributed on an
> >> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> >> + * KIND, either express or implied. See the License for the
> >> + * specific language governing permissions and limitations
> >> + * under the License.
> >> + */
> >> +
> >> +package org.apache.jackrabbit.oak.commons.sort;
> >> +
> >> +import java.io.BufferedWriter;
> >> +import java.io.Closeable;
> >> +import java.io.File;
> >> +import java.io.FileNotFoundException;
> >> +import java.io.IOException;
> >> +import java.io.Reader;
> >> +import java.nio.charset.Charset;
> >> +import java.util.Collections;
> >> +import java.util.Comparator;
> >> +import java.util.Iterator;
> >> +import java.util.List;
> >> +
> >> +import com.google.common.base.Charsets;
> >> +import com.google.common.collect.Lists;
> >> +import com.google.common.io.Closer;
> >> +import com.google.common.io.Files;
> >> +import org.apache.commons.io.FileUtils;
> >> +import org.apache.commons.io.LineIterator;
> >> +import org.slf4j.Logger;
> >> +import org.slf4j.LoggerFactory;
> >> +
> >> +/**
> >> + * Utility class to store a list of string and perform sort on that.
> For
> >> small size
> >> + * the list would be maintained in memory. If the size crosses the
> >> required threshold then
> >> + * the sorting would be performed externally
> >> + */
> >> +public class StringSort implements Closeable {
> >> + private final Logger log = LoggerFactory.getLogger(getClass());
> >> + public static final int BATCH_SIZE = 2048;
> >> +
> >> + private final int overflowToDiskThreshold;
> >> + private final Comparator<String> comparator;
> >> +
> >> + private final List<String> ids = Lists.newArrayList();
> >> + private long size;
> >> +
> >> + private final List<String> inMemBatch = Lists.newArrayList();
> >> +
> >> + private boolean useFile;
> >> + private PersistentState persistentState;
> >> +
> >> + public StringSort(int overflowToDiskThreshold, Comparator<String>
> >> comparator) {
> >> + this.overflowToDiskThreshold = overflowToDiskThreshold;
> >> + this.comparator = comparator;
> >> + }
> >> +
> >> + public void add(String id) throws IOException {
> >> + if (useFile) {
> >> + addToBatch(id);
> >> + } else {
> >> + ids.add(id);
> >> + if (ids.size() >= overflowToDiskThreshold) {
> >> + flushToFile(ids);
> >> + useFile = true;
> >> + log.debug("In memory buffer crossed the threshold of
> {}.
> >> " +
> >> + "Switching to filesystem [{}] to manage the
> >> state", overflowToDiskThreshold, persistentState);
> >> + }
> >> + }
> >> + size++;
> >> + }
> >> +
> >> + public void sort() throws IOException {
> >> + if (useFile) {
> >> + //Flush the last batch
> >> + flushToFile(inMemBatch);
> >> + persistentState.sort();
> >> + } else {
> >> + Collections.sort(ids, comparator);
> >> + }
> >> + }
> >> +
> >> + public Iterator<String> getIds() throws IOException {
> >> + if (useFile) {
> >> + return persistentState.getIterator();
> >> + } else {
> >> + return ids.iterator();
> >> + }
> >> + }
> >> +
> >> + public long getSize() {
> >> + return size;
> >> + }
> >> +
> >> + public boolean isEmpty() {
> >> + return size == 0;
> >> + }
> >> +
> >> + public boolean usingFile() {
> >> + return useFile;
> >> + }
> >> +
> >> + @Override
> >> + public void close() throws IOException {
> >> + if (persistentState != null) {
> >> + persistentState.close();
> >> + }
> >> + }
> >> +
> >> + private void addToBatch(String id) throws IOException {
> >> + inMemBatch.add(id);
> >> + if (inMemBatch.size() >= BATCH_SIZE) {
> >> + flushToFile(inMemBatch);
> >> + }
> >> + }
> >> +
> >> + private void flushToFile(List<String> ids) throws IOException {
> >> + BufferedWriter w = getPersistentState().getWriter();
> >> + for (String id : ids) {
> >> + w.write(id);
> >> + w.newLine();
> >> + }
> >> + ids.clear();
> >> + }
> >> +
> >> + private PersistentState getPersistentState() {
> >> + //Lazily initialize the persistent state
> >> + if (persistentState == null) {
> >> + persistentState = new PersistentState(comparator);
> >> + }
> >> + return persistentState;
> >> + }
> >> +
> >> + private static class PersistentState implements Closeable {
> >> + /**
> >> + * Maximum loop count when creating temp directories.
> >> + */
> >> + private static final int TEMP_DIR_ATTEMPTS = 10000;
> >> +
> >> + private final Charset charset = Charsets.UTF_8;
> >> + private final File workDir;
> >> + private final Comparator<String> comparator;
> >> + private File idFile;
> >> + private File sortedFile;
> >> + private BufferedWriter writer;
> >> + private List<CloseableIterator> openedIterators =
> >> Lists.newArrayList();
> >> +
> >> + public PersistentState(Comparator<String> comparator) {
> >> + this(comparator, createTempDir("oak-sorter-"));
> >> + }
> >> +
> >> + public PersistentState(Comparator<String> comparator, File
> >> workDir) {
> >> + this.workDir = workDir;
> >> + this.comparator = comparator;
> >> + }
> >> +
> >> + public BufferedWriter getWriter() throws FileNotFoundException
> {
> >> + if (idFile == null) {
> >> + idFile = new File(workDir, "strings.txt");
> >> + sortedFile = new File(workDir, "strings-sorted.txt");
> >> + writer = Files.newWriter(idFile, charset);
> >> + }
> >> + return writer;
> >> + }
> >> +
> >> + public void sort() throws IOException {
> >> + closeWriter();
> >> +
> >> + List<File> sortedFiles = ExternalSort.sortInBatch(idFile,
> >> + comparator, //Comparator to use
> >> + ExternalSort.DEFAULTMAXTEMPFILES,
> >> + ExternalSort.DEFAULT_MAX_MEM_BYTES,
> >> + charset, //charset
> >> + workDir, //temp directory where intermediate files
> >> are created
> >> + true //distinct
> >> + );
> >> +
> >> + ExternalSort.mergeSortedFiles(sortedFiles,
> >> + sortedFile,
> >> + comparator,
> >> + charset,
> >> + true
> >> + );
> >> + }
> >> +
> >> + public Iterator<String> getIterator() throws IOException {
> >> + CloseableIterator itr = new
> >> CloseableIterator(Files.newReader(sortedFile, charset));
> >> + openedIterators.add(itr);
> >> + return itr;
> >> + }
> >> +
> >> + @Override
> >> + public String toString() {
> >> + return "PersistentState : workDir=" +
> >> workDir.getAbsolutePath();
> >> + }
> >> +
> >> + @Override
> >> + public void close() throws IOException {
> >> + Closer closer = Closer.create();
> >> + try {
> >> + closer.register(writer);
> >> + for (CloseableIterator citr : openedIterators) {
> >> + closer.register(citr);
> >> + }
> >> + closer.register(new Closeable() {
> >> + @Override
> >> + public void close() throws IOException {
> >> + FileUtils.deleteDirectory(workDir);
> >> + }
> >> + });
> >> + } finally {
> >> + closer.close();
> >> + }
> >> + }
> >> +
> >> + private void closeWriter() throws IOException {
> >> + writer.close();
> >> + }
> >> +
> >> + /**
> >> + * Taken from com.google.common.io.Files#createTempDir()
> >> + * Modified to provide a prefix
> >> + */
> >> + private static File createTempDir(String prefix) {
> >> + File baseDir = new
> >> File(System.getProperty("java.io.tmpdir"));
> >> + String baseName = System.currentTimeMillis() + "-";
> >> +
> >> + for (int counter = 0; counter < TEMP_DIR_ATTEMPTS;
> counter++)
> >> {
> >> + File tempDir = new File(baseDir, prefix + baseName +
> >> counter);
> >> + if (tempDir.mkdir()) {
> >> + return tempDir;
> >> + }
> >> + }
> >> + throw new IllegalStateException("Failed to create directory
> >> within "
> >> + + TEMP_DIR_ATTEMPTS + " attempts (tried "
> >> + + baseName + "0 to " + baseName +
> (TEMP_DIR_ATTEMPTS
> >> - 1) + ')');
> >> + }
> >> + }
> >> +
> >> + private static class CloseableIterator extends LineIterator
> >> implements Closeable {
> >> + public CloseableIterator(Reader reader) throws
> >> IllegalArgumentException {
> >> + super(reader);
> >> + }
> >> + }
> >> +}
> >>
> >> Propchange:
> >>
> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> >>
> >>
> ------------------------------------------------------------------------------
> >> svn:eol-style = native
> >>
> >> Added:
> >>
> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java?rev=1666220&view=auto
> >>
> >>
> ==============================================================================
> >> ---
> >>
> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> >> (added)
> >> +++
> >>
> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> >> Thu Mar 12 15:22:46 2015
> >> @@ -0,0 +1,144 @@
> >> +/*
> >> + * Licensed to the Apache Software Foundation (ASF) under one
> >> + * or more contributor license agreements. See the NOTICE file
> >> + * distributed with this work for additional information
> >> + * regarding copyright ownership. The ASF licenses this file
> >> + * to you under the Apache License, Version 2.0 (the
> >> + * "License"); you may not use this file except in compliance
> >> + * with the License. You may obtain a copy of the License at
> >> + *
> >> + * http://www.apache.org/licenses/LICENSE-2.0
> >> + *
> >> + * Unless required by applicable law or agreed to in writing,
> >> + * software distributed under the License is distributed on an
> >> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> >> + * KIND, either express or implied. See the License for the
> >> + * specific language governing permissions and limitations
> >> + * under the License.
> >> + */
> >> +
> >> +package org.apache.jackrabbit.oak.commons.sort;
> >> +
> >> +import java.io.IOException;
> >> +import java.io.Serializable;
> >> +import java.util.ArrayList;
> >> +import java.util.Arrays;
> >> +import java.util.Collections;
> >> +import java.util.Comparator;
> >> +import java.util.HashSet;
> >> +import java.util.List;
> >> +import java.util.Set;
> >> +
> >> +import com.google.common.base.Joiner;
> >> +import com.google.common.collect.Collections2;
> >> +import com.google.common.collect.ImmutableList;
> >> +import org.junit.Test;
> >> +
> >> +import static org.junit.Assert.assertEquals;
> >> +import static org.junit.Assert.assertFalse;
> >> +import static org.junit.Assert.assertTrue;
> >> +
> >> +public class StringSortTest {
> >> + private Comparator<String> comparator = new PathComparator();
> >> + private StringSort collector;
> >> +
> >> + @Test
> >> + public void inMemory() throws Exception{
> >> + List<String> paths = createTestPaths(5, false);
> >> + collector = new StringSort(paths.size() + 1,comparator);
> >> + addPathsToCollector(paths);
> >> +
> >> + assertConstraints(paths);
> >> + assertFalse(collector.usingFile());
> >> + collector.close();
> >> + }
> >> +
> >> + @Test
> >> + public void overflowToDisk() throws Exception{
> >> + //Create ~50k paths
> >> + List<String> paths = createTestPaths(10, true);
> >> + collector = new StringSort(1000, comparator);
> >> + addPathsToCollector(paths);
> >> +
> >> + assertTrue(collector.usingFile());
> >> + assertConstraints(paths);
> >> +
> >> + collector.close();
> >> + }
> >> +
> >> + private void assertConstraints(List<String> paths) throws
> IOException
> >> {
> >> + assertEquals(paths.size(), collector.getSize());
> >> +
> >> + Collections.sort(paths, comparator);
> >> + collector.sort();
> >> +
> >> + List<String> sortedPaths =
> >> ImmutableList.copyOf(collector.getIds());
> >> + assertEquals(paths.size(), sortedPaths.size());
> >> + assertEquals(paths, sortedPaths);
> >> + }
> >> +
> >> + private void addPathsToCollector(Iterable<String> paths) throws
> >> IOException {
> >> + for (String path : paths){
> >> + collector.add(path);
> >> + }
> >> + }
> >> +
> >> + private static List<String> createTestPaths(int depth, boolean
> >> permutation){
> >> + List<String> rootPaths = Arrays.asList("a", "b", "c", "d", "e",
> >> "f", "g");
> >> + List<String> paths = new ArrayList<String>();
> >> +
> >> +
> >> + if (permutation){
> >> + List<String> newRoots = new ArrayList<String>();
> >> + for (List<String> permuts :
> >> Collections2.orderedPermutations(rootPaths)){
> >> + newRoots.add(Joiner.on("").join(permuts));
> >> + }
> >> + rootPaths = newRoots;
> >> + }
> >> +
> >> + for (String root : rootPaths){
> >> + List<String> pathElements = new ArrayList<String>();
> >> + pathElements.add(root);
> >> + paths.add(createId(pathElements));
> >> + for (int i = 0; i < depth; i++){
> >> + pathElements.add(root + i);
> >> + paths.add(createId(pathElements));
> >> + }
> >> + }
> >> +
> >> + Set<String> idSet = new HashSet<String>(paths);
> >> + assertEquals(paths.size(), idSet.size());
> >> +
> >> + Collections.shuffle(paths);
> >> + return paths;
> >> + }
> >> +
> >> + private static String createId(Iterable<String> pathElements){
> >> + return "/" + Joiner.on('/').join(pathElements);
> >> + }
> >> +
> >> + private static class PathComparator implements Comparator<String>,
> >> Serializable {
> >> + @Override
> >> + public int compare(String o1, String o2) {
> >> + int d1 = pathDepth(o1);
> >> + int d2 = pathDepth(o2);
> >> + if (d1 != d2) {
> >> + return Integer.signum(d2 - d1);
> >> + }
> >> + return o1.compareTo(o2);
> >> + }
> >> +
> >> + private static int pathDepth(String path) {
> >> + if (path.equals("/")) {
> >> + return 0;
> >> + }
> >> + int depth = 0;
> >> + for (int i = 0; i < path.length(); i++) {
> >> + if (path.charAt(i) == '/') {
> >> + depth++;
> >> + }
> >> + }
> >> + return depth;
> >> + }
> >> + }
> >> +}
> >>
> >> Propchange:
> >>
> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> >>
> >>
> ------------------------------------------------------------------------------
> >> svn:eol-style = native
> >>
> >> Modified:
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> >>
> >>
> ==============================================================================
> >> ---
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
> >> (original)
> >> +++
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
> >> Thu Mar 12 15:22:46 2015
> >> @@ -574,7 +574,11 @@ public class DocumentNodeStoreService {
> >> RevisionGC revisionGC = new RevisionGC(new Runnable() {
> >> @Override
> >> public void run() {
> >> -
> >> store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs,
> >> TimeUnit.SECONDS);
> >> + try {
> >> +
> >> store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs,
> >> TimeUnit.SECONDS);
> >> + } catch (IOException e) {
> >> + log.warn("Error occurred while executing the
> Version
> >> Garbage Collector", e);
> >> + }
> >> }
> >> }, executor);
> >> registrations.add(registerMBean(whiteboard,
> >> RevisionGCMBean.class, revisionGC,
> >>
> >> Modified:
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> >>
> >>
> ==============================================================================
> >> ---
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
> >> (original)
> >> +++
> >>
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
> >> Thu Mar 12 15:22:46 2015
> >> @@ -19,9 +19,9 @@
> >>
> >> package org.apache.jackrabbit.oak.plugins.document;
> >>
> >> -import java.util.ArrayList;
> >> -import java.util.Collections;
> >> +import java.io.IOException;
> >> import java.util.EnumSet;
> >> +import java.util.Iterator;
> >> import java.util.List;
> >> import java.util.Set;
> >> import java.util.concurrent.TimeUnit;
> >> @@ -31,18 +31,23 @@ import com.google.common.base.StandardSy
> >> import com.google.common.base.Stopwatch;
> >> import com.google.common.collect.ImmutableList;
> >>
> >> +import org.apache.jackrabbit.oak.commons.sort.StringSort;
> >> import org.apache.jackrabbit.oak.plugins.document.util.Utils;
> >> import org.slf4j.Logger;
> >> import org.slf4j.LoggerFactory;
> >>
> >> +import static com.google.common.collect.Iterators.partition;
> >> import static
> >>
> org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
> >> import static
> >>
> org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
> >>
> >> public class VersionGarbageCollector {
> >> + //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid
> >> re-partitioning
> >> + private static final int DELETE_BATCH_SIZE = 450;
> >> private final DocumentNodeStore nodeStore;
> >> private final VersionGCSupport versionStore;
> >> + private int overflowToDiskThreshold = 100000;
> >>
> >> - private final Logger log = LoggerFactory.getLogger(getClass());
> >> + private static final Logger log =
> >> LoggerFactory.getLogger(VersionGarbageCollector.class);
> >>
> >> /**
> >> * Split document types which can be safely garbage collected
> >> @@ -56,7 +61,7 @@ public class VersionGarbageCollector {
> >> this.versionStore = gcSupport;
> >> }
> >>
> >> - public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) {
> >> + public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) throws
> >> IOException {
> >> long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
> >> Stopwatch sw = Stopwatch.createStarted();
> >> VersionGCStats stats = new VersionGCStats();
> >> @@ -85,41 +90,60 @@ public class VersionGarbageCollector {
> >> return stats;
> >> }
> >>
> >> + public void setOverflowToDiskThreshold(int
> overflowToDiskThreshold) {
> >> + this.overflowToDiskThreshold = overflowToDiskThreshold;
> >> + }
> >> +
> >> private void collectSplitDocuments(VersionGCStats stats, long
> >> oldestRevTimeStamp) {
> >> versionStore.deleteSplitDocuments(GC_TYPES,
> oldestRevTimeStamp,
> >> stats);
> >> }
> >>
> >> - private void collectDeletedDocuments(VersionGCStats stats, Revision
> >> headRevision, long oldestRevTimeStamp) {
> >> - List<String> docIdsToDelete = new ArrayList<String>();
> >> - Iterable<NodeDocument> itr =
> >> versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
> >> + private void collectDeletedDocuments(VersionGCStats stats, Revision
> >> headRevision, long oldestRevTimeStamp)
> >> + throws IOException {
> >> + StringSort docIdsToDelete = new
> >> StringSort(overflowToDiskThreshold, NodeDocumentIdComparator.INSTANCE);
> >> try {
> >> - for (NodeDocument doc : itr) {
> >> - //Check if node is actually deleted at current revision
> >> - //As node is not modified since oldestRevTimeStamp then
> >> - //this node has not be revived again in past
> >> maxRevisionAge
> >> - //So deleting it is safe
> >> - if (doc.getNodeAtRevision(nodeStore, headRevision,
> null)
> >> == null) {
> >> - docIdsToDelete.add(doc.getId());
> >> - //Collect id of all previous docs also
> >> - for (NodeDocument prevDoc :
> >> ImmutableList.copyOf(doc.getAllPreviousDocs())) {
> >> - docIdsToDelete.add(prevDoc.getId());
> >> + Iterable<NodeDocument> itr =
> >> versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
> >> + try {
> >> + for (NodeDocument doc : itr) {
> >> + //Check if node is actually deleted at current
> >> revision
> >> + //As node is not modified since oldestRevTimeStamp
> >> then
> >> + //this node has not be revived again in past
> >> maxRevisionAge
> >> + //So deleting it is safe
> >> + if (doc.getNodeAtRevision(nodeStore, headRevision,
> >> null) == null) {
> >> + docIdsToDelete.add(doc.getId());
> >> + //Collect id of all previous docs also
> >> + for (NodeDocument prevDoc :
> >> ImmutableList.copyOf(doc.getAllPreviousDocs())) {
> >> + docIdsToDelete.add(prevDoc.getId());
> >> + }
> >> }
> >> }
> >> + } finally {
> >> + Utils.closeIfCloseable(itr);
> >> + }
> >> +
> >> + if (docIdsToDelete.isEmpty()){
> >> + return;
> >> }
> >> - } finally {
> >> - Utils.closeIfCloseable(itr);
> >> - }
> >>
> >> - Collections.sort(docIdsToDelete,
> >> NodeDocumentIdComparator.INSTANCE);
> >> + docIdsToDelete.sort();
> >> + log.info("Proceeding to delete [{}] documents",
> >> docIdsToDelete.getSize());
> >>
> >> - if(log.isDebugEnabled()) {
> >> - StringBuilder sb = new StringBuilder("Deleted document with
> >> following ids were deleted as part of GC \n");
> >> -
> >> Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb,
> >> docIdsToDelete);
> >> - log.debug(sb.toString());
> >> + if (log.isDebugEnabled() && docIdsToDelete.getSize() <
> 1000)
> >> {
> >> + StringBuilder sb = new StringBuilder("Deleted document
> >> with following ids were deleted as part of GC \n");
> >> +
> >> Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb,
> >> docIdsToDelete.getIds());
> >> + log.debug(sb.toString());
> >> + }
> >> +
> >> + Iterator<List<String>> idListItr =
> >> partition(docIdsToDelete.getIds(), DELETE_BATCH_SIZE);
> >> + while (idListItr.hasNext()) {
> >> + nodeStore.getDocumentStore().remove(Collection.NODES,
> >> idListItr.next());
> >> + }
> >> +
> >> + nodeStore.invalidateDocChildrenCache();
> >> + stats.deletedDocGCCount += docIdsToDelete.getSize();
> >> + } finally {
> >> + docIdsToDelete.close();
> >> }
> >> - nodeStore.getDocumentStore().remove(Collection.NODES,
> >> docIdsToDelete);
> >> - nodeStore.invalidateDocChildrenCache();
> >> - stats.deletedDocGCCount += docIdsToDelete.size();
> >> }
> >>
> >> public static class VersionGCStats {
> >>
> >> Modified:
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> >>
> >>
> ==============================================================================
> >> ---
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
> >> (original)
> >> +++
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
> >> Thu Mar 12 15:22:46 2015
> >> @@ -36,6 +36,7 @@ import org.junit.Before;
> >> import org.junit.Test;
> >>
> >> import static java.util.concurrent.TimeUnit.HOURS;
> >> +import static org.junit.Assert.assertEquals;
> >> import static org.junit.Assert.assertNull;
> >> import static org.junit.Assert.fail;
> >>
> >> @@ -104,6 +105,53 @@ public class VersionGCDeletionTest {
> >> assertNull(ts.find(Collection.NODES, "1:/x"));
> >> }
> >>
> >> + @Test
> >> + public void deleteLargeNumber() throws Exception{
> >> + int noOfDocsToDelete = 10000;
> >> + DocumentStore ts = new MemoryDocumentStore();
> >> + store = new DocumentMK.Builder()
> >> + .clock(clock)
> >> + .setDocumentStore(new MemoryDocumentStore())
> >> + .setAsyncDelay(0)
> >> + .getNodeStore();
> >> +
> >> + //Baseline the clock
> >> + clock.waitUntil(Revision.getCurrentTimestamp());
> >> +
> >> + NodeBuilder b1 = store.getRoot().builder();
> >> + NodeBuilder xb = b1.child("x");
> >> + for (int i = 0; i < noOfDocsToDelete; i++){
> >> + xb.child("a"+i).child("b"+i);
> >> + }
> >> + store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
> >> +
> >> + long maxAge = 1; //hours
> >> + long delta = TimeUnit.MINUTES.toMillis(10);
> >> +
> >> + //Remove x/y
> >> + NodeBuilder b2 = store.getRoot().builder();
> >> + b2.child("x").remove();
> >> + store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
> >> +
> >> + store.runBackgroundOperations();
> >> +
> >> + //3. Check that deleted doc does get collected post maxAge
> >> + clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) +
> >> delta);
> >> + VersionGarbageCollector gc =
> store.getVersionGarbageCollector();
> >> + gc.setOverflowToDiskThreshold(100);
> >> +
> >> + VersionGarbageCollector.VersionGCStats stats = gc.gc(maxAge *
> 2,
> >> HOURS);
> >> + assertEquals(noOfDocsToDelete * 2 + 1,
> stats.deletedDocGCCount);
> >> +
> >> +
> >> + assertNull(ts.find(Collection.NODES, "1:/x"));
> >> +
> >> + for (int i = 0; i < noOfDocsToDelete; i++){
> >> + assertNull(ts.find(Collection.NODES, "2:/a"+i+"/b"+i));
> >> + assertNull(ts.find(Collection.NODES, "1:/a"+i));
> >> + }
> >> + }
> >> +
> >> private static class TestDocumentStore extends
> MemoryDocumentStore {
> >> boolean throwException;
> >> @Override
> >>
> >> Modified:
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
> >> URL:
> >>
> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> >>
> >>
> ==============================================================================
> >> ---
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
> >> (original)
> >> +++
> >>
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
> >> Thu Mar 12 15:22:46 2015
> >> @@ -137,7 +137,11 @@ public class VersionGCWithSplitTest {
> >> Thread t = new Thread(new Runnable() {
> >> @Override
> >> public void run() {
> >> - stats.set(gc.gc(1, HOURS));
> >> + try {
> >> + stats.set(gc.gc(1, HOURS));
> >> + } catch (IOException e) {
> >> + throw new RuntimeException(e);
> >> + }
> >> }
> >> });
> >>
> >>
> >>
> >>
> >
>
Re: svn commit: r1666220 - in /jackrabbit/oak/trunk: oak-commons/
oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/
oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/d...
Posted by Chetan Mehrotra <ch...@gmail.com>.
Looks like Closer closes the closeables in LIFO manner due to which
directory containing that file got deleted first. I have change the
logic now.
Let me know if the test passes for you on Windows
Chetan Mehrotra
On Thu, Mar 12, 2015 at 10:21 PM, Julian Reschke <ju...@gmx.de> wrote:
> With this change, I get a reliable test failure on Windows:
>
>
> Tests in error:
>
> overflowToDisk(org.apache.jackrabbit.oak.commons.sort.StringSortTest):
> Unable to delete file: C:\tmp\oak-sorter-1426178913437-0\strings-sorted.txt
>
>
> Best regards, Julian
>
>
> On 2015-03-12 16:22, chetanm@apache.org wrote:
>>
>> Author: chetanm
>> Date: Thu Mar 12 15:22:46 2015
>> New Revision: 1666220
>>
>> URL: http://svn.apache.org/r1666220
>> Log:
>> OAK-2557 - VersionGC uses way too much memory if there is a large pile of
>> garbage
>>
>> Added:
>>
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> (with props)
>>
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> (with props)
>> Modified:
>> jackrabbit/oak/trunk/oak-commons/pom.xml
>>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>>
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>>
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>>
>> Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1666220&r1=1666219&r2=1666220&view=diff
>>
>> ==============================================================================
>> --- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
>> +++ jackrabbit/oak/trunk/oak-commons/pom.xml Thu Mar 12 15:22:46 2015
>> @@ -93,6 +93,11 @@
>> <artifactId>oak-mk-api</artifactId>
>> <version>${project.version}</version>
>> </dependency>
>> + <dependency>
>> + <groupId>commons-io</groupId>
>> + <artifactId>commons-io</artifactId>
>> + <version>2.4</version>
>> + </dependency>
>>
>> <!-- Test dependencies -->
>> <dependency>
>>
>> Added:
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1666220&view=auto
>>
>> ==============================================================================
>> ---
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> (added)
>> +++
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>> Thu Mar 12 15:22:46 2015
>> @@ -0,0 +1,255 @@
>> +/*
>> + * Licensed to the Apache Software Foundation (ASF) under one
>> + * or more contributor license agreements. See the NOTICE file
>> + * distributed with this work for additional information
>> + * regarding copyright ownership. The ASF licenses this file
>> + * to you under the Apache License, Version 2.0 (the
>> + * "License"); you may not use this file except in compliance
>> + * with the License. You may obtain a copy of the License at
>> + *
>> + * http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing,
>> + * software distributed under the License is distributed on an
>> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
>> + * KIND, either express or implied. See the License for the
>> + * specific language governing permissions and limitations
>> + * under the License.
>> + */
>> +
>> +package org.apache.jackrabbit.oak.commons.sort;
>> +
>> +import java.io.BufferedWriter;
>> +import java.io.Closeable;
>> +import java.io.File;
>> +import java.io.FileNotFoundException;
>> +import java.io.IOException;
>> +import java.io.Reader;
>> +import java.nio.charset.Charset;
>> +import java.util.Collections;
>> +import java.util.Comparator;
>> +import java.util.Iterator;
>> +import java.util.List;
>> +
>> +import com.google.common.base.Charsets;
>> +import com.google.common.collect.Lists;
>> +import com.google.common.io.Closer;
>> +import com.google.common.io.Files;
>> +import org.apache.commons.io.FileUtils;
>> +import org.apache.commons.io.LineIterator;
>> +import org.slf4j.Logger;
>> +import org.slf4j.LoggerFactory;
>> +
>> +/**
>> + * Utility class to store a list of string and perform sort on that. For
>> small size
>> + * the list would be maintained in memory. If the size crosses the
>> required threshold then
>> + * the sorting would be performed externally
>> + */
>> +public class StringSort implements Closeable {
>> + private final Logger log = LoggerFactory.getLogger(getClass());
>> + public static final int BATCH_SIZE = 2048;
>> +
>> + private final int overflowToDiskThreshold;
>> + private final Comparator<String> comparator;
>> +
>> + private final List<String> ids = Lists.newArrayList();
>> + private long size;
>> +
>> + private final List<String> inMemBatch = Lists.newArrayList();
>> +
>> + private boolean useFile;
>> + private PersistentState persistentState;
>> +
>> + public StringSort(int overflowToDiskThreshold, Comparator<String>
>> comparator) {
>> + this.overflowToDiskThreshold = overflowToDiskThreshold;
>> + this.comparator = comparator;
>> + }
>> +
>> + public void add(String id) throws IOException {
>> + if (useFile) {
>> + addToBatch(id);
>> + } else {
>> + ids.add(id);
>> + if (ids.size() >= overflowToDiskThreshold) {
>> + flushToFile(ids);
>> + useFile = true;
>> + log.debug("In memory buffer crossed the threshold of {}.
>> " +
>> + "Switching to filesystem [{}] to manage the
>> state", overflowToDiskThreshold, persistentState);
>> + }
>> + }
>> + size++;
>> + }
>> +
>> + public void sort() throws IOException {
>> + if (useFile) {
>> + //Flush the last batch
>> + flushToFile(inMemBatch);
>> + persistentState.sort();
>> + } else {
>> + Collections.sort(ids, comparator);
>> + }
>> + }
>> +
>> + public Iterator<String> getIds() throws IOException {
>> + if (useFile) {
>> + return persistentState.getIterator();
>> + } else {
>> + return ids.iterator();
>> + }
>> + }
>> +
>> + public long getSize() {
>> + return size;
>> + }
>> +
>> + public boolean isEmpty() {
>> + return size == 0;
>> + }
>> +
>> + public boolean usingFile() {
>> + return useFile;
>> + }
>> +
>> + @Override
>> + public void close() throws IOException {
>> + if (persistentState != null) {
>> + persistentState.close();
>> + }
>> + }
>> +
>> + private void addToBatch(String id) throws IOException {
>> + inMemBatch.add(id);
>> + if (inMemBatch.size() >= BATCH_SIZE) {
>> + flushToFile(inMemBatch);
>> + }
>> + }
>> +
>> + private void flushToFile(List<String> ids) throws IOException {
>> + BufferedWriter w = getPersistentState().getWriter();
>> + for (String id : ids) {
>> + w.write(id);
>> + w.newLine();
>> + }
>> + ids.clear();
>> + }
>> +
>> + private PersistentState getPersistentState() {
>> + //Lazily initialize the persistent state
>> + if (persistentState == null) {
>> + persistentState = new PersistentState(comparator);
>> + }
>> + return persistentState;
>> + }
>> +
>> + private static class PersistentState implements Closeable {
>> + /**
>> + * Maximum loop count when creating temp directories.
>> + */
>> + private static final int TEMP_DIR_ATTEMPTS = 10000;
>> +
>> + private final Charset charset = Charsets.UTF_8;
>> + private final File workDir;
>> + private final Comparator<String> comparator;
>> + private File idFile;
>> + private File sortedFile;
>> + private BufferedWriter writer;
>> + private List<CloseableIterator> openedIterators =
>> Lists.newArrayList();
>> +
>> + public PersistentState(Comparator<String> comparator) {
>> + this(comparator, createTempDir("oak-sorter-"));
>> + }
>> +
>> + public PersistentState(Comparator<String> comparator, File
>> workDir) {
>> + this.workDir = workDir;
>> + this.comparator = comparator;
>> + }
>> +
>> + public BufferedWriter getWriter() throws FileNotFoundException {
>> + if (idFile == null) {
>> + idFile = new File(workDir, "strings.txt");
>> + sortedFile = new File(workDir, "strings-sorted.txt");
>> + writer = Files.newWriter(idFile, charset);
>> + }
>> + return writer;
>> + }
>> +
>> + public void sort() throws IOException {
>> + closeWriter();
>> +
>> + List<File> sortedFiles = ExternalSort.sortInBatch(idFile,
>> + comparator, //Comparator to use
>> + ExternalSort.DEFAULTMAXTEMPFILES,
>> + ExternalSort.DEFAULT_MAX_MEM_BYTES,
>> + charset, //charset
>> + workDir, //temp directory where intermediate files
>> are created
>> + true //distinct
>> + );
>> +
>> + ExternalSort.mergeSortedFiles(sortedFiles,
>> + sortedFile,
>> + comparator,
>> + charset,
>> + true
>> + );
>> + }
>> +
>> + public Iterator<String> getIterator() throws IOException {
>> + CloseableIterator itr = new
>> CloseableIterator(Files.newReader(sortedFile, charset));
>> + openedIterators.add(itr);
>> + return itr;
>> + }
>> +
>> + @Override
>> + public String toString() {
>> + return "PersistentState : workDir=" +
>> workDir.getAbsolutePath();
>> + }
>> +
>> + @Override
>> + public void close() throws IOException {
>> + Closer closer = Closer.create();
>> + try {
>> + closer.register(writer);
>> + for (CloseableIterator citr : openedIterators) {
>> + closer.register(citr);
>> + }
>> + closer.register(new Closeable() {
>> + @Override
>> + public void close() throws IOException {
>> + FileUtils.deleteDirectory(workDir);
>> + }
>> + });
>> + } finally {
>> + closer.close();
>> + }
>> + }
>> +
>> + private void closeWriter() throws IOException {
>> + writer.close();
>> + }
>> +
>> + /**
>> + * Taken from com.google.common.io.Files#createTempDir()
>> + * Modified to provide a prefix
>> + */
>> + private static File createTempDir(String prefix) {
>> + File baseDir = new
>> File(System.getProperty("java.io.tmpdir"));
>> + String baseName = System.currentTimeMillis() + "-";
>> +
>> + for (int counter = 0; counter < TEMP_DIR_ATTEMPTS; counter++)
>> {
>> + File tempDir = new File(baseDir, prefix + baseName +
>> counter);
>> + if (tempDir.mkdir()) {
>> + return tempDir;
>> + }
>> + }
>> + throw new IllegalStateException("Failed to create directory
>> within "
>> + + TEMP_DIR_ATTEMPTS + " attempts (tried "
>> + + baseName + "0 to " + baseName + (TEMP_DIR_ATTEMPTS
>> - 1) + ')');
>> + }
>> + }
>> +
>> + private static class CloseableIterator extends LineIterator
>> implements Closeable {
>> + public CloseableIterator(Reader reader) throws
>> IllegalArgumentException {
>> + super(reader);
>> + }
>> + }
>> +}
>>
>> Propchange:
>> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
>>
>> ------------------------------------------------------------------------------
>> svn:eol-style = native
>>
>> Added:
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java?rev=1666220&view=auto
>>
>> ==============================================================================
>> ---
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> (added)
>> +++
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>> Thu Mar 12 15:22:46 2015
>> @@ -0,0 +1,144 @@
>> +/*
>> + * Licensed to the Apache Software Foundation (ASF) under one
>> + * or more contributor license agreements. See the NOTICE file
>> + * distributed with this work for additional information
>> + * regarding copyright ownership. The ASF licenses this file
>> + * to you under the Apache License, Version 2.0 (the
>> + * "License"); you may not use this file except in compliance
>> + * with the License. You may obtain a copy of the License at
>> + *
>> + * http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing,
>> + * software distributed under the License is distributed on an
>> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
>> + * KIND, either express or implied. See the License for the
>> + * specific language governing permissions and limitations
>> + * under the License.
>> + */
>> +
>> +package org.apache.jackrabbit.oak.commons.sort;
>> +
>> +import java.io.IOException;
>> +import java.io.Serializable;
>> +import java.util.ArrayList;
>> +import java.util.Arrays;
>> +import java.util.Collections;
>> +import java.util.Comparator;
>> +import java.util.HashSet;
>> +import java.util.List;
>> +import java.util.Set;
>> +
>> +import com.google.common.base.Joiner;
>> +import com.google.common.collect.Collections2;
>> +import com.google.common.collect.ImmutableList;
>> +import org.junit.Test;
>> +
>> +import static org.junit.Assert.assertEquals;
>> +import static org.junit.Assert.assertFalse;
>> +import static org.junit.Assert.assertTrue;
>> +
>> +public class StringSortTest {
>> + private Comparator<String> comparator = new PathComparator();
>> + private StringSort collector;
>> +
>> + @Test
>> + public void inMemory() throws Exception{
>> + List<String> paths = createTestPaths(5, false);
>> + collector = new StringSort(paths.size() + 1,comparator);
>> + addPathsToCollector(paths);
>> +
>> + assertConstraints(paths);
>> + assertFalse(collector.usingFile());
>> + collector.close();
>> + }
>> +
>> + @Test
>> + public void overflowToDisk() throws Exception{
>> + //Create ~50k paths
>> + List<String> paths = createTestPaths(10, true);
>> + collector = new StringSort(1000, comparator);
>> + addPathsToCollector(paths);
>> +
>> + assertTrue(collector.usingFile());
>> + assertConstraints(paths);
>> +
>> + collector.close();
>> + }
>> +
>> + private void assertConstraints(List<String> paths) throws IOException
>> {
>> + assertEquals(paths.size(), collector.getSize());
>> +
>> + Collections.sort(paths, comparator);
>> + collector.sort();
>> +
>> + List<String> sortedPaths =
>> ImmutableList.copyOf(collector.getIds());
>> + assertEquals(paths.size(), sortedPaths.size());
>> + assertEquals(paths, sortedPaths);
>> + }
>> +
>> + private void addPathsToCollector(Iterable<String> paths) throws
>> IOException {
>> + for (String path : paths){
>> + collector.add(path);
>> + }
>> + }
>> +
>> + private static List<String> createTestPaths(int depth, boolean
>> permutation){
>> + List<String> rootPaths = Arrays.asList("a", "b", "c", "d", "e",
>> "f", "g");
>> + List<String> paths = new ArrayList<String>();
>> +
>> +
>> + if (permutation){
>> + List<String> newRoots = new ArrayList<String>();
>> + for (List<String> permuts :
>> Collections2.orderedPermutations(rootPaths)){
>> + newRoots.add(Joiner.on("").join(permuts));
>> + }
>> + rootPaths = newRoots;
>> + }
>> +
>> + for (String root : rootPaths){
>> + List<String> pathElements = new ArrayList<String>();
>> + pathElements.add(root);
>> + paths.add(createId(pathElements));
>> + for (int i = 0; i < depth; i++){
>> + pathElements.add(root + i);
>> + paths.add(createId(pathElements));
>> + }
>> + }
>> +
>> + Set<String> idSet = new HashSet<String>(paths);
>> + assertEquals(paths.size(), idSet.size());
>> +
>> + Collections.shuffle(paths);
>> + return paths;
>> + }
>> +
>> + private static String createId(Iterable<String> pathElements){
>> + return "/" + Joiner.on('/').join(pathElements);
>> + }
>> +
>> + private static class PathComparator implements Comparator<String>,
>> Serializable {
>> + @Override
>> + public int compare(String o1, String o2) {
>> + int d1 = pathDepth(o1);
>> + int d2 = pathDepth(o2);
>> + if (d1 != d2) {
>> + return Integer.signum(d2 - d1);
>> + }
>> + return o1.compareTo(o2);
>> + }
>> +
>> + private static int pathDepth(String path) {
>> + if (path.equals("/")) {
>> + return 0;
>> + }
>> + int depth = 0;
>> + for (int i = 0; i < path.length(); i++) {
>> + if (path.charAt(i) == '/') {
>> + depth++;
>> + }
>> + }
>> + return depth;
>> + }
>> + }
>> +}
>>
>> Propchange:
>> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
>>
>> ------------------------------------------------------------------------------
>> svn:eol-style = native
>>
>> Modified:
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>>
>> ==============================================================================
>> ---
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> (original)
>> +++
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
>> Thu Mar 12 15:22:46 2015
>> @@ -574,7 +574,11 @@ public class DocumentNodeStoreService {
>> RevisionGC revisionGC = new RevisionGC(new Runnable() {
>> @Override
>> public void run() {
>> -
>> store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs,
>> TimeUnit.SECONDS);
>> + try {
>> +
>> store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs,
>> TimeUnit.SECONDS);
>> + } catch (IOException e) {
>> + log.warn("Error occurred while executing the Version
>> Garbage Collector", e);
>> + }
>> }
>> }, executor);
>> registrations.add(registerMBean(whiteboard,
>> RevisionGCMBean.class, revisionGC,
>>
>> Modified:
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>>
>> ==============================================================================
>> ---
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> (original)
>> +++
>> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
>> Thu Mar 12 15:22:46 2015
>> @@ -19,9 +19,9 @@
>>
>> package org.apache.jackrabbit.oak.plugins.document;
>>
>> -import java.util.ArrayList;
>> -import java.util.Collections;
>> +import java.io.IOException;
>> import java.util.EnumSet;
>> +import java.util.Iterator;
>> import java.util.List;
>> import java.util.Set;
>> import java.util.concurrent.TimeUnit;
>> @@ -31,18 +31,23 @@ import com.google.common.base.StandardSy
>> import com.google.common.base.Stopwatch;
>> import com.google.common.collect.ImmutableList;
>>
>> +import org.apache.jackrabbit.oak.commons.sort.StringSort;
>> import org.apache.jackrabbit.oak.plugins.document.util.Utils;
>> import org.slf4j.Logger;
>> import org.slf4j.LoggerFactory;
>>
>> +import static com.google.common.collect.Iterators.partition;
>> import static
>> org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
>> import static
>> org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
>>
>> public class VersionGarbageCollector {
>> + //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid
>> re-partitioning
>> + private static final int DELETE_BATCH_SIZE = 450;
>> private final DocumentNodeStore nodeStore;
>> private final VersionGCSupport versionStore;
>> + private int overflowToDiskThreshold = 100000;
>>
>> - private final Logger log = LoggerFactory.getLogger(getClass());
>> + private static final Logger log =
>> LoggerFactory.getLogger(VersionGarbageCollector.class);
>>
>> /**
>> * Split document types which can be safely garbage collected
>> @@ -56,7 +61,7 @@ public class VersionGarbageCollector {
>> this.versionStore = gcSupport;
>> }
>>
>> - public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) {
>> + public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) throws
>> IOException {
>> long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
>> Stopwatch sw = Stopwatch.createStarted();
>> VersionGCStats stats = new VersionGCStats();
>> @@ -85,41 +90,60 @@ public class VersionGarbageCollector {
>> return stats;
>> }
>>
>> + public void setOverflowToDiskThreshold(int overflowToDiskThreshold) {
>> + this.overflowToDiskThreshold = overflowToDiskThreshold;
>> + }
>> +
>> private void collectSplitDocuments(VersionGCStats stats, long
>> oldestRevTimeStamp) {
>> versionStore.deleteSplitDocuments(GC_TYPES, oldestRevTimeStamp,
>> stats);
>> }
>>
>> - private void collectDeletedDocuments(VersionGCStats stats, Revision
>> headRevision, long oldestRevTimeStamp) {
>> - List<String> docIdsToDelete = new ArrayList<String>();
>> - Iterable<NodeDocument> itr =
>> versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
>> + private void collectDeletedDocuments(VersionGCStats stats, Revision
>> headRevision, long oldestRevTimeStamp)
>> + throws IOException {
>> + StringSort docIdsToDelete = new
>> StringSort(overflowToDiskThreshold, NodeDocumentIdComparator.INSTANCE);
>> try {
>> - for (NodeDocument doc : itr) {
>> - //Check if node is actually deleted at current revision
>> - //As node is not modified since oldestRevTimeStamp then
>> - //this node has not be revived again in past
>> maxRevisionAge
>> - //So deleting it is safe
>> - if (doc.getNodeAtRevision(nodeStore, headRevision, null)
>> == null) {
>> - docIdsToDelete.add(doc.getId());
>> - //Collect id of all previous docs also
>> - for (NodeDocument prevDoc :
>> ImmutableList.copyOf(doc.getAllPreviousDocs())) {
>> - docIdsToDelete.add(prevDoc.getId());
>> + Iterable<NodeDocument> itr =
>> versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
>> + try {
>> + for (NodeDocument doc : itr) {
>> + //Check if node is actually deleted at current
>> revision
>> + //As node is not modified since oldestRevTimeStamp
>> then
>> + //this node has not be revived again in past
>> maxRevisionAge
>> + //So deleting it is safe
>> + if (doc.getNodeAtRevision(nodeStore, headRevision,
>> null) == null) {
>> + docIdsToDelete.add(doc.getId());
>> + //Collect id of all previous docs also
>> + for (NodeDocument prevDoc :
>> ImmutableList.copyOf(doc.getAllPreviousDocs())) {
>> + docIdsToDelete.add(prevDoc.getId());
>> + }
>> }
>> }
>> + } finally {
>> + Utils.closeIfCloseable(itr);
>> + }
>> +
>> + if (docIdsToDelete.isEmpty()){
>> + return;
>> }
>> - } finally {
>> - Utils.closeIfCloseable(itr);
>> - }
>>
>> - Collections.sort(docIdsToDelete,
>> NodeDocumentIdComparator.INSTANCE);
>> + docIdsToDelete.sort();
>> + log.info("Proceeding to delete [{}] documents",
>> docIdsToDelete.getSize());
>>
>> - if(log.isDebugEnabled()) {
>> - StringBuilder sb = new StringBuilder("Deleted document with
>> following ids were deleted as part of GC \n");
>> -
>> Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb,
>> docIdsToDelete);
>> - log.debug(sb.toString());
>> + if (log.isDebugEnabled() && docIdsToDelete.getSize() < 1000)
>> {
>> + StringBuilder sb = new StringBuilder("Deleted document
>> with following ids were deleted as part of GC \n");
>> +
>> Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb,
>> docIdsToDelete.getIds());
>> + log.debug(sb.toString());
>> + }
>> +
>> + Iterator<List<String>> idListItr =
>> partition(docIdsToDelete.getIds(), DELETE_BATCH_SIZE);
>> + while (idListItr.hasNext()) {
>> + nodeStore.getDocumentStore().remove(Collection.NODES,
>> idListItr.next());
>> + }
>> +
>> + nodeStore.invalidateDocChildrenCache();
>> + stats.deletedDocGCCount += docIdsToDelete.getSize();
>> + } finally {
>> + docIdsToDelete.close();
>> }
>> - nodeStore.getDocumentStore().remove(Collection.NODES,
>> docIdsToDelete);
>> - nodeStore.invalidateDocChildrenCache();
>> - stats.deletedDocGCCount += docIdsToDelete.size();
>> }
>>
>> public static class VersionGCStats {
>>
>> Modified:
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>>
>> ==============================================================================
>> ---
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> (original)
>> +++
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
>> Thu Mar 12 15:22:46 2015
>> @@ -36,6 +36,7 @@ import org.junit.Before;
>> import org.junit.Test;
>>
>> import static java.util.concurrent.TimeUnit.HOURS;
>> +import static org.junit.Assert.assertEquals;
>> import static org.junit.Assert.assertNull;
>> import static org.junit.Assert.fail;
>>
>> @@ -104,6 +105,53 @@ public class VersionGCDeletionTest {
>> assertNull(ts.find(Collection.NODES, "1:/x"));
>> }
>>
>> + @Test
>> + public void deleteLargeNumber() throws Exception{
>> + int noOfDocsToDelete = 10000;
>> + DocumentStore ts = new MemoryDocumentStore();
>> + store = new DocumentMK.Builder()
>> + .clock(clock)
>> + .setDocumentStore(new MemoryDocumentStore())
>> + .setAsyncDelay(0)
>> + .getNodeStore();
>> +
>> + //Baseline the clock
>> + clock.waitUntil(Revision.getCurrentTimestamp());
>> +
>> + NodeBuilder b1 = store.getRoot().builder();
>> + NodeBuilder xb = b1.child("x");
>> + for (int i = 0; i < noOfDocsToDelete; i++){
>> + xb.child("a"+i).child("b"+i);
>> + }
>> + store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
>> +
>> + long maxAge = 1; //hours
>> + long delta = TimeUnit.MINUTES.toMillis(10);
>> +
>> + //Remove x/y
>> + NodeBuilder b2 = store.getRoot().builder();
>> + b2.child("x").remove();
>> + store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
>> +
>> + store.runBackgroundOperations();
>> +
>> + //3. Check that deleted doc does get collected post maxAge
>> + clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) +
>> delta);
>> + VersionGarbageCollector gc = store.getVersionGarbageCollector();
>> + gc.setOverflowToDiskThreshold(100);
>> +
>> + VersionGarbageCollector.VersionGCStats stats = gc.gc(maxAge * 2,
>> HOURS);
>> + assertEquals(noOfDocsToDelete * 2 + 1, stats.deletedDocGCCount);
>> +
>> +
>> + assertNull(ts.find(Collection.NODES, "1:/x"));
>> +
>> + for (int i = 0; i < noOfDocsToDelete; i++){
>> + assertNull(ts.find(Collection.NODES, "2:/a"+i+"/b"+i));
>> + assertNull(ts.find(Collection.NODES, "1:/a"+i));
>> + }
>> + }
>> +
>> private static class TestDocumentStore extends MemoryDocumentStore {
>> boolean throwException;
>> @Override
>>
>> Modified:
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> URL:
>> http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
>>
>> ==============================================================================
>> ---
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> (original)
>> +++
>> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>> Thu Mar 12 15:22:46 2015
>> @@ -137,7 +137,11 @@ public class VersionGCWithSplitTest {
>> Thread t = new Thread(new Runnable() {
>> @Override
>> public void run() {
>> - stats.set(gc.gc(1, HOURS));
>> + try {
>> + stats.set(gc.gc(1, HOURS));
>> + } catch (IOException e) {
>> + throw new RuntimeException(e);
>> + }
>> }
>> });
>>
>>
>>
>>
>
Re: svn commit: r1666220 - in /jackrabbit/oak/trunk: oak-commons/
oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/
oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/d...
Posted by Julian Reschke <ju...@gmx.de>.
With this change, I get a reliable test failure on Windows:
Tests in error:
overflowToDisk(org.apache.jackrabbit.oak.commons.sort.StringSortTest):
Unable to delete file: C:\tmp\oak-sorter-1426178913437-0\strings-sorted.txt
Best regards, Julian
On 2015-03-12 16:22, chetanm@apache.org wrote:
> Author: chetanm
> Date: Thu Mar 12 15:22:46 2015
> New Revision: 1666220
>
> URL: http://svn.apache.org/r1666220
> Log:
> OAK-2557 - VersionGC uses way too much memory if there is a large pile of garbage
>
> Added:
> jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java (with props)
> jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java (with props)
> Modified:
> jackrabbit/oak/trunk/oak-commons/pom.xml
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
> jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
> jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
>
> Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1666220&r1=1666219&r2=1666220&view=diff
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
> +++ jackrabbit/oak/trunk/oak-commons/pom.xml Thu Mar 12 15:22:46 2015
> @@ -93,6 +93,11 @@
> <artifactId>oak-mk-api</artifactId>
> <version>${project.version}</version>
> </dependency>
> + <dependency>
> + <groupId>commons-io</groupId>
> + <artifactId>commons-io</artifactId>
> + <version>2.4</version>
> + </dependency>
>
> <!-- Test dependencies -->
> <dependency>
>
> Added: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1666220&view=auto
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java (added)
> +++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java Thu Mar 12 15:22:46 2015
> @@ -0,0 +1,255 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one
> + * or more contributor license agreements. See the NOTICE file
> + * distributed with this work for additional information
> + * regarding copyright ownership. The ASF licenses this file
> + * to you under the Apache License, Version 2.0 (the
> + * "License"); you may not use this file except in compliance
> + * with the License. You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing,
> + * software distributed under the License is distributed on an
> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> + * KIND, either express or implied. See the License for the
> + * specific language governing permissions and limitations
> + * under the License.
> + */
> +
> +package org.apache.jackrabbit.oak.commons.sort;
> +
> +import java.io.BufferedWriter;
> +import java.io.Closeable;
> +import java.io.File;
> +import java.io.FileNotFoundException;
> +import java.io.IOException;
> +import java.io.Reader;
> +import java.nio.charset.Charset;
> +import java.util.Collections;
> +import java.util.Comparator;
> +import java.util.Iterator;
> +import java.util.List;
> +
> +import com.google.common.base.Charsets;
> +import com.google.common.collect.Lists;
> +import com.google.common.io.Closer;
> +import com.google.common.io.Files;
> +import org.apache.commons.io.FileUtils;
> +import org.apache.commons.io.LineIterator;
> +import org.slf4j.Logger;
> +import org.slf4j.LoggerFactory;
> +
> +/**
> + * Utility class to store a list of string and perform sort on that. For small size
> + * the list would be maintained in memory. If the size crosses the required threshold then
> + * the sorting would be performed externally
> + */
> +public class StringSort implements Closeable {
> + private final Logger log = LoggerFactory.getLogger(getClass());
> + public static final int BATCH_SIZE = 2048;
> +
> + private final int overflowToDiskThreshold;
> + private final Comparator<String> comparator;
> +
> + private final List<String> ids = Lists.newArrayList();
> + private long size;
> +
> + private final List<String> inMemBatch = Lists.newArrayList();
> +
> + private boolean useFile;
> + private PersistentState persistentState;
> +
> + public StringSort(int overflowToDiskThreshold, Comparator<String> comparator) {
> + this.overflowToDiskThreshold = overflowToDiskThreshold;
> + this.comparator = comparator;
> + }
> +
> + public void add(String id) throws IOException {
> + if (useFile) {
> + addToBatch(id);
> + } else {
> + ids.add(id);
> + if (ids.size() >= overflowToDiskThreshold) {
> + flushToFile(ids);
> + useFile = true;
> + log.debug("In memory buffer crossed the threshold of {}. " +
> + "Switching to filesystem [{}] to manage the state", overflowToDiskThreshold, persistentState);
> + }
> + }
> + size++;
> + }
> +
> + public void sort() throws IOException {
> + if (useFile) {
> + //Flush the last batch
> + flushToFile(inMemBatch);
> + persistentState.sort();
> + } else {
> + Collections.sort(ids, comparator);
> + }
> + }
> +
> + public Iterator<String> getIds() throws IOException {
> + if (useFile) {
> + return persistentState.getIterator();
> + } else {
> + return ids.iterator();
> + }
> + }
> +
> + public long getSize() {
> + return size;
> + }
> +
> + public boolean isEmpty() {
> + return size == 0;
> + }
> +
> + public boolean usingFile() {
> + return useFile;
> + }
> +
> + @Override
> + public void close() throws IOException {
> + if (persistentState != null) {
> + persistentState.close();
> + }
> + }
> +
> + private void addToBatch(String id) throws IOException {
> + inMemBatch.add(id);
> + if (inMemBatch.size() >= BATCH_SIZE) {
> + flushToFile(inMemBatch);
> + }
> + }
> +
> + private void flushToFile(List<String> ids) throws IOException {
> + BufferedWriter w = getPersistentState().getWriter();
> + for (String id : ids) {
> + w.write(id);
> + w.newLine();
> + }
> + ids.clear();
> + }
> +
> + private PersistentState getPersistentState() {
> + //Lazily initialize the persistent state
> + if (persistentState == null) {
> + persistentState = new PersistentState(comparator);
> + }
> + return persistentState;
> + }
> +
> + private static class PersistentState implements Closeable {
> + /**
> + * Maximum loop count when creating temp directories.
> + */
> + private static final int TEMP_DIR_ATTEMPTS = 10000;
> +
> + private final Charset charset = Charsets.UTF_8;
> + private final File workDir;
> + private final Comparator<String> comparator;
> + private File idFile;
> + private File sortedFile;
> + private BufferedWriter writer;
> + private List<CloseableIterator> openedIterators = Lists.newArrayList();
> +
> + public PersistentState(Comparator<String> comparator) {
> + this(comparator, createTempDir("oak-sorter-"));
> + }
> +
> + public PersistentState(Comparator<String> comparator, File workDir) {
> + this.workDir = workDir;
> + this.comparator = comparator;
> + }
> +
> + public BufferedWriter getWriter() throws FileNotFoundException {
> + if (idFile == null) {
> + idFile = new File(workDir, "strings.txt");
> + sortedFile = new File(workDir, "strings-sorted.txt");
> + writer = Files.newWriter(idFile, charset);
> + }
> + return writer;
> + }
> +
> + public void sort() throws IOException {
> + closeWriter();
> +
> + List<File> sortedFiles = ExternalSort.sortInBatch(idFile,
> + comparator, //Comparator to use
> + ExternalSort.DEFAULTMAXTEMPFILES,
> + ExternalSort.DEFAULT_MAX_MEM_BYTES,
> + charset, //charset
> + workDir, //temp directory where intermediate files are created
> + true //distinct
> + );
> +
> + ExternalSort.mergeSortedFiles(sortedFiles,
> + sortedFile,
> + comparator,
> + charset,
> + true
> + );
> + }
> +
> + public Iterator<String> getIterator() throws IOException {
> + CloseableIterator itr = new CloseableIterator(Files.newReader(sortedFile, charset));
> + openedIterators.add(itr);
> + return itr;
> + }
> +
> + @Override
> + public String toString() {
> + return "PersistentState : workDir=" + workDir.getAbsolutePath();
> + }
> +
> + @Override
> + public void close() throws IOException {
> + Closer closer = Closer.create();
> + try {
> + closer.register(writer);
> + for (CloseableIterator citr : openedIterators) {
> + closer.register(citr);
> + }
> + closer.register(new Closeable() {
> + @Override
> + public void close() throws IOException {
> + FileUtils.deleteDirectory(workDir);
> + }
> + });
> + } finally {
> + closer.close();
> + }
> + }
> +
> + private void closeWriter() throws IOException {
> + writer.close();
> + }
> +
> + /**
> + * Taken from com.google.common.io.Files#createTempDir()
> + * Modified to provide a prefix
> + */
> + private static File createTempDir(String prefix) {
> + File baseDir = new File(System.getProperty("java.io.tmpdir"));
> + String baseName = System.currentTimeMillis() + "-";
> +
> + for (int counter = 0; counter < TEMP_DIR_ATTEMPTS; counter++) {
> + File tempDir = new File(baseDir, prefix + baseName + counter);
> + if (tempDir.mkdir()) {
> + return tempDir;
> + }
> + }
> + throw new IllegalStateException("Failed to create directory within "
> + + TEMP_DIR_ATTEMPTS + " attempts (tried "
> + + baseName + "0 to " + baseName + (TEMP_DIR_ATTEMPTS - 1) + ')');
> + }
> + }
> +
> + private static class CloseableIterator extends LineIterator implements Closeable {
> + public CloseableIterator(Reader reader) throws IllegalArgumentException {
> + super(reader);
> + }
> + }
> +}
>
> Propchange: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
> ------------------------------------------------------------------------------
> svn:eol-style = native
>
> Added: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java?rev=1666220&view=auto
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java (added)
> +++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java Thu Mar 12 15:22:46 2015
> @@ -0,0 +1,144 @@
> +/*
> + * Licensed to the Apache Software Foundation (ASF) under one
> + * or more contributor license agreements. See the NOTICE file
> + * distributed with this work for additional information
> + * regarding copyright ownership. The ASF licenses this file
> + * to you under the Apache License, Version 2.0 (the
> + * "License"); you may not use this file except in compliance
> + * with the License. You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing,
> + * software distributed under the License is distributed on an
> + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
> + * KIND, either express or implied. See the License for the
> + * specific language governing permissions and limitations
> + * under the License.
> + */
> +
> +package org.apache.jackrabbit.oak.commons.sort;
> +
> +import java.io.IOException;
> +import java.io.Serializable;
> +import java.util.ArrayList;
> +import java.util.Arrays;
> +import java.util.Collections;
> +import java.util.Comparator;
> +import java.util.HashSet;
> +import java.util.List;
> +import java.util.Set;
> +
> +import com.google.common.base.Joiner;
> +import com.google.common.collect.Collections2;
> +import com.google.common.collect.ImmutableList;
> +import org.junit.Test;
> +
> +import static org.junit.Assert.assertEquals;
> +import static org.junit.Assert.assertFalse;
> +import static org.junit.Assert.assertTrue;
> +
> +public class StringSortTest {
> + private Comparator<String> comparator = new PathComparator();
> + private StringSort collector;
> +
> + @Test
> + public void inMemory() throws Exception{
> + List<String> paths = createTestPaths(5, false);
> + collector = new StringSort(paths.size() + 1,comparator);
> + addPathsToCollector(paths);
> +
> + assertConstraints(paths);
> + assertFalse(collector.usingFile());
> + collector.close();
> + }
> +
> + @Test
> + public void overflowToDisk() throws Exception{
> + //Create ~50k paths
> + List<String> paths = createTestPaths(10, true);
> + collector = new StringSort(1000, comparator);
> + addPathsToCollector(paths);
> +
> + assertTrue(collector.usingFile());
> + assertConstraints(paths);
> +
> + collector.close();
> + }
> +
> + private void assertConstraints(List<String> paths) throws IOException {
> + assertEquals(paths.size(), collector.getSize());
> +
> + Collections.sort(paths, comparator);
> + collector.sort();
> +
> + List<String> sortedPaths = ImmutableList.copyOf(collector.getIds());
> + assertEquals(paths.size(), sortedPaths.size());
> + assertEquals(paths, sortedPaths);
> + }
> +
> + private void addPathsToCollector(Iterable<String> paths) throws IOException {
> + for (String path : paths){
> + collector.add(path);
> + }
> + }
> +
> + private static List<String> createTestPaths(int depth, boolean permutation){
> + List<String> rootPaths = Arrays.asList("a", "b", "c", "d", "e", "f", "g");
> + List<String> paths = new ArrayList<String>();
> +
> +
> + if (permutation){
> + List<String> newRoots = new ArrayList<String>();
> + for (List<String> permuts : Collections2.orderedPermutations(rootPaths)){
> + newRoots.add(Joiner.on("").join(permuts));
> + }
> + rootPaths = newRoots;
> + }
> +
> + for (String root : rootPaths){
> + List<String> pathElements = new ArrayList<String>();
> + pathElements.add(root);
> + paths.add(createId(pathElements));
> + for (int i = 0; i < depth; i++){
> + pathElements.add(root + i);
> + paths.add(createId(pathElements));
> + }
> + }
> +
> + Set<String> idSet = new HashSet<String>(paths);
> + assertEquals(paths.size(), idSet.size());
> +
> + Collections.shuffle(paths);
> + return paths;
> + }
> +
> + private static String createId(Iterable<String> pathElements){
> + return "/" + Joiner.on('/').join(pathElements);
> + }
> +
> + private static class PathComparator implements Comparator<String>, Serializable {
> + @Override
> + public int compare(String o1, String o2) {
> + int d1 = pathDepth(o1);
> + int d2 = pathDepth(o2);
> + if (d1 != d2) {
> + return Integer.signum(d2 - d1);
> + }
> + return o1.compareTo(o2);
> + }
> +
> + private static int pathDepth(String path) {
> + if (path.equals("/")) {
> + return 0;
> + }
> + int depth = 0;
> + for (int i = 0; i < path.length(); i++) {
> + if (path.charAt(i) == '/') {
> + depth++;
> + }
> + }
> + return depth;
> + }
> + }
> +}
>
> Propchange: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
> ------------------------------------------------------------------------------
> svn:eol-style = native
>
> Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java (original)
> +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreService.java Thu Mar 12 15:22:46 2015
> @@ -574,7 +574,11 @@ public class DocumentNodeStoreService {
> RevisionGC revisionGC = new RevisionGC(new Runnable() {
> @Override
> public void run() {
> - store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs, TimeUnit.SECONDS);
> + try {
> + store.getVersionGarbageCollector().gc(versionGcMaxAgeInSecs, TimeUnit.SECONDS);
> + } catch (IOException e) {
> + log.warn("Error occurred while executing the Version Garbage Collector", e);
> + }
> }
> }, executor);
> registrations.add(registerMBean(whiteboard, RevisionGCMBean.class, revisionGC,
>
> Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java (original)
> +++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/document/VersionGarbageCollector.java Thu Mar 12 15:22:46 2015
> @@ -19,9 +19,9 @@
>
> package org.apache.jackrabbit.oak.plugins.document;
>
> -import java.util.ArrayList;
> -import java.util.Collections;
> +import java.io.IOException;
> import java.util.EnumSet;
> +import java.util.Iterator;
> import java.util.List;
> import java.util.Set;
> import java.util.concurrent.TimeUnit;
> @@ -31,18 +31,23 @@ import com.google.common.base.StandardSy
> import com.google.common.base.Stopwatch;
> import com.google.common.collect.ImmutableList;
>
> +import org.apache.jackrabbit.oak.commons.sort.StringSort;
> import org.apache.jackrabbit.oak.plugins.document.util.Utils;
> import org.slf4j.Logger;
> import org.slf4j.LoggerFactory;
>
> +import static com.google.common.collect.Iterators.partition;
> import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
> import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
>
> public class VersionGarbageCollector {
> + //Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid re-partitioning
> + private static final int DELETE_BATCH_SIZE = 450;
> private final DocumentNodeStore nodeStore;
> private final VersionGCSupport versionStore;
> + private int overflowToDiskThreshold = 100000;
>
> - private final Logger log = LoggerFactory.getLogger(getClass());
> + private static final Logger log = LoggerFactory.getLogger(VersionGarbageCollector.class);
>
> /**
> * Split document types which can be safely garbage collected
> @@ -56,7 +61,7 @@ public class VersionGarbageCollector {
> this.versionStore = gcSupport;
> }
>
> - public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) {
> + public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) throws IOException {
> long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
> Stopwatch sw = Stopwatch.createStarted();
> VersionGCStats stats = new VersionGCStats();
> @@ -85,41 +90,60 @@ public class VersionGarbageCollector {
> return stats;
> }
>
> + public void setOverflowToDiskThreshold(int overflowToDiskThreshold) {
> + this.overflowToDiskThreshold = overflowToDiskThreshold;
> + }
> +
> private void collectSplitDocuments(VersionGCStats stats, long oldestRevTimeStamp) {
> versionStore.deleteSplitDocuments(GC_TYPES, oldestRevTimeStamp, stats);
> }
>
> - private void collectDeletedDocuments(VersionGCStats stats, Revision headRevision, long oldestRevTimeStamp) {
> - List<String> docIdsToDelete = new ArrayList<String>();
> - Iterable<NodeDocument> itr = versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
> + private void collectDeletedDocuments(VersionGCStats stats, Revision headRevision, long oldestRevTimeStamp)
> + throws IOException {
> + StringSort docIdsToDelete = new StringSort(overflowToDiskThreshold, NodeDocumentIdComparator.INSTANCE);
> try {
> - for (NodeDocument doc : itr) {
> - //Check if node is actually deleted at current revision
> - //As node is not modified since oldestRevTimeStamp then
> - //this node has not be revived again in past maxRevisionAge
> - //So deleting it is safe
> - if (doc.getNodeAtRevision(nodeStore, headRevision, null) == null) {
> - docIdsToDelete.add(doc.getId());
> - //Collect id of all previous docs also
> - for (NodeDocument prevDoc : ImmutableList.copyOf(doc.getAllPreviousDocs())) {
> - docIdsToDelete.add(prevDoc.getId());
> + Iterable<NodeDocument> itr = versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
> + try {
> + for (NodeDocument doc : itr) {
> + //Check if node is actually deleted at current revision
> + //As node is not modified since oldestRevTimeStamp then
> + //this node has not be revived again in past maxRevisionAge
> + //So deleting it is safe
> + if (doc.getNodeAtRevision(nodeStore, headRevision, null) == null) {
> + docIdsToDelete.add(doc.getId());
> + //Collect id of all previous docs also
> + for (NodeDocument prevDoc : ImmutableList.copyOf(doc.getAllPreviousDocs())) {
> + docIdsToDelete.add(prevDoc.getId());
> + }
> }
> }
> + } finally {
> + Utils.closeIfCloseable(itr);
> + }
> +
> + if (docIdsToDelete.isEmpty()){
> + return;
> }
> - } finally {
> - Utils.closeIfCloseable(itr);
> - }
>
> - Collections.sort(docIdsToDelete, NodeDocumentIdComparator.INSTANCE);
> + docIdsToDelete.sort();
> + log.info("Proceeding to delete [{}] documents", docIdsToDelete.getSize());
>
> - if(log.isDebugEnabled()) {
> - StringBuilder sb = new StringBuilder("Deleted document with following ids were deleted as part of GC \n");
> - Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb, docIdsToDelete);
> - log.debug(sb.toString());
> + if (log.isDebugEnabled() && docIdsToDelete.getSize() < 1000) {
> + StringBuilder sb = new StringBuilder("Deleted document with following ids were deleted as part of GC \n");
> + Joiner.on(StandardSystemProperty.LINE_SEPARATOR.value()).appendTo(sb, docIdsToDelete.getIds());
> + log.debug(sb.toString());
> + }
> +
> + Iterator<List<String>> idListItr = partition(docIdsToDelete.getIds(), DELETE_BATCH_SIZE);
> + while (idListItr.hasNext()) {
> + nodeStore.getDocumentStore().remove(Collection.NODES, idListItr.next());
> + }
> +
> + nodeStore.invalidateDocChildrenCache();
> + stats.deletedDocGCCount += docIdsToDelete.getSize();
> + } finally {
> + docIdsToDelete.close();
> }
> - nodeStore.getDocumentStore().remove(Collection.NODES, docIdsToDelete);
> - nodeStore.invalidateDocChildrenCache();
> - stats.deletedDocGCCount += docIdsToDelete.size();
> }
>
> public static class VersionGCStats {
>
> Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java (original)
> +++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java Thu Mar 12 15:22:46 2015
> @@ -36,6 +36,7 @@ import org.junit.Before;
> import org.junit.Test;
>
> import static java.util.concurrent.TimeUnit.HOURS;
> +import static org.junit.Assert.assertEquals;
> import static org.junit.Assert.assertNull;
> import static org.junit.Assert.fail;
>
> @@ -104,6 +105,53 @@ public class VersionGCDeletionTest {
> assertNull(ts.find(Collection.NODES, "1:/x"));
> }
>
> + @Test
> + public void deleteLargeNumber() throws Exception{
> + int noOfDocsToDelete = 10000;
> + DocumentStore ts = new MemoryDocumentStore();
> + store = new DocumentMK.Builder()
> + .clock(clock)
> + .setDocumentStore(new MemoryDocumentStore())
> + .setAsyncDelay(0)
> + .getNodeStore();
> +
> + //Baseline the clock
> + clock.waitUntil(Revision.getCurrentTimestamp());
> +
> + NodeBuilder b1 = store.getRoot().builder();
> + NodeBuilder xb = b1.child("x");
> + for (int i = 0; i < noOfDocsToDelete; i++){
> + xb.child("a"+i).child("b"+i);
> + }
> + store.merge(b1, EmptyHook.INSTANCE, CommitInfo.EMPTY);
> +
> + long maxAge = 1; //hours
> + long delta = TimeUnit.MINUTES.toMillis(10);
> +
> + //Remove x/y
> + NodeBuilder b2 = store.getRoot().builder();
> + b2.child("x").remove();
> + store.merge(b2, EmptyHook.INSTANCE, CommitInfo.EMPTY);
> +
> + store.runBackgroundOperations();
> +
> + //3. Check that deleted doc does get collected post maxAge
> + clock.waitUntil(clock.getTime() + HOURS.toMillis(maxAge*2) + delta);
> + VersionGarbageCollector gc = store.getVersionGarbageCollector();
> + gc.setOverflowToDiskThreshold(100);
> +
> + VersionGarbageCollector.VersionGCStats stats = gc.gc(maxAge * 2, HOURS);
> + assertEquals(noOfDocsToDelete * 2 + 1, stats.deletedDocGCCount);
> +
> +
> + assertNull(ts.find(Collection.NODES, "1:/x"));
> +
> + for (int i = 0; i < noOfDocsToDelete; i++){
> + assertNull(ts.find(Collection.NODES, "2:/a"+i+"/b"+i));
> + assertNull(ts.find(Collection.NODES, "1:/a"+i));
> + }
> + }
> +
> private static class TestDocumentStore extends MemoryDocumentStore {
> boolean throwException;
> @Override
>
> Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java
> URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java?rev=1666220&r1=1666219&r2=1666220&view=diff
> ==============================================================================
> --- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java (original)
> +++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCWithSplitTest.java Thu Mar 12 15:22:46 2015
> @@ -137,7 +137,11 @@ public class VersionGCWithSplitTest {
> Thread t = new Thread(new Runnable() {
> @Override
> public void run() {
> - stats.set(gc.gc(1, HOURS));
> + try {
> + stats.set(gc.gc(1, HOURS));
> + } catch (IOException e) {
> + throw new RuntimeException(e);
> + }
> }
> });
>
>
>
>