You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2015/09/11 13:08:05 UTC
svn commit: r1702426 - in /jackrabbit/oak/trunk:
oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/
oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/
oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/ ...
Author: amitj
Date: Fri Sep 11 11:08:05 2015
New Revision: 1702426
URL: http://svn.apache.org/r1702426
Log:
OAK-3184: Consistency checker for data/blob store
Fixed failures for inlined blobs and duplicate entries not correctly being removed
Added test cases
Added:
jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.csv
Modified:
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/InMemoryDataRecord.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
Modified: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java Fri Sep 11 11:08:05 2015
@@ -295,7 +295,7 @@ public class ExternalSort {
try {
for (String r : tmplist) {
// Skip duplicate lines
- if (!distinct || !r.equals(lastLine)) {
+ if (!distinct || (lastLine == null || (lastLine != null && cmp.compare(r, lastLine) != 0))) {
fbw.write(r);
fbw.newLine();
lastLine = r;
@@ -453,7 +453,7 @@ public class ExternalSort {
BinaryFileBuffer bfb = pq.poll();
String r = bfb.pop();
// Skip duplicate lines
- if (!distinct || !r.equals(lastLine)) {
+ if (!distinct || (lastLine == null || (lastLine != null && cmp.compare(r, lastLine) != 0))) {
fbw.write(r);
fbw.newLine();
lastLine = r;
Modified: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java Fri Sep 11 11:08:05 2015
@@ -78,6 +78,8 @@ public class ExternalSortTest {
private static final String TEST_FILE1_TXT = "test-file-1.txt";
private static final String TEST_FILE2_TXT = "test-file-2.txt";
private static final String TEST_FILE1_CSV = "test-file-1.csv";
+ private static final String TEST_FILE2_CSV = "test-file-2.csv";
+
private static final String[] EXPECTED_SORT_RESULTS = { "a", "b", "b", "e",
"f", "i", "m", "o", "u", "u", "x", "y", "z" };
private static final String[] EXPECTED_MERGE_RESULTS = { "a", "a", "b",
@@ -90,10 +92,14 @@ public class ExternalSortTest {
"f", "i", "m", "o", "u", "x", "y", "z" };
private static final String[] SAMPLE = { "f", "m", "b", "e", "i", "o", "u",
"x", "a", "y", "z", "b", "u" };
-
+ private static final String[] EXPECTED_CSV_DISTINCT_RESULTS = { "a,1", "b,2a", "e,3", "f,4", "i,5", "m,6", "o,7",
+ "u,8a", "x,9", "y,10", "z,11"};
+ private static final String[] EXPECTED_CSV_RESULTS = { "a,1", "b,2a", "b,2b", "e,3", "f,4", "i,5", "m,6", "o,7",
+ "u,8a", "u,8b", "x,9", "y,10", "z,11"};
private File file1;
private File file2;
private File csvFile;
+ private File csvFile2;
private List<File> fileList;
/**
@@ -340,6 +346,51 @@ public class ExternalSortTest {
testCSVSortingWithParams(true);
}
+ @Test
+ public void testCSVKeyValueSorting() throws Exception {
+ testCSVSortKeyValue(false);
+ testCSVSortKeyValue(true);
+ }
+
+ /**
+ * Sample case to sort csv file with key, value pair.
+ *
+ * @param distinct whether duplicate records (per the comparator) are omitted from the output
+ * @throws Exception
+ *
+ */
+ public void testCSVSortKeyValue(boolean distinct) throws Exception {
+
+ File out = File.createTempFile("test_results", ".tmp", null);
+
+ Comparator<String> cmp = new Comparator<String>() {
+ @Override
+ public int compare(String s1, String s2) {
+ return s1.split(",")[0].compareTo(s2.split(",")[0]);
+ }
+ };
+
+ List<File> listOfFiles = ExternalSort.sortInBatch(this.csvFile2, cmp,
+ ExternalSort.DEFAULTMAXTEMPFILES,
+ ExternalSort.DEFAULT_MAX_MEM_BYTES,
+ Charset.defaultCharset(),
+ null, distinct, 0, false);
+
+ // now merge with append
+ ExternalSort.mergeSortedFiles(listOfFiles, out, cmp,
+ Charset.defaultCharset(), distinct, true, false);
+ ArrayList<String> result = readLines(out);
+
+ if (distinct) {
+ assertEquals(11, result.size());
+ assertArrayEquals(Arrays.toString(result.toArray()), EXPECTED_CSV_DISTINCT_RESULTS, result.toArray());
+ } else {
+ assertEquals(13, result.size());
+ assertArrayEquals(Arrays.toString(result.toArray()), EXPECTED_CSV_RESULTS, result.toArray());
+ }
+
+ }
+
/**
* Sample case to sort csv file.
*
@@ -404,4 +455,4 @@ public class ExternalSortTest {
}
}
-}
\ No newline at end of file
+}
Added: jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.csv
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.csv?rev=1702426&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.csv (added)
+++ jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.csv Fri Sep 11 11:08:05 2015
@@ -0,0 +1,13 @@
+f,4
+m,6
+b,2a
+e,3
+i,5
+o,7
+u,8a
+x,9
+a,1
+y,10
+z,11
+b,2b
+u,8b
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java Fri Sep 11 11:08:05 2015
@@ -126,12 +126,19 @@ public class BlobGC extends AnnotatedSt
public String call() throws Exception {
long t0 = nanoTime();
long missing = blobGarbageCollector.checkConsistency();
- return missing + "missing blobs found (details in the log). Consistency check completed in "
- + formatTime(nanoTime() - t0);
+ return "Consistency check completed in "
+ + formatTime(nanoTime() - t0) + " " +
+ + missing + "missing blobs found (details in the log).";
}
});
executor.execute(consistencyOp);
}
+ return getConsistencyCheckStatus();
+ }
+
+ @Nonnull
+ @Override
+ public CompositeData getConsistencyCheckStatus() {
return consistencyOp.getStatus().toCompositeData();
}
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java Fri Sep 11 11:08:05 2015
@@ -68,4 +68,13 @@ public interface BlobGCMBean {
* @return the missing blobs
*/
CompositeData checkConsistency();
+
+ /**
+ * Consistency check status
+ *
+ * @return the status of the ongoing operation or, if none, the terminal
+ * status of the last operation, or <em>Status not available</em> if none.
+ */
+ @Nonnull
+ CompositeData getConsistencyCheckStatus();
}
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java Fri Sep 11 11:08:05 2015
@@ -58,6 +58,7 @@ import org.apache.commons.io.LineIterato
import org.apache.jackrabbit.core.data.DataRecord;
import org.apache.jackrabbit.core.data.DataStoreException;
import org.apache.jackrabbit.oak.commons.IOUtils;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.InMemoryDataRecord;
import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils;
import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils.SharedStoreRecordType;
import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
@@ -568,12 +569,13 @@ public class MarkSweepGarbageCollector i
candidates = calculateDifference(fs, iter);
LOG.trace("Ending difference phase of the consistency check");
+ LOG.info("Consistency check found [{}] missing blobs", candidates);
if (candidates > 0) {
LOG.warn("Consistency check failure in the the blob store : {}, check missing candidates in file {}",
blobStore, fs.getGcCandidates().getAbsolutePath());
}
} finally {
- if (!LOG.isTraceEnabled() || candidates == 0) {
+ if (!LOG.isTraceEnabled() && candidates == 0) {
Closeables.close(fs, threw);
}
}
@@ -617,7 +619,7 @@ public class MarkSweepGarbageCollector i
// sort the file
GarbageCollectorFileState.sort(fs.getAvailableRefs());
- LOG.debug("Number of blobs present in BlobStore : [{}] ", blobsCount);
+ LOG.info("Number of blobs present in BlobStore : [{}] ", blobsCount);
} finally {
IOUtils.closeQuietly(bufferWriter);
}
@@ -692,7 +694,12 @@ public class MarkSweepGarbageCollector i
} else {
//This entry is not found in marked entries
//hence part of diff
- return diff;
+ if (!InMemoryDataRecord.isInstance(getKey(diff))) {
+ return diff;
+ } else {
+ diff = null;
+ break;
+ }
}
}
}
Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/InMemoryDataRecord.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/InMemoryDataRecord.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/InMemoryDataRecord.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/InMemoryDataRecord.java Fri Sep 11 11:08:05 2015
@@ -31,7 +31,7 @@ import org.slf4j.LoggerFactory;
/**
* Represents binary data which is backed by a byte[] (in memory).
*/
-class InMemoryDataRecord implements DataRecord {
+public class InMemoryDataRecord implements DataRecord {
/**
* Logger instance for this class
@@ -89,7 +89,7 @@ class InMemoryDataRecord implements Data
* @param id DataRecord identifier
* @return true if it can be converted
*/
- static boolean isInstance(String id) {
+ public static boolean isInstance(String id) {
return id.startsWith(PREFIX);
}
Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java Fri Sep 11 11:08:05 2015
@@ -89,7 +89,21 @@ public class FileLineDifferenceIteratorT
assertDiff("a,b,d,e,f", "a,b,c,f, h", asList("c", "h"));
assertDiff("3,7", "2,3,5,9", asList("2", "5", "9"));
}
-
+
+ @Test
+ public void testMarkedDiffWithExtraEntriesInMarked() throws IOException {
+ assertReverseDiff("a,b,c,e,h", "a,b,c", asList("e", "h"));
+ assertReverseDiff("a,b,d,e", "a,b,c", asList("d", "e"));
+ assertReverseDiff("a,b,d", "a,b,d", Collections.<String>emptyList());
+ assertReverseDiff("a,0xb,d,e,f", "a,d", asList("e", "f"));
+ assertReverseDiff("a,0xb,d,e,f", "a,d,e,f,g", Collections.<String>emptyList());
+ }
+
+ private static void assertReverseDiff(String marked, String all, List<String> diff) throws IOException {
+ Iterator<String> itr = createItr(all, marked);
+ assertThat("marked: " + marked + " all: " + all, ImmutableList.copyOf(itr), is(diff));
+ }
+
private static void assertDiff(String marked, String all, List<String> diff) throws IOException {
Iterator<String> itr = createItr(marked, all);
assertThat("marked: " + marked + " all: " + all, ImmutableList.copyOf(itr), is(diff));
Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java Fri Sep 11 11:08:05 2015
@@ -86,6 +86,10 @@ public class MongoBlobGCTest extends Abs
}
}
a.child("c" + i).setProperty("x", b);
+ // Add a duplicated entry
+ if (i == 0) {
+ a.child("cdup").setProperty("x", b);
+ }
}
s.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
@@ -163,6 +167,7 @@ public class MongoBlobGCTest extends Abs
Set<String> existingAfterGC = gc(0);
assertTrue(Sets.symmetricDifference(state.blobsPresent, existingAfterGC).isEmpty());
}
+
@Test
public void gcVersionDeleteWithInlined() throws Exception {
DataStoreState state = setUp(false);
@@ -172,6 +177,17 @@ public class MongoBlobGCTest extends Abs
}
@Test
+ public void consistencyCheckInlined() throws Exception {
+ DataStoreState state = setUp(true);
+ addInlined();
+ ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);
+ MarkSweepGarbageCollector gcObj = init(86400, executor);
+ long candidates = gcObj.checkConsistency();
+ assertEquals(1, executor.getTaskCount());
+ assertEquals(0, candidates);
+ }
+
+ @Test
public void consistencyCheckInit() throws Exception {
DataStoreState state = setUp(true);
ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);