You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2015/09/11 13:08:05 UTC

svn commit: r1702426 - in /jackrabbit/oak/trunk: oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/ ...

Author: amitj
Date: Fri Sep 11 11:08:05 2015
New Revision: 1702426

URL: http://svn.apache.org/r1702426
Log:
OAK-3184: Consistency checker for data/blob store

Fixed consistency check failures for inlined blobs, and fixed duplicate entries not being removed correctly
Added test cases

Added:
    jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.csv
Modified:
    jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java
    jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/InMemoryDataRecord.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java

Modified: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java Fri Sep 11 11:08:05 2015
@@ -295,7 +295,7 @@ public class ExternalSort {
         try {
             for (String r : tmplist) {
                 // Skip duplicate lines
-                if (!distinct || !r.equals(lastLine)) {
+                if (!distinct || (lastLine == null || (lastLine != null && cmp.compare(r, lastLine) != 0))) {
                     fbw.write(r);
                     fbw.newLine();
                     lastLine = r;
@@ -453,7 +453,7 @@ public class ExternalSort {
                 BinaryFileBuffer bfb = pq.poll();
                 String r = bfb.pop();
                 // Skip duplicate lines
-                if (!distinct || !r.equals(lastLine)) {
+                if (!distinct || (lastLine == null || (lastLine != null && cmp.compare(r, lastLine) != 0))) {
                     fbw.write(r);
                     fbw.newLine();
                     lastLine = r;

Modified: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ExternalSortTest.java Fri Sep 11 11:08:05 2015
@@ -78,6 +78,8 @@ public class ExternalSortTest {
     private static final String TEST_FILE1_TXT = "test-file-1.txt";
     private static final String TEST_FILE2_TXT = "test-file-2.txt";
     private static final String TEST_FILE1_CSV = "test-file-1.csv";
+    private static final String TEST_FILE2_CSV = "test-file-2.csv";
+
     private static final String[] EXPECTED_SORT_RESULTS = { "a", "b", "b", "e",
             "f", "i", "m", "o", "u", "u", "x", "y", "z" };
     private static final String[] EXPECTED_MERGE_RESULTS = { "a", "a", "b",
@@ -90,10 +92,14 @@ public class ExternalSortTest {
             "f", "i", "m", "o", "u", "x", "y", "z" };
     private static final String[] SAMPLE = { "f", "m", "b", "e", "i", "o", "u",
             "x", "a", "y", "z", "b", "u" };
-
+    private static final String[] EXPECTED_CSV_DISTINCT_RESULTS = { "a,1", "b,2a", "e,3", "f,4", "i,5", "m,6", "o,7", 
+                                                                      "u,8a", "x,9", "y,10", "z,11"};
+    private static final String[] EXPECTED_CSV_RESULTS = { "a,1", "b,2a", "b,2b", "e,3", "f,4", "i,5", "m,6", "o,7",
+                                                             "u,8a", "u,8b", "x,9", "y,10", "z,11"};
     private File file1;
     private File file2;
     private File csvFile;
+    private File csvFile2;
     private List<File> fileList;
 
     /**
@@ -340,6 +346,51 @@ public class ExternalSortTest {
         testCSVSortingWithParams(true);
     }
 
+    @Test
+    public void testCSVKeyValueSorting() throws Exception {
+        testCSVSortKeyValue(false);
+        testCSVSortKeyValue(true);
+    }
+    
+    /**
+     * Sample case to sort csv file with key, value pair.
+     *
+     * @param distinct if true, duplicate records are omitted from the sorted output
+     * @throws Exception
+     *
+     */    
+    public void testCSVSortKeyValue(boolean distinct) throws Exception {
+        
+        File out = File.createTempFile("test_results", ".tmp", null);
+        
+        Comparator<String> cmp =   new Comparator<String>() {
+            @Override
+            public int compare(String s1, String s2) {
+                return s1.split(",")[0].compareTo(s2.split(",")[0]);
+            }
+        };
+        
+        List<File> listOfFiles = ExternalSort.sortInBatch(this.csvFile2, cmp,
+                                                             ExternalSort.DEFAULTMAXTEMPFILES,
+                                                             ExternalSort.DEFAULT_MAX_MEM_BYTES,
+                                                             Charset.defaultCharset(),
+                                                             null, distinct, 0, false);
+        
+        // now merge with append
+        ExternalSort.mergeSortedFiles(listOfFiles, out, cmp,
+                                         Charset.defaultCharset(), distinct, true, false);
+        ArrayList<String> result = readLines(out);
+        
+        if (distinct) {
+            assertEquals(11, result.size());
+            assertArrayEquals(Arrays.toString(result.toArray()), EXPECTED_CSV_DISTINCT_RESULTS, result.toArray());
+        } else {
+            assertEquals(13, result.size());
+            assertArrayEquals(Arrays.toString(result.toArray()), EXPECTED_CSV_RESULTS, result.toArray());            
+        }
+        
+    }
+    
     /**
      * Sample case to sort csv file.
      * 
@@ -404,4 +455,4 @@ public class ExternalSortTest {
         }
     }
 
-}
\ No newline at end of file
+}

Added: jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.csv
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.csv?rev=1702426&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.csv (added)
+++ jackrabbit/oak/trunk/oak-commons/src/test/resources/org/apache/jackrabbit/oak/commons/sort/test-file-2.csv Fri Sep 11 11:08:05 2015
@@ -0,0 +1,13 @@
+f,4
+m,6
+b,2a
+e,3
+i,5
+o,7
+u,8a
+x,9
+a,1
+y,10
+z,11
+b,2b
+u,8b

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGC.java Fri Sep 11 11:08:05 2015
@@ -126,12 +126,19 @@ public class  BlobGC extends AnnotatedSt
                 public String call() throws Exception {
                     long t0 = nanoTime();
                     long missing = blobGarbageCollector.checkConsistency();
-                    return missing + "missing blobs found (details in the log). Consistency check completed in "
-                               + formatTime(nanoTime() - t0);
+                    return "Consistency check completed in "
+                                + formatTime(nanoTime() - t0) + " " +
+                                + missing + "missing blobs found (details in the log).";
                 }
             });
             executor.execute(consistencyOp);
         }
+        return getConsistencyCheckStatus();
+    }
+    
+    @Nonnull
+    @Override
+    public CompositeData getConsistencyCheckStatus() {
         return consistencyOp.getStatus().toCompositeData();
     }
     

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/BlobGCMBean.java Fri Sep 11 11:08:05 2015
@@ -68,4 +68,13 @@ public interface BlobGCMBean {
      * @return the missing blobs
      */
     CompositeData checkConsistency();
+    
+    /**
+     * Consistency check status
+     * 
+     * @return the status of the ongoing operation, or the terminal status of the
+     * last operation if none is ongoing, or <em>Status not available</em> if there is neither.
+     */
+    @Nonnull 
+    CompositeData getConsistencyCheckStatus();
 }

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java Fri Sep 11 11:08:05 2015
@@ -58,6 +58,7 @@ import org.apache.commons.io.LineIterato
 import org.apache.jackrabbit.core.data.DataRecord;
 import org.apache.jackrabbit.core.data.DataStoreException;
 import org.apache.jackrabbit.oak.commons.IOUtils;
+import org.apache.jackrabbit.oak.plugins.blob.datastore.InMemoryDataRecord;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils.SharedStoreRecordType;
 import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
@@ -568,12 +569,13 @@ public class MarkSweepGarbageCollector i
             candidates = calculateDifference(fs, iter);
             LOG.trace("Ending difference phase of the consistency check");
             
+            LOG.info("Consistency check found [{}] missing blobs", candidates);
             if (candidates > 0) {
                 LOG.warn("Consistency check failure in the the blob store : {}, check missing candidates in file {}",
                             blobStore, fs.getGcCandidates().getAbsolutePath());
             }
         } finally {
-            if (!LOG.isTraceEnabled() || candidates == 0) {
+            if (!LOG.isTraceEnabled() && candidates == 0) {
                 Closeables.close(fs, threw);
             }
         }
@@ -617,7 +619,7 @@ public class MarkSweepGarbageCollector i
 
                 // sort the file
                 GarbageCollectorFileState.sort(fs.getAvailableRefs());
-                LOG.debug("Number of blobs present in BlobStore : [{}] ", blobsCount);
+                LOG.info("Number of blobs present in BlobStore : [{}] ", blobsCount);
             } finally {
                 IOUtils.closeQuietly(bufferWriter);
             }
@@ -692,7 +694,12 @@ public class MarkSweepGarbageCollector i
                     } else {
                         //This entry is not found in marked entries
                         //hence part of diff
-                        return diff;
+                        if (!InMemoryDataRecord.isInstance(getKey(diff))) {
+                            return diff;
+                        } else {
+                            diff = null;
+                            break;
+                        }
                     }
                 }
             }

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/InMemoryDataRecord.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/InMemoryDataRecord.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/InMemoryDataRecord.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/datastore/InMemoryDataRecord.java Fri Sep 11 11:08:05 2015
@@ -31,7 +31,7 @@ import org.slf4j.LoggerFactory;
 /**
  * Represents binary data which is backed by a byte[] (in memory).
  */
-class InMemoryDataRecord implements DataRecord {
+public class InMemoryDataRecord implements DataRecord {
 
     /**
      * Logger instance for this class
@@ -89,7 +89,7 @@ class InMemoryDataRecord implements Data
      * @param id DataRecord identifier
      * @return true if it can be converted
      */
-    static boolean isInstance(String id) {
+    public static boolean isInstance(String id) {
         return id.startsWith(PREFIX);
     }
 

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java Fri Sep 11 11:08:05 2015
@@ -89,7 +89,21 @@ public class FileLineDifferenceIteratorT
         assertDiff("a,b,d,e,f", "a,b,c,f, h", asList("c", "h"));
         assertDiff("3,7", "2,3,5,9", asList("2", "5", "9"));
     }
-
+    
+    @Test
+    public void testMarkedDiffWithExtraEntriesInMarked() throws IOException {
+        assertReverseDiff("a,b,c,e,h", "a,b,c", asList("e", "h"));
+        assertReverseDiff("a,b,d,e", "a,b,c", asList("d", "e"));
+        assertReverseDiff("a,b,d", "a,b,d", Collections.<String>emptyList());
+        assertReverseDiff("a,0xb,d,e,f", "a,d", asList("e", "f"));
+        assertReverseDiff("a,0xb,d,e,f", "a,d,e,f,g", Collections.<String>emptyList());
+    }
+    
+    private static void assertReverseDiff(String marked, String all, List<String> diff) throws IOException {
+        Iterator<String> itr = createItr(all, marked);
+        assertThat("marked: " + marked + " all: " + all, ImmutableList.copyOf(itr), is(diff));
+    }
+    
     private static void assertDiff(String marked, String all, List<String> diff) throws IOException {
         Iterator<String> itr = createItr(marked, all);
         assertThat("marked: " + marked + " all: " + all, ImmutableList.copyOf(itr), is(diff));

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java?rev=1702426&r1=1702425&r2=1702426&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java Fri Sep 11 11:08:05 2015
@@ -86,6 +86,10 @@ public class MongoBlobGCTest extends Abs
                 }
             }
             a.child("c" + i).setProperty("x", b);
+            // Add a duplicated entry
+            if (i == 0) {
+                a.child("cdup").setProperty("x", b);
+            }
         }
         s.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
 
@@ -163,6 +167,7 @@ public class MongoBlobGCTest extends Abs
         Set<String> existingAfterGC = gc(0);
         assertTrue(Sets.symmetricDifference(state.blobsPresent, existingAfterGC).isEmpty());
     }
+    
     @Test
     public void gcVersionDeleteWithInlined() throws Exception {
         DataStoreState state = setUp(false);
@@ -172,6 +177,17 @@ public class MongoBlobGCTest extends Abs
     }
     
     @Test
+    public void consistencyCheckInlined() throws Exception {
+        DataStoreState state = setUp(true);
+        addInlined();
+        ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);
+        MarkSweepGarbageCollector gcObj = init(86400, executor);
+        long candidates = gcObj.checkConsistency();
+        assertEquals(1, executor.getTaskCount());
+        assertEquals(0, candidates);        
+    }
+    
+    @Test
     public void consistencyCheckInit() throws Exception {
         DataStoreState state = setUp(true);
         ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);