You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by am...@apache.org on 2016/07/01 08:02:45 UTC

svn commit: r1750886 - in /jackrabbit/oak/trunk: oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/ oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/ oak-core/src/ma...

Author: amitj
Date: Fri Jul  1 08:02:45 2016
New Revision: 1750886

URL: http://svn.apache.org/viewvc?rev=1750886&view=rev
Log:
OAK-4454: Create consistent API in ExternalSort to write and read escaped line breaks

* Introduced a new class FileIOUtils to have helper methods
* Removed unescaping and escaping from ExternalSort and introduced a decorating comparator which unescapes lines before comparison, for cases where the comparison must behave the same as for in-memory sorts.
* Changed implementations to use FileIOUtils to write escaped strings
* Accompanying tests

Added:
    jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java
    jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java
Modified:
    jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java
    jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java
    jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/SharedBlobStoreGCTest.java
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DumpDataStoreReferencesCommand.java
    jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java
    jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java

Added: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java?rev=1750886&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/FileIOUtils.java Fri Jul  1 08:02:45 2016
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.commons;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.Set;
+
+import javax.annotation.Nullable;
+
+import com.google.common.base.Charsets;
+import com.google.common.base.Function;
+
+import static com.google.common.collect.Sets.newHashSet;
+import static com.google.common.io.Closeables.close;
+import static com.google.common.io.Files.newWriter;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak;
+import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.unescapeLineBreaks;
+
+/**
+ * Simple File utils
+ */
+public final class FileIOUtils {
+
+    private FileIOUtils() {
+    }
+
+    /**
+     * Writes a string as a new line into the given buffered writer and optionally
+     * escapes the line for line breaks.
+     *
+     * @param writer to write the string
+     * @param str the string to write
+     * @param escape whether to escape string for line breaks
+     * @throws IOException
+     */
+    public static void writeAsLine(BufferedWriter writer, String str, boolean escape) throws IOException {
+        if (escape) {
+            writer.write(escapeLineBreak(str));
+        } else {
+            writer.write(str);
+        }
+        writer.newLine();
+    }
+
+    /**
+     * Writes strings from the given iterator to the given file and optionally
+     * escapes the written strings for line breaks.
+     *
+     * @param iterator the source of the strings
+     * @param f file to write to
+     * @param escape whether to escape for line breaks
+     * @return count
+     * @throws IOException
+     */
+    public static int writeStrings(Iterator<String> iterator, File f, boolean escape)
+        throws IOException {
+        BufferedWriter writer =  newWriter(f, UTF_8);
+        boolean threw = true;
+
+        int count = 0;
+        try {
+            while (iterator.hasNext()) {
+                writeAsLine(writer, iterator.next(), escape);
+                count++;
+            }
+            threw = false;
+        } finally {
+            close(writer, threw);
+        }
+        return count;
+    }
+
+    /**
+     * Reads strings from the given stream into a set, optionally unescaping them for line breaks.
+     *
+     * @param stream the source of the strings
+     * @param unescape whether to unescape for line breaks
+     * @return set
+     * @throws IOException
+     */
+    public static Set<String> readStringsAsSet(InputStream stream, boolean unescape) throws IOException {
+        BufferedReader reader = null;
+        Set<String> set = newHashSet();
+        boolean threw = true;
+
+        try {
+            reader = new BufferedReader(new InputStreamReader(stream, Charsets.UTF_8));
+            String line  = null;
+            while ((line = reader.readLine()) != null) {
+                if (unescape) {
+                    set.add(unescapeLineBreaks(line));
+                } else {
+                    set.add(line);
+                }
+            }
+            threw = false;
+        } finally {
+            close(reader, threw);
+        }
+        return set;
+    }
+
+    /**
+     * Composing comparator which unescapes for line breaks and delegates to the given comparator.
+     * When using this it should be ensured that the data source has been correspondingly escaped.
+     *
+     * @param delegate the actual comparator to delegate to
+     * @return comparator aware of line breaks
+     */
+    public static Comparator<String> lineBreakAwareComparator (Comparator<String> delegate) {
+        return new FileIOUtils.TransformingComparator(delegate, new Function<String, String>() {
+            @Nullable
+            @Override
+            public String apply(@Nullable String input) {
+                return unescapeLineBreaks(input);
+            }
+        });
+    }
+
+    /**
+     * Decorates the given comparator and applies the function before delegating to the decorated
+     * comparator.
+     */
+    public static class TransformingComparator implements Comparator<String> {
+        private Comparator delegate;
+        private Function<String, String> func;
+
+        public TransformingComparator(Comparator delegate, Function<String, String> func) {
+            this.delegate = delegate;
+            this.func = func;
+        }
+
+        @Override
+        public int compare(String s1, String s2) {
+            return delegate.compare(func.apply(s1), func.apply(s2));
+        }
+    }
+}

Modified: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java?rev=1750886&r1=1750885&r2=1750886&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java Fri Jul  1 08:02:45 2016
@@ -31,9 +31,9 @@ import static com.google.common.base.Pre
  * to handle data which contains line break. If left unescaped
  * then such data interferes with the processing of such utilities
  */
-abstract class EscapeUtils {
+public abstract class EscapeUtils {
 
-    static String escapeLineBreak(@Nullable String line) {
+    public static String escapeLineBreak(@Nullable String line) {
         if (line == null) {
             return null;
         }
@@ -43,7 +43,7 @@ abstract class EscapeUtils {
         return line;
     }
 
-    static String unescapeLineBreaks(@Nullable String line) {
+    public static String unescapeLineBreaks(@Nullable String line) {
         if (line == null) {
             return null;
         }

Modified: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java?rev=1750886&r1=1750885&r2=1750886&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java Fri Jul  1 08:02:45 2016
@@ -201,7 +201,7 @@ public class ExternalSort {
                     // in bytes
                     long currentblocksize = 0;
                     while ((currentblocksize < blocksize)
-                            && ((line = readLine(fbr)) != null)) {
+                            && ((line = fbr.readLine()) != null)) {
                         // as long as you have enough memory
                         if (counter < numHeader) {
                             counter++;
@@ -296,7 +296,7 @@ public class ExternalSort {
             for (String r : tmplist) {
                 // Skip duplicate lines
                 if (!distinct || (lastLine == null || (lastLine != null && cmp.compare(r, lastLine) != 0))) {
-                    writeLine(fbw, r);
+                    fbw.write(r);
                     fbw.newLine();
                     lastLine = r;
                 }
@@ -454,7 +454,7 @@ public class ExternalSort {
                 String r = bfb.pop();
                 // Skip duplicate lines
                 if (!distinct || (lastLine == null || (lastLine != null && cmp.compare(r, lastLine) != 0))) {
-                    writeLine(fbw, r);
+                    fbw.write(r);
                     fbw.newLine();
                     lastLine = r;
                 }
@@ -628,15 +628,6 @@ public class ExternalSort {
             return r1.compareTo(r2);
         }
     };
-
-    public static String readLine(BufferedReader br) throws IOException {
-        return EscapeUtils.unescapeLineBreaks(br.readLine());
-    }
-
-    public static void writeLine(BufferedWriter wr, String line) throws IOException {
-        wr.write(EscapeUtils.escapeLineBreak(line));
-    }
-
 }
 
 class BinaryFileBuffer {
@@ -656,7 +647,7 @@ class BinaryFileBuffer {
 
     private void reload() throws IOException {
         try {
-            if ((this.cache = ExternalSort.readLine(fbr)) == null) {
+            if ((this.cache = fbr.readLine()) == null) {
                 this.empty = true;
                 this.cache = null;
             } else {

Modified: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1750886&r1=1750885&r2=1750886&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java Fri Jul  1 08:02:45 2016
@@ -38,6 +38,7 @@ import com.google.common.io.Files;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.LineIterator;
+import org.apache.jackrabbit.oak.commons.FileIOUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -176,7 +177,7 @@ public class StringSort implements Itera
 
         public PersistentState(Comparator<String> comparator, File workDir) {
             this.workDir = workDir;
-            this.comparator = comparator;
+            this.comparator = FileIOUtils.lineBreakAwareComparator(comparator);
         }
 
         public BufferedWriter getWriter() throws FileNotFoundException {

Added: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java?rev=1750886&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/FileIOUtilsTest.java Fri Jul  1 08:02:45 2016
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.commons;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
+import static com.google.common.collect.Sets.newHashSet;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.readStringsAsSet;
+import static org.apache.jackrabbit.oak.commons.FileIOUtils.writeStrings;
+import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak;
+import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.unescapeLineBreaks;
+
+
+/**
+ * Tests for {@link FileIOUtils}
+ */
+public class FileIOUtilsTest {
+
+    @Rule
+    public TemporaryFolder folder = new TemporaryFolder(new File("./target"));
+
+    private static final Random RANDOM = new Random();
+
+    @Test
+    public void writeReadStrings() throws Exception {
+        Set<String> added = newHashSet("a", "z", "e", "b");
+        File f = folder.newFile();
+
+        int count = writeStrings(added.iterator(), f, false);
+        Assert.assertEquals(added.size(), count);
+
+        Set<String> retrieved = readStringsAsSet(new FileInputStream(f), false);
+
+        Assert.assertEquals(added, retrieved);
+    }
+
+    @Test
+    public void writeReadStringsWithLineBreaks() throws IOException {
+        Set<String> added = newHashSet(getLineBreakStrings());
+        File f = folder.newFile();
+        int count = writeStrings(added.iterator(), f, true);
+        Assert.assertEquals(added.size(), count);
+
+        Set<String> retrieved = readStringsAsSet(new FileInputStream(f), true);
+        Assert.assertEquals(added, retrieved);
+    }
+
+    @Test
+    public void writeReadRandomStrings() throws Exception {
+        Set<String> added = newHashSet();
+        File f = folder.newFile();
+
+        for (int i = 0; i < 100; i++) {
+            added.add(getRandomTestString());
+        }
+        int count = writeStrings(added.iterator(), f, true);
+        Assert.assertEquals(added.size(), count);
+
+        Set<String> retrieved = readStringsAsSet(new FileInputStream(f), true);
+        Assert.assertEquals(added, retrieved);
+    }
+
+    @Test
+    public void compareWithLineBreaks() throws Exception {
+        Comparator<String> lexCmp = new Comparator<String>() {
+            @Override public int compare(String s1, String s2) {
+                return s1.compareTo(s2);
+            }
+        };
+        Comparator<String> cmp = FileIOUtils.lineBreakAwareComparator(lexCmp);
+
+        List<String> strs = getLineBreakStrings();
+        Collections.sort(strs, lexCmp);
+
+        // Escape line breaks and then compare with string sorted
+        List<String> escapedStrs = escape(getLineBreakStrings());
+        Collections.sort(escapedStrs, cmp);
+
+        Assert.assertEquals(strs, unescape(escapedStrs));
+    }
+
+    private static List<String> getLineBreakStrings() {
+        return Lists.newArrayList("ab\nc\r", "ab\\z", "a\\\\z\nc",
+            "/a", "/a/b\nc", "/a/b\rd", "/a/b\r\ne", "/a/c");
+    }
+
+    private static List<String> escape(List<String> list) {
+        List<String> escaped = Lists.newArrayList();
+        for (String s : list) {
+            escaped.add(escapeLineBreak(s));
+        }
+        return escaped;
+    }
+
+    private static List<String> unescape(List<String> list) {
+        List<String> unescaped = Lists.newArrayList();
+        for (String s : list) {
+            unescaped.add(unescapeLineBreaks(s));
+        }
+        return unescaped;
+    }
+
+    private static String getRandomTestString() throws Exception {
+        boolean valid = false;
+        StringBuilder buffer = new StringBuilder();
+        while(!valid) {
+            int length = RANDOM.nextInt(40);
+            for (int i = 0; i < length; i++) {
+                buffer.append((char) (RANDOM.nextInt(Character.MAX_VALUE)));
+            }
+            String s = buffer.toString();
+            CharsetEncoder encoder = Charset.forName(Charsets.UTF_8.toString()).newEncoder();
+            try {
+                encoder.encode(CharBuffer.wrap(s));
+                valid = true;
+            } catch (CharacterCodingException e) {
+                buffer = new StringBuilder();
+            }
+        }
+        return buffer.toString();
+    }
+}

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java?rev=1750886&r1=1750885&r2=1750886&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/blob/MarkSweepGarbageCollector.java Fri Jul  1 08:02:45 2016
@@ -39,6 +39,8 @@ import java.util.concurrent.Executor;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import javax.annotation.Nullable;
+
 import com.google.common.base.Charsets;
 import com.google.common.base.Function;
 import com.google.common.base.Joiner;
@@ -56,16 +58,14 @@ import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.LineIterator;
 import org.apache.jackrabbit.core.data.DataRecord;
 import org.apache.jackrabbit.core.data.DataStoreException;
+import org.apache.jackrabbit.oak.commons.FileIOUtils;
 import org.apache.jackrabbit.oak.commons.IOUtils;
-import org.apache.jackrabbit.oak.commons.sort.ExternalSort;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.SharedDataStoreUtils.SharedStoreRecordType;
 import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import javax.annotation.Nullable;
-
 import static com.google.common.collect.Lists.newArrayList;
 
 /**
@@ -455,8 +455,7 @@ public class MarkSweepGarbageCollector i
      */
     static void saveBatchToFile(List<String> ids, BufferedWriter writer) throws IOException {
         for (String id : ids) {
-            ExternalSort.writeLine(writer, id);
-            writer.append(NEWLINE);
+            FileIOUtils.writeAsLine(writer, id, true);
         }
         ids.clear();
         writer.flush();

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java?rev=1750886&r1=1750885&r2=1750886&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/blob/FileLineDifferenceIteratorTest.java Fri Jul  1 08:02:45 2016
@@ -32,10 +32,12 @@ import com.google.common.base.Joiner;
 import com.google.common.base.Splitter;
 import com.google.common.base.StandardSystemProperty;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
 import org.apache.commons.io.LineIterator;
 import org.junit.Test;
 
 import static java.util.Arrays.asList;
+import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak;
 import static org.apache.jackrabbit.oak.plugins.blob.MarkSweepGarbageCollector.FileLineDifferenceIterator;
 import static org.hamcrest.CoreMatchers.is;
 import static org.junit.Assert.assertThat;
@@ -98,7 +100,51 @@ public class FileLineDifferenceIteratorT
         assertReverseDiff("a,0xb,d,e,f", "a,d", asList("0xb", "e", "f"));
         assertReverseDiff("a,0xb,d,e,f", "a,d,e,f,g", asList("0xb"));
     }
-    
+
+    @Test
+    public void testDiffLineBreakChars() throws IOException {
+        List<String> all = getLineBreakStrings();
+        List<String> marked = getLineBreakStrings();
+        List<String> diff = remove(marked, 3, 2);
+
+        // without escaping, the line breaks will be resolved
+        assertDiff(Joiner.on(",").join(marked), Joiner.on(",").join(all),
+            asList("/a", "c", "/a/b"));
+    }
+
+    @Test
+    public void testDiffEscapedLineBreakChars() throws IOException {
+        // Escaped characters
+        List<String> all = escape(getLineBreakStrings());
+        List<String> marked = escape(getLineBreakStrings());
+        List<String> diff = remove(marked, 3, 2);
+
+        assertDiff(Joiner.on(",").join(marked), Joiner.on(",").join(all), diff);
+    }
+
+    private static List<String> getLineBreakStrings() {
+        return Lists.newArrayList("ab\nc\r", "ab\\z", "a\\\\z\nc",
+            "/a", "/a/b\nc", "/a/b\rd", "/a/b\r\ne", "/a/c");
+    }
+
+    private static List<String> remove(List<String> list, int idx, int count) {
+        List<String> diff = Lists.newArrayList();
+        int i = 0;
+        while (i < count) {
+            diff.add(list.remove(idx));
+            i++;
+        }
+        return diff;
+    }
+
+    private static List<String> escape(List<String> list) {
+        List<String> escaped = Lists.newArrayList();
+        for (String s : list) {
+            escaped.add(escapeLineBreak(s));
+        }
+        return escaped;
+    }
+
     private static void assertReverseDiff(String marked, String all, List<String> diff) throws IOException {
         Iterator<String> itr = createItr(all, marked);
         assertThat("marked: " + marked + " all: " + all, ImmutableList.copyOf(itr), is(diff));

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java?rev=1750886&r1=1750885&r2=1750886&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/MongoBlobGCTest.java Fri Jul  1 08:02:45 2016
@@ -148,20 +148,21 @@ public class MongoBlobGCTest extends Abs
     }
 
     private HashSet<String> addNodeSpecialChars() throws Exception {
+        List<String> specialCharSets =
+            Lists.newArrayList("q\\%22afdg\\%22", "a\nbcd", "a\n\rabcd", "012\\efg" );
+        DocumentNodeStore ds = mk.getNodeStore();
         HashSet<String> set = new HashSet<String>();
-        DocumentNodeStore s = mk.getNodeStore();
-        NodeBuilder a = s.getRoot().builder();
-        int number = 1;
-        for (int i = 0; i < number; i++) {
-            Blob b = s.createBlob(randomStream(i, 18432));
+        NodeBuilder a = ds.getRoot().builder();
+        for (int i = 0; i < specialCharSets.size(); i++) {
+            Blob b = ds.createBlob(randomStream(i, 18432));
             NodeBuilder n = a.child("cspecial");
-            n.child("q\\%22afdg\\%22").setProperty("x", b);
+            n.child(specialCharSets.get(i)).setProperty("x", b);
             Iterator<String> idIter =
-                ((GarbageCollectableBlobStore) s.getBlobStore())
+                ((GarbageCollectableBlobStore) ds.getBlobStore())
                     .resolveChunks(b.toString());
             set.addAll(Lists.newArrayList(idIter));
         }
-        s.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
+        ds.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
         return set;
     }
 

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/SharedBlobStoreGCTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/SharedBlobStoreGCTest.java?rev=1750886&r1=1750885&r2=1750886&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/SharedBlobStoreGCTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/SharedBlobStoreGCTest.java Fri Jul  1 08:02:45 2016
@@ -301,13 +301,14 @@ public class SharedBlobStoreGCTest {
         }
 
         private HashSet<String> addNodeSpecialChars() throws Exception {
+            List<String> specialCharSets =
+                Lists.newArrayList("q\\%22afdg\\%22", "a\nbcd", "a\n\rabcd", "012\\efg" );
             HashSet<String> set = new HashSet<String>();
             NodeBuilder a = ds.getRoot().builder();
-            int number = 1;
-            for (int i = 0; i < number; i++) {
+            for (int i = 0; i < specialCharSets.size(); i++) {
                 Blob b = ds.createBlob(randomStream(i, 18432));
                 NodeBuilder n = a.child("cspecial");
-                n.child("q\\%22afdg\\%22").setProperty("x", b);
+                n.child(specialCharSets.get(i)).setProperty("x", b);
                 Iterator<String> idIter =
                     ((GarbageCollectableBlobStore) ds.getBlobStore())
                         .resolveChunks(b.toString());

Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DumpDataStoreReferencesCommand.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DumpDataStoreReferencesCommand.java?rev=1750886&r1=1750885&r2=1750886&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DumpDataStoreReferencesCommand.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/run/DumpDataStoreReferencesCommand.java Fri Jul  1 08:02:45 2016
@@ -38,8 +38,8 @@ import com.mongodb.MongoURI;
 import joptsimple.OptionParser;
 import joptsimple.OptionSet;
 import joptsimple.OptionSpec;
+import org.apache.jackrabbit.oak.commons.FileIOUtils;
 import org.apache.jackrabbit.oak.commons.IOUtils;
-import org.apache.jackrabbit.oak.commons.sort.ExternalSort;
 import org.apache.jackrabbit.oak.plugins.blob.BlobReferenceRetriever;
 import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
 import org.apache.jackrabbit.oak.plugins.document.DocumentBlobReferenceRetriever;
@@ -119,8 +119,7 @@ class DumpDataStoreReferencesCommand imp
                                         count.getAndIncrement();
                                         if (idBatch.size() >= 1024) {
                                             for (String rec : idBatch) {
-                                                ExternalSort.writeLine(writer, rec);
-                                                writer.append(StandardSystemProperty.LINE_SEPARATOR.value());
+                                                FileIOUtils.writeAsLine(writer, rec, true);
                                                 writer.flush();
                                             }
                                             idBatch.clear();
@@ -134,8 +133,7 @@ class DumpDataStoreReferencesCommand imp
                 );
                 if (!idBatch.isEmpty()) {
                     for (String rec : idBatch) {
-                        ExternalSort.writeLine(writer, rec);
-                        writer.append(StandardSystemProperty.LINE_SEPARATOR.value());
+                        FileIOUtils.writeAsLine(writer, rec, true);
                         writer.flush();
                     }
                     idBatch.clear();

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java?rev=1750886&r1=1750885&r2=1750886&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/test/java/org/apache/jackrabbit/oak/segment/SegmentDataStoreBlobGCIT.java Fri Jul  1 08:02:45 2016
@@ -208,13 +208,14 @@ public class SegmentDataStoreBlobGCIT {
     }
 
     private HashSet<String> addNodeSpecialChars() throws Exception {
+        List<String> specialCharSets =
+            Lists.newArrayList("q\\%22afdg\\%22", "a\nbcd", "a\n\rabcd", "012\\efg" );
         HashSet<String> set = new HashSet<String>();
         NodeBuilder a = nodeStore.getRoot().builder();
-        int number = 1;
-        for (int i = 0; i < number; i++) {
+        for (int i = 0; i < specialCharSets.size(); i++) {
             SegmentBlob b = (SegmentBlob) nodeStore.createBlob(randomStream(i, 18432));
             NodeBuilder n = a.child("cspecial");
-            n.child("q \\%22afdg\\%22").setProperty("x", b);
+            n.child(specialCharSets.get(i)).setProperty("x", b);
             Iterator<String> idIter = blobStore.resolveChunks(b.getBlobId());
             set.addAll(Lists.newArrayList(idIter));
         }

Modified: jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java?rev=1750886&r1=1750885&r2=1750886&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java (original)
+++ jackrabbit/oak/trunk/oak-segment/src/test/java/org/apache/jackrabbit/oak/plugins/segment/SegmentDataStoreBlobGCIT.java Fri Jul  1 08:02:45 2016
@@ -219,14 +219,16 @@ public class SegmentDataStoreBlobGCIT {
         nodeStore.merge(a, EmptyHook.INSTANCE, CommitInfo.EMPTY);
         return set;
     }
+
     private HashSet<String> addNodeSpecialChars() throws Exception {
+        List<String> specialCharSets =
+            Lists.newArrayList("q\\%22afdg\\%22", "a\nbcd", "a\n\rabcd", "012\\efg" );
         HashSet<String> set = new HashSet<String>();
         NodeBuilder a = nodeStore.getRoot().builder();
-        int number = 1;
-        for (int i = 0; i < number; i++) {
+        for (int i = 0; i < specialCharSets.size(); i++) {
             SegmentBlob b = (SegmentBlob) nodeStore.createBlob(randomStream(i, 18432));
             NodeBuilder n = a.child("cspecial");
-            n.child("q \\%22afdg\\%22").setProperty("x", b);
+            n.child(specialCharSets.get(i)).setProperty("x", b);
             Iterator<String> idIter = blobStore.resolveChunks(b.getBlobId());
             set.addAll(Lists.newArrayList(idIter));
         }