You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2015/09/16 12:42:26 UTC

svn commit: r1703382 - in /jackrabbit/oak/trunk: oak-commons/ oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/ oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/d...

Author: chetanm
Date: Wed Sep 16 10:42:25 2015
New Revision: 1703382

URL: http://svn.apache.org/r1703382
Log:
OAK-3395 - RevisionGC fails for JCR paths having line feed characters

Added:
    jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java   (with props)
    jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtilsTest.java   (with props)
Modified:
    jackrabbit/oak/trunk/oak-commons/pom.xml
    jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java
    jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
    jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java

Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1703382&r1=1703381&r2=1703382&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-commons/pom.xml Wed Sep 16 10:42:25 2015
@@ -109,5 +109,12 @@
       <artifactId>commons-math3</artifactId>
       <optional>true</optional>
     </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-lang3</artifactId>
+      <version>3.3.2</version>
+      <optional>true</optional>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 </project>

Added: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java?rev=1703382&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java Wed Sep 16 10:42:25 2015
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.commons.sort;
+
+import javax.annotation.Nullable;
+
+
+import static com.google.common.base.Preconditions.checkState;
+
+/**
+ * Utility class to escape '\n', '\r', '\' char
+ * while being written to file and unescape then upon getting
+ * read from file. This is used by StringSort and ExternalSort
+ * to handle data which contains line break. If left unescaped
+ * then such data interferes with the processing of such utilities
+ */
+abstract class EscapeUtils {
+
+    static String escapeLineBreak(@Nullable String line) {
+        if (line == null) {
+            return null;
+        }
+        if (escapingRequired(line)) {
+            line = escape(line);
+        }
+        return line;
+    }
+
+    static String unescapeLineBreaks(@Nullable String line) {
+        if (line == null) {
+            return null;
+        }
+        if (unescapingRequired(line)) {
+            line = unescape(line);
+        }
+        return line;
+    }
+
+    private static boolean escapingRequired(String line) {
+        int len = line.length();
+        for (int i = 0; i < len; i++) {
+            char c = line.charAt(i);
+            switch (c) {
+                case '\n':
+                case '\r':
+                case '\\':
+                    return true;
+            }
+        }
+        return false;
+    }
+
+    private static boolean unescapingRequired(String line) {
+        return line.indexOf('\\') >= 0;
+    }
+
+    private static String escape(String line) {
+        int len = line.length();
+        StringBuilder sb = new StringBuilder(len + 1);
+        for (int i = 0; i < len; i++) {
+            char c = line.charAt(i);
+            /*
+              Here we need not worry about unicode chars
+              because UTF-16 represents supplementary characters using
+              code units whose values are not used for BMP characters
+              i.e. ASCII chars
+             */
+            switch (c) {
+                case '\n':
+                    sb.append("\\n");
+                    break;
+                case '\r':
+                    sb.append("\\r");
+                    break;
+                case '\\':
+                    sb.append("\\\\");
+                    break;
+                default:
+                    sb.append(c);
+            }
+        }
+        return sb.toString();
+    }
+
+    private static String unescape(String line) {
+        int len = line.length();
+        StringBuilder sb = new StringBuilder(len - 1);
+        for (int i = 0; i < len; i++) {
+            char c = line.charAt(i);
+            if (c == '\\') {
+                checkState(i < len - 1, "Expected one more char after '\\' at [%s] in [%s]", i, line);
+                char nextChar = line.charAt(i + 1);
+                switch (nextChar) {
+                    case 'n':
+                        sb.append('\n');
+                        i++;
+                        break;
+                    case 'r':
+                        sb.append('\r');
+                        i++;
+                        break;
+                    case '\\':
+                        sb.append('\\');
+                        i++;
+                        break;
+                    default:
+                        String msg = String.format("Unexpected char [%c] found at %d of [%s]. " +
+                                "Expected '\\' or 'r' or 'n", nextChar, i, line);
+                        throw new IllegalArgumentException(msg);
+                }
+            } else {
+                sb.append(c);
+            }
+        }
+        return sb.toString();
+    }
+
+}

Propchange: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java?rev=1703382&r1=1703381&r2=1703382&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java Wed Sep 16 10:42:25 2015
@@ -201,7 +201,7 @@ public class ExternalSort {
                     // in bytes
                     long currentblocksize = 0;
                     while ((currentblocksize < blocksize)
-                            && ((line = fbr.readLine()) != null)) {
+                            && ((line = readLine(fbr)) != null)) {
                         // as long as you have enough memory
                         if (counter < numHeader) {
                             counter++;
@@ -296,7 +296,7 @@ public class ExternalSort {
             for (String r : tmplist) {
                 // Skip duplicate lines
                 if (!distinct || (lastLine == null || (lastLine != null && cmp.compare(r, lastLine) != 0))) {
-                    fbw.write(r);
+                    writeLine(fbw, r);
                     fbw.newLine();
                     lastLine = r;
                 }
@@ -454,7 +454,7 @@ public class ExternalSort {
                 String r = bfb.pop();
                 // Skip duplicate lines
                 if (!distinct || (lastLine == null || (lastLine != null && cmp.compare(r, lastLine) != 0))) {
-                    fbw.write(r);
+                    writeLine(fbw, r);
                     fbw.newLine();
                     lastLine = r;
                 }
@@ -629,6 +629,14 @@ public class ExternalSort {
         }
     };
 
+    static String readLine(BufferedReader br) throws IOException {
+        return EscapeUtils.unescapeLineBreaks(br.readLine());
+    }
+
+    static void writeLine(BufferedWriter wr, String line) throws IOException {
+        wr.write(EscapeUtils.escapeLineBreak(line));
+    }
+
 }
 
 class BinaryFileBuffer {
@@ -648,7 +656,7 @@ class BinaryFileBuffer {
 
     private void reload() throws IOException {
         try {
-            if ((this.cache = this.fbr.readLine()) == null) {
+            if ((this.cache = ExternalSort.readLine(fbr)) == null) {
                 this.empty = true;
                 this.cache = null;
             } else {

Modified: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1703382&r1=1703381&r2=1703382&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java Wed Sep 16 10:42:25 2015
@@ -41,6 +41,9 @@ import org.apache.commons.io.LineIterato
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak;
+import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.unescapeLineBreaks;
+
 /**
  * Utility class to store a list of string and perform sort on that. For small size
  * the list would be maintained in memory. If the size crosses the required threshold then
@@ -139,7 +142,7 @@ public class StringSort implements Itera
     private void flushToFile(List<String> ids) throws IOException {
         BufferedWriter w = getPersistentState().getWriter();
         for (String id : ids) {
-            w.write(id);
+            w.write(escapeLineBreak(id));
             w.newLine();
         }
         ids.clear();
@@ -264,5 +267,10 @@ public class StringSort implements Itera
         public CloseableIterator(Reader reader) throws IllegalArgumentException {
             super(reader);
         }
+
+        @Override
+        public String next() {
+            return unescapeLineBreaks(super.next());
+        }
     }
 }

Added: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtilsTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtilsTest.java?rev=1703382&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtilsTest.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtilsTest.java Wed Sep 16 10:42:25 2015
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.commons.sort;
+
+import java.util.Random;
+
+import org.apache.commons.lang3.RandomStringUtils;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class EscapeUtilsTest {
+
+    @Test
+    public void noOp() throws Exception{
+        assertEquals(null, EscapeUtils.escapeLineBreak(null));
+        assertEquals("abc", EscapeUtils.escapeLineBreak("abc"));
+        assertEquals("", EscapeUtils.escapeLineBreak(""));
+        assertEquals("some text with multi byte 田中 characters.",
+                EscapeUtils.escapeLineBreak("some text with multi byte 田中 characters."));
+    }
+
+    @Test
+    public void testEscape() throws Exception{
+        assertEquals("ab\\nc\\r", EscapeUtils.escapeLineBreak("ab\nc\r"));
+        assertEquals("a\\\\z", EscapeUtils.escapeLineBreak("a\\z"));
+        assertEquals("some text with multi \\nbyte 田中 characters.",
+                EscapeUtils.escapeLineBreak("some text with multi \nbyte 田中 characters."));
+    }
+
+    @Test
+    public void noOpUnEscape() throws Exception{
+        assertEquals(null, EscapeUtils.unescapeLineBreaks(null));
+        assertEquals("abc", EscapeUtils.unescapeLineBreaks("abc"));
+        assertEquals("abc\b", EscapeUtils.unescapeLineBreaks("abc\b"));
+        assertEquals("", EscapeUtils.unescapeLineBreaks(""));
+        assertEquals("some text with multi byte 田中 characters.",
+                EscapeUtils.unescapeLineBreaks("some text with multi byte 田中 characters."));
+    }
+
+    @Test
+    public void testUnEscape() throws Exception{
+        assertEquals("ab\nc\r", EscapeUtils.unescapeLineBreaks("ab\\nc\\r"));
+        assertEquals("a\\z", EscapeUtils.unescapeLineBreaks("a\\\\z"));
+        assertEquals("some text with multi \nbyte 田中 characters.",
+                EscapeUtils.unescapeLineBreaks("some text with multi \\nbyte 田中 characters."));
+    }
+
+    @Test
+    public void testEscapeUnEscape() throws Exception{
+        assertEscape("ab\nc\r");
+        assertEscape("a\\z");
+        assertEscape("a\\\\z\nc");
+        assertEscape("some text with multi \nbyte \r田中 characters\\.");
+    }
+
+    @Test(expected = IllegalStateException.class)
+    public void invalidUnEscape() throws Exception{
+        EscapeUtils.unescapeLineBreaks("abc\\");
+    }
+
+    @Test(expected = IllegalArgumentException.class)
+    public void invalidUnEscape2() throws Exception{
+        //Pass an unescaped string. In an escaped string a literal '\'
+        // would always be escaped
+        EscapeUtils.unescapeLineBreaks("abc\\k\\n");
+    }
+
+    @Test
+    public void randomized() throws Exception {
+        Random r = new Random(1);
+        for (int i = 0; i < 100000; i++) {
+            int len = r.nextInt(10);
+            StringBuilder buff = new StringBuilder();
+            for (int j = 0; j < len; j++) {
+                switch (r.nextInt(3)) {
+                    case 0:
+                        String s = "\\\r\nrnRN ";
+                        buff.append(s.charAt(r.nextInt(s.length())));
+                        break;
+                    case 1:
+                        buff.append(RandomStringUtils.random(4, true, false));
+                        break;
+                    case 2:
+                        buff.append((char) r.nextInt(65000));
+                        break;
+                }
+            }
+            String original = buff.toString();
+            String escaped = EscapeUtils.escapeLineBreak(original);
+            String unescaped = EscapeUtils.unescapeLineBreaks(escaped);
+            assertTrue(escaped.indexOf('\n') < 0);
+            assertTrue(escaped.indexOf('\r') < 0);
+            assertEquals(original, unescaped);
+        }
+    }
+
+    private static void assertEscape(String text){
+        String result = EscapeUtils.unescapeLineBreaks(EscapeUtils.escapeLineBreak(text));
+        assertEquals(text, result);
+    }
+}

Propchange: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtilsTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java?rev=1703382&r1=1703381&r2=1703382&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java Wed Sep 16 10:42:25 2015
@@ -32,6 +32,7 @@ import java.util.Set;
 import com.google.common.base.Joiner;
 import com.google.common.collect.Collections2;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
 import org.junit.Test;
 
 import static org.junit.Assert.assertEquals;
@@ -61,6 +62,42 @@ public class StringSortTest {
         addPathsToCollector(paths);
 
         assertTrue(collector.usingFile());
+        assertConstraints(paths);
+
+        collector.close();
+    }
+
+    @Test
+    public void sortWithEntriesHavingLineBreaks() throws Exception{
+        List<String> paths = Lists.newArrayList("/a", "/a/b\nc", "/a/b\rd", "/a/b\r\ne", "/a/c");
+
+        collector = new StringSort(0, comparator);
+        addPathsToCollector(paths);
+
+        assertTrue(collector.usingFile());
+        assertConstraints(paths);
+
+        collector.close();
+    }
+
+    /**
+     * Test for the case where sorting order should not be affected by escaping
+     *
+     * "aa", "aa\n1", "aa\r2", "aa\\" -> "aa", "aa\n1", "aa\r2", "aa\\"
+     * "aa", "aa\\n1", "aa\\r2", "aa\\\\" -> "aa", "aa\\", "aa\n1", "aa\r2",
+     *
+     * In above case the sorting order for escaped string is different. So
+     * it needs to be ensured that sorting order remain un affected by escaping
+     * @throws Exception
+     */
+    @Test
+    public void sortWithEntriesHavingLineBreaks2() throws Exception{
+        List<String> paths = Lists.newArrayList("/a", "/a/a\nc", "/a/a\rd", "/a/a\r\ne", "/a/a\\");
+
+        collector = new StringSort(0, comparator);
+        addPathsToCollector(paths);
+
+        assertTrue(collector.usingFile());
         assertConstraints(paths);
 
         collector.close();

Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1703382&r1=1703381&r2=1703382&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java Wed Sep 16 10:42:25 2015
@@ -165,7 +165,6 @@ public class VersionGCDeletionTest {
         }
     }
 
-    @Ignore("OAK-3395")
     @Test
     public void gcWithPathsHavingNewLine() throws Exception{
         int noOfDocsToDelete = 200;