You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by ch...@apache.org on 2015/09/16 12:42:26 UTC
svn commit: r1703382 - in /jackrabbit/oak/trunk: oak-commons/
oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/
oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/
oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/d...
Author: chetanm
Date: Wed Sep 16 10:42:25 2015
New Revision: 1703382
URL: http://svn.apache.org/r1703382
Log:
OAK-3395 - RevisionGC fails for JCR paths having line feed characters
Added:
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java (with props)
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtilsTest.java (with props)
Modified:
jackrabbit/oak/trunk/oak-commons/pom.xml
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java
jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
Modified: jackrabbit/oak/trunk/oak-commons/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/pom.xml?rev=1703382&r1=1703381&r2=1703382&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-commons/pom.xml Wed Sep 16 10:42:25 2015
@@ -109,5 +109,12 @@
<artifactId>commons-math3</artifactId>
<optional>true</optional>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ <version>3.3.2</version>
+ <optional>true</optional>
+ <scope>test</scope>
+ </dependency>
</dependencies>
</project>
Added: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java?rev=1703382&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java Wed Sep 16 10:42:25 2015
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.commons.sort;
+
+import javax.annotation.Nullable;
+
+
+import static com.google.common.base.Preconditions.checkState;
+
+/**
+ * Utility class to escape '\n', '\r', '\' char
+ * while being written to file and unescape then upon getting
+ * read from file. This is used by StringSort and ExternalSort
+ * to handle data which contains line break. If left unescaped
+ * then such data interferes with the processing of such utilities
+ */
+abstract class EscapeUtils {
+
+ static String escapeLineBreak(@Nullable String line) {
+ if (line == null) {
+ return null;
+ }
+ if (escapingRequired(line)) {
+ line = escape(line);
+ }
+ return line;
+ }
+
+ static String unescapeLineBreaks(@Nullable String line) {
+ if (line == null) {
+ return null;
+ }
+ if (unescapingRequired(line)) {
+ line = unescape(line);
+ }
+ return line;
+ }
+
+ private static boolean escapingRequired(String line) {
+ int len = line.length();
+ for (int i = 0; i < len; i++) {
+ char c = line.charAt(i);
+ switch (c) {
+ case '\n':
+ case '\r':
+ case '\\':
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private static boolean unescapingRequired(String line) {
+ return line.indexOf('\\') >= 0;
+ }
+
+ private static String escape(String line) {
+ int len = line.length();
+ StringBuilder sb = new StringBuilder(len + 1);
+ for (int i = 0; i < len; i++) {
+ char c = line.charAt(i);
+ /*
+ Here we need not worry about unicode chars
+ because UTF-16 represents supplementary characters using
+ code units whose values are not used for BMP characters
+ i.e. ASCII chars
+ */
+ switch (c) {
+ case '\n':
+ sb.append("\\n");
+ break;
+ case '\r':
+ sb.append("\\r");
+ break;
+ case '\\':
+ sb.append("\\\\");
+ break;
+ default:
+ sb.append(c);
+ }
+ }
+ return sb.toString();
+ }
+
+ private static String unescape(String line) {
+ int len = line.length();
+ StringBuilder sb = new StringBuilder(len - 1);
+ for (int i = 0; i < len; i++) {
+ char c = line.charAt(i);
+ if (c == '\\') {
+ checkState(i < len - 1, "Expected one more char after '\\' at [%s] in [%s]", i, line);
+ char nextChar = line.charAt(i + 1);
+ switch (nextChar) {
+ case 'n':
+ sb.append('\n');
+ i++;
+ break;
+ case 'r':
+ sb.append('\r');
+ i++;
+ break;
+ case '\\':
+ sb.append('\\');
+ i++;
+ break;
+ default:
+ String msg = String.format("Unexpected char [%c] found at %d of [%s]. " +
+ "Expected '\\' or 'r' or 'n", nextChar, i, line);
+ throw new IllegalArgumentException(msg);
+ }
+ } else {
+ sb.append(c);
+ }
+ }
+ return sb.toString();
+ }
+
+}
Propchange: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtils.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java?rev=1703382&r1=1703381&r2=1703382&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/ExternalSort.java Wed Sep 16 10:42:25 2015
@@ -201,7 +201,7 @@ public class ExternalSort {
// in bytes
long currentblocksize = 0;
while ((currentblocksize < blocksize)
- && ((line = fbr.readLine()) != null)) {
+ && ((line = readLine(fbr)) != null)) {
// as long as you have enough memory
if (counter < numHeader) {
counter++;
@@ -296,7 +296,7 @@ public class ExternalSort {
for (String r : tmplist) {
// Skip duplicate lines
if (!distinct || (lastLine == null || (lastLine != null && cmp.compare(r, lastLine) != 0))) {
- fbw.write(r);
+ writeLine(fbw, r);
fbw.newLine();
lastLine = r;
}
@@ -454,7 +454,7 @@ public class ExternalSort {
String r = bfb.pop();
// Skip duplicate lines
if (!distinct || (lastLine == null || (lastLine != null && cmp.compare(r, lastLine) != 0))) {
- fbw.write(r);
+ writeLine(fbw, r);
fbw.newLine();
lastLine = r;
}
@@ -629,6 +629,14 @@ public class ExternalSort {
}
};
+ static String readLine(BufferedReader br) throws IOException {
+ return EscapeUtils.unescapeLineBreaks(br.readLine());
+ }
+
+ static void writeLine(BufferedWriter wr, String line) throws IOException {
+ wr.write(EscapeUtils.escapeLineBreak(line));
+ }
+
}
class BinaryFileBuffer {
@@ -648,7 +656,7 @@ class BinaryFileBuffer {
private void reload() throws IOException {
try {
- if ((this.cache = this.fbr.readLine()) == null) {
+ if ((this.cache = ExternalSort.readLine(fbr)) == null) {
this.empty = true;
this.cache = null;
} else {
Modified: jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java?rev=1703382&r1=1703381&r2=1703382&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/main/java/org/apache/jackrabbit/oak/commons/sort/StringSort.java Wed Sep 16 10:42:25 2015
@@ -41,6 +41,9 @@ import org.apache.commons.io.LineIterato
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.escapeLineBreak;
+import static org.apache.jackrabbit.oak.commons.sort.EscapeUtils.unescapeLineBreaks;
+
/**
* Utility class to store a list of string and perform sort on that. For small size
* the list would be maintained in memory. If the size crosses the required threshold then
@@ -139,7 +142,7 @@ public class StringSort implements Itera
private void flushToFile(List<String> ids) throws IOException {
BufferedWriter w = getPersistentState().getWriter();
for (String id : ids) {
- w.write(id);
+ w.write(escapeLineBreak(id));
w.newLine();
}
ids.clear();
@@ -264,5 +267,10 @@ public class StringSort implements Itera
public CloseableIterator(Reader reader) throws IllegalArgumentException {
super(reader);
}
+
+ @Override
+ public String next() {
+ return unescapeLineBreaks(super.next());
+ }
}
}
Added: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtilsTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtilsTest.java?rev=1703382&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtilsTest.java (added)
+++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtilsTest.java Wed Sep 16 10:42:25 2015
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.commons.sort;
+
+import java.util.Random;
+
+import org.apache.commons.lang3.RandomStringUtils;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class EscapeUtilsTest {
+
+ @Test
+ public void noOp() throws Exception{
+ assertEquals(null, EscapeUtils.escapeLineBreak(null));
+ assertEquals("abc", EscapeUtils.escapeLineBreak("abc"));
+ assertEquals("", EscapeUtils.escapeLineBreak(""));
+ assertEquals("some text with multi byte ç°ä¸ characters.",
+ EscapeUtils.escapeLineBreak("some text with multi byte ç°ä¸ characters."));
+ }
+
+ @Test
+ public void testEscape() throws Exception{
+ assertEquals("ab\\nc\\r", EscapeUtils.escapeLineBreak("ab\nc\r"));
+ assertEquals("a\\\\z", EscapeUtils.escapeLineBreak("a\\z"));
+ assertEquals("some text with multi \\nbyte ç°ä¸ characters.",
+ EscapeUtils.escapeLineBreak("some text with multi \nbyte ç°ä¸ characters."));
+ }
+
+ @Test
+ public void noOpUnEscape() throws Exception{
+ assertEquals(null, EscapeUtils.unescapeLineBreaks(null));
+ assertEquals("abc", EscapeUtils.unescapeLineBreaks("abc"));
+ assertEquals("abc\b", EscapeUtils.unescapeLineBreaks("abc\b"));
+ assertEquals("", EscapeUtils.unescapeLineBreaks(""));
+ assertEquals("some text with multi byte ç°ä¸ characters.",
+ EscapeUtils.unescapeLineBreaks("some text with multi byte ç°ä¸ characters."));
+ }
+
+ @Test
+ public void testUnEscape() throws Exception{
+ assertEquals("ab\nc\r", EscapeUtils.unescapeLineBreaks("ab\\nc\\r"));
+ assertEquals("a\\z", EscapeUtils.unescapeLineBreaks("a\\\\z"));
+ assertEquals("some text with multi \nbyte ç°ä¸ characters.",
+ EscapeUtils.unescapeLineBreaks("some text with multi \\nbyte ç°ä¸ characters."));
+ }
+
+ @Test
+ public void testEscapeUnEscape() throws Exception{
+ assertEscape("ab\nc\r");
+ assertEscape("a\\z");
+ assertEscape("a\\\\z\nc");
+ assertEscape("some text with multi \nbyte \rç°ä¸ characters\\.");
+ }
+
+ @Test(expected = IllegalStateException.class)
+ public void invalidUnEscape() throws Exception{
+ EscapeUtils.unescapeLineBreaks("abc\\");
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void invalidUnEscape2() throws Exception{
+ //Pass an unescaped string. In an escaped string a literal '\'
+ // would always be escaped
+ EscapeUtils.unescapeLineBreaks("abc\\k\\n");
+ }
+
+ @Test
+ public void randomized() throws Exception {
+ Random r = new Random(1);
+ for (int i = 0; i < 100000; i++) {
+ int len = r.nextInt(10);
+ StringBuilder buff = new StringBuilder();
+ for (int j = 0; j < len; j++) {
+ switch (r.nextInt(3)) {
+ case 0:
+ String s = "\\\r\nrnRN ";
+ buff.append(s.charAt(r.nextInt(s.length())));
+ break;
+ case 1:
+ buff.append(RandomStringUtils.random(4, true, false));
+ break;
+ case 2:
+ buff.append((char) r.nextInt(65000));
+ break;
+ }
+ }
+ String original = buff.toString();
+ String escaped = EscapeUtils.escapeLineBreak(original);
+ String unescaped = EscapeUtils.unescapeLineBreaks(escaped);
+ assertTrue(escaped.indexOf('\n') < 0);
+ assertTrue(escaped.indexOf('\r') < 0);
+ assertEquals(original, unescaped);
+ }
+ }
+
+ private static void assertEscape(String text){
+ String result = EscapeUtils.unescapeLineBreaks(EscapeUtils.escapeLineBreak(text));
+ assertEquals(text, result);
+ }
+}
Propchange: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/EscapeUtilsTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java?rev=1703382&r1=1703381&r2=1703382&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java (original)
+++ jackrabbit/oak/trunk/oak-commons/src/test/java/org/apache/jackrabbit/oak/commons/sort/StringSortTest.java Wed Sep 16 10:42:25 2015
@@ -32,6 +32,7 @@ import java.util.Set;
import com.google.common.base.Joiner;
import com.google.common.collect.Collections2;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
@@ -61,6 +62,42 @@ public class StringSortTest {
addPathsToCollector(paths);
assertTrue(collector.usingFile());
+ assertConstraints(paths);
+
+ collector.close();
+ }
+
+ @Test
+ public void sortWithEntriesHavingLineBreaks() throws Exception{
+ List<String> paths = Lists.newArrayList("/a", "/a/b\nc", "/a/b\rd", "/a/b\r\ne", "/a/c");
+
+ collector = new StringSort(0, comparator);
+ addPathsToCollector(paths);
+
+ assertTrue(collector.usingFile());
+ assertConstraints(paths);
+
+ collector.close();
+ }
+
+ /**
+ * Test for the case where sorting order should not be affected by escaping
+ *
+ * "aa", "aa\n1", "aa\r2", "aa\\" -> "aa", "aa\n1", "aa\r2", "aa\\"
+ * "aa", "aa\\n1", "aa\\r2", "aa\\\\" -> "aa", "aa\\", "aa\n1", "aa\r2",
+ *
+ * In above case the sorting order for escaped string is different. So
+ * it needs to be ensured that sorting order remain un affected by escaping
+ * @throws Exception
+ */
+ @Test
+ public void sortWithEntriesHavingLineBreaks2() throws Exception{
+ List<String> paths = Lists.newArrayList("/a", "/a/a\nc", "/a/a\rd", "/a/a\r\ne", "/a/a\\");
+
+ collector = new StringSort(0, comparator);
+ addPathsToCollector(paths);
+
+ assertTrue(collector.usingFile());
assertConstraints(paths);
collector.close();
Modified: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java?rev=1703382&r1=1703381&r2=1703382&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/document/VersionGCDeletionTest.java Wed Sep 16 10:42:25 2015
@@ -165,7 +165,6 @@ public class VersionGCDeletionTest {
}
}
- @Ignore("OAK-3395")
@Test
public void gcWithPathsHavingNewLine() throws Exception{
int noOfDocsToDelete = 200;