You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2015/06/17 16:07:40 UTC

svn commit: r1686015 - in /jackrabbit/oak/trunk/oak-core/src: main/java/org/apache/jackrabbit/oak/query/fulltext/ test/java/org/apache/jackrabbit/oak/query/fulltext/

Author: thomasm
Date: Wed Jun 17 14:07:39 2015
New Revision: 1686015

URL: http://svn.apache.org/r1686015
Log:
OAK-3000 SimpleExcerptProvider causes OOM for some wildcard expressions

Added:
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/
    jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java
Modified:
    jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java

Modified: jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java?rev=1686015&r1=1686014&r2=1686015&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java (original)
+++ jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java Wed Jun 17 14:07:39 2015
@@ -18,6 +18,7 @@ package org.apache.jackrabbit.oak.query.
 
 import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;
 
+import java.util.BitSet;
 import java.util.HashSet;
 import java.util.Set;
 
@@ -190,46 +191,61 @@ public class SimpleExcerptProvider {
         return excerpt.toString();
     }
 
-    private static String highlight(StringBuilder text, Set<String> searchToken) {
+    static String highlight(StringBuilder text, Set<String> searchToken) {
         Set<String> tokens = tokenize(searchToken);
-        text = new StringBuilder(encodeIllegalXMLCharacters(text.toString()));
+        String escaped = encodeIllegalXMLCharacters(text.toString());
+        BitSet highlight = new BitSet();
         for (String token : tokens) {
-            text = replaceAll(text, token, "<strong>", "</strong>");
+            highlight(escaped, highlight, token);
         }
-
         StringBuilder excerpt = new StringBuilder("<div><span>");
-        excerpt.append(text.toString());
+        boolean strong = false;
+        for (int i = 0; i < escaped.length(); i++) {
+            if (highlight.get(i) && !strong) {
+                strong = true;
+                excerpt.append("<strong>");
+            } else if (!highlight.get(i) && strong) {
+                strong = false;
+                excerpt.append("</strong>");
+            }
+            excerpt.append(escaped.charAt(i));
+        }
+        if (strong) {
+            excerpt.append("</strong>");
+        }
         excerpt.append("</span></div>");
         return excerpt.toString();
     }
-
-    private static StringBuilder replaceAll(StringBuilder in, String token,
-            String start, String end) {
+    
+    private static void highlight(String text, BitSet highlightBits, String token) {
         boolean isLike = false;
         if (token.endsWith("*")) {
+            if (token.length() == 1) {
+                // don't highlight the '*' character itself
+                return;
+            }
             token = token.substring(0, token.length() - 1);
             isLike = true;
         }
-        int index = in.indexOf(token);
-        while (index != -1) {
+        int index = 0;
+        while (index < text.length()) {
+            index = text.indexOf(token, index);
+            if (index < 0) {
+                break;
+            }
             int endIndex = index + token.length();
             if (isLike) {
-                int nextSpace = in.indexOf(" ", endIndex);
+                int nextSpace = text.indexOf(" ", endIndex);
                 if (nextSpace != -1) {
                     endIndex = nextSpace;
                 } else {
-                    endIndex = in.length();
+                    endIndex = text.length();
                 }
             }
-            String current = in.substring(index, endIndex);
-            StringBuilder newToken = new StringBuilder(start);
-            newToken.append(current);
-            newToken.append(end);
-            String newTokenS = newToken.toString();
-            in.replace(index, index + current.length(), newTokenS);
-            index = in.indexOf(token,
-                    in.lastIndexOf(newTokenS) + newTokenS.length());
+            while (index < endIndex) {
+                highlightBits.set(index++);
+            }
         }
-        return in;
     }
+    
 }

Added: jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java?rev=1686015&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java (added)
+++ jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java Wed Jun 17 14:07:39 2015
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.query.fulltext;
+
+import static com.google.common.collect.ImmutableSet.of;
+import static org.apache.jackrabbit.oak.query.fulltext.SimpleExcerptProvider.highlight;
+import static org.junit.Assert.assertEquals;
+
+import java.util.Random;
+
+import org.junit.Test;
+
+public class SimpleExcerptProviderTest {
+
+    @Test
+    public void simpleTest() throws Exception {
+        assertEquals("<div><span><strong>fox</strong> is jumping</span></div>",
+                highlight(sb("fox is jumping"), of("fox")));
+        assertEquals("<div><span>fox is <strong>jumping</strong></span></div>",
+                highlight(sb("fox is jumping"), of("jump*")));
+
+    }
+
+    @Test
+    public void highlightWithWildCard() throws Exception {
+        assertEquals("<div><span><strong>fox</strong> is jumping</span></div>",
+                highlight(sb("fox is jumping"), of("fox *")));
+    }
+
+    @Test
+    public void highlightIgnoreStar() throws Exception {
+        assertEquals("<div><span>10 * 10</span></div>",
+                highlight(sb("10 * 10"), of("fox *")));
+    }
+
+    @Test
+    public void randomized() throws Exception {
+        Random r = new Random(1);
+        String set = "abc*\'\"<> ";
+        for (int i = 0; i < 10000; i++) {
+            highlight(sb(randomString(r, set)), of(randomString(r, set)));
+        }
+    }
+
+    private static String randomString(Random r, String set) {
+        int len = r.nextInt(10);
+        StringBuilder buff = new StringBuilder();
+        for (int i = 0; i < len; i++) {
+            buff.append(set.charAt(r.nextInt(set.length())));
+        }
+        return buff.toString();
+    }
+
+    private static StringBuilder sb(String text) {
+        return new StringBuilder(text);
+    }
+}
\ No newline at end of file