You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by th...@apache.org on 2015/06/17 18:59:56 UTC
svn commit: r1686076 - in /jackrabbit/oak/branches/1.2/oak-core/src:
main/java/org/apache/jackrabbit/oak/query/fulltext/
test/java/org/apache/jackrabbit/oak/query/fulltext/
Author: thomasm
Date: Wed Jun 17 16:59:56 2015
New Revision: 1686076
URL: http://svn.apache.org/r1686076
Log:
OAK-3000 SimpleExcerptProvider causes OOM for some wildcard expressions
Added:
jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/
jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java
Modified:
jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
Modified: jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java?rev=1686076&r1=1686075&r2=1686076&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java (original)
+++ jackrabbit/oak/branches/1.2/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java Wed Jun 17 16:59:56 2015
@@ -18,6 +18,7 @@ package org.apache.jackrabbit.oak.query.
import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;
+import java.util.BitSet;
import java.util.HashSet;
import java.util.Set;
@@ -190,46 +191,61 @@ public class SimpleExcerptProvider {
return excerpt.toString();
}
- private static String highlight(StringBuilder text, Set<String> searchToken) {
+ static String highlight(StringBuilder text, Set<String> searchToken) {
Set<String> tokens = tokenize(searchToken);
- text = new StringBuilder(encodeIllegalXMLCharacters(text.toString()));
+ String escaped = encodeIllegalXMLCharacters(text.toString());
+ BitSet highlight = new BitSet();
for (String token : tokens) {
- text = replaceAll(text, token, "<strong>", "</strong>");
+ highlight(escaped, highlight, token);
}
-
StringBuilder excerpt = new StringBuilder("<div><span>");
- excerpt.append(text.toString());
+ boolean strong = false;
+ for (int i = 0; i < escaped.length(); i++) {
+ if (highlight.get(i) && !strong) {
+ strong = true;
+ excerpt.append("<strong>");
+ } else if (!highlight.get(i) && strong) {
+ strong = false;
+ excerpt.append("</strong>");
+ }
+ excerpt.append(escaped.charAt(i));
+ }
+ if (strong) {
+ excerpt.append("</strong>");
+ }
excerpt.append("</span></div>");
return excerpt.toString();
}
-
- private static StringBuilder replaceAll(StringBuilder in, String token,
- String start, String end) {
+
+ private static void highlight(String text, BitSet highlightBits, String token) {
boolean isLike = false;
if (token.endsWith("*")) {
+ if (token.length() == 1) {
+ // don't highlight the '*' character itself
+ return;
+ }
token = token.substring(0, token.length() - 1);
isLike = true;
}
- int index = in.indexOf(token);
- while (index != -1) {
+ int index = 0;
+ while (index < text.length()) {
+ index = text.indexOf(token, index);
+ if (index < 0) {
+ break;
+ }
int endIndex = index + token.length();
if (isLike) {
- int nextSpace = in.indexOf(" ", endIndex);
+ int nextSpace = text.indexOf(" ", endIndex);
if (nextSpace != -1) {
endIndex = nextSpace;
} else {
- endIndex = in.length();
+ endIndex = text.length();
}
}
- String current = in.substring(index, endIndex);
- StringBuilder newToken = new StringBuilder(start);
- newToken.append(current);
- newToken.append(end);
- String newTokenS = newToken.toString();
- in.replace(index, index + current.length(), newTokenS);
- index = in.indexOf(token,
- in.lastIndexOf(newTokenS) + newTokenS.length());
+ while (index < endIndex) {
+ highlightBits.set(index++);
+ }
}
- return in;
}
+
}
Added: jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java?rev=1686076&view=auto
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java (added)
+++ jackrabbit/oak/branches/1.2/oak-core/src/test/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProviderTest.java Wed Jun 17 16:59:56 2015
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.query.fulltext;
+
+import static com.google.common.collect.ImmutableSet.of;
+import static org.apache.jackrabbit.oak.query.fulltext.SimpleExcerptProvider.highlight;
+import static org.junit.Assert.assertEquals;
+
+import java.util.Random;
+
+import org.junit.Test;
+
+public class SimpleExcerptProviderTest {
+
+ @Test
+ public void simpleTest() throws Exception {
+ assertEquals("<div><span><strong>fox</strong> is jumping</span></div>",
+ highlight(sb("fox is jumping"), of("fox")));
+ assertEquals("<div><span>fox is <strong>jumping</strong></span></div>",
+ highlight(sb("fox is jumping"), of("jump*")));
+
+ }
+
+ @Test
+ public void highlightWithWildCard() throws Exception {
+ assertEquals("<div><span><strong>fox</strong> is jumping</span></div>",
+ highlight(sb("fox is jumping"), of("fox *")));
+ }
+
+ @Test
+ public void highlightIgnoreStar() throws Exception {
+ assertEquals("<div><span>10 * 10</span></div>",
+ highlight(sb("10 * 10"), of("fox *")));
+ }
+
+ @Test
+ public void randomized() throws Exception {
+ Random r = new Random(1);
+ String set = "abc*\'\"<> ";
+ for (int i = 0; i < 10000; i++) {
+ highlight(sb(randomString(r, set)), of(randomString(r, set)));
+ }
+ }
+
+ private static String randomString(Random r, String set) {
+ int len = r.nextInt(10);
+ StringBuilder buff = new StringBuilder();
+ for (int i = 0; i < len; i++) {
+ buff.append(set.charAt(r.nextInt(set.length())));
+ }
+ return buff.toString();
+ }
+
+ private static StringBuilder sb(String text) {
+ return new StringBuilder(text);
+ }
+}
\ No newline at end of file