You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by re...@apache.org on 2011/09/20 17:36:11 UTC

svn commit: r1173196 - in /jackrabbit/trunk/jackrabbit-core/src: main/java/org/apache/jackrabbit/core/query/lucene/ test/java/org/apache/jackrabbit/core/query/

Author: reschke
Date: Tue Sep 20 15:36:10 2011
New Revision: 1173196

URL: http://svn.apache.org/viewvc?rev=1173196&view=rev
Log:
JCR-3077: WeightedHighlighter does not encode XML markup characters

Refactor escaping in DefaultHighlighter a bit; allow derived classes to override it; let WeightedHighlighter inherit the default; add test case.

Modified:
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java
    jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java
    jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java?rev=1173196&r1=1173195&r2=1173196&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/DefaultHighlighter.java Tue Sep 20 15:36:10 2011
@@ -248,8 +248,8 @@ public class DefaultHighlighter {
                     if (skippedChars > surround) {
                         skippedChars = surround;
                     }
-                    sb.append(Text.encodeIllegalXMLCharacters(
-                            new String(cbuf, 0, surround - skippedChars)));
+                    sb.append(escape(new String(cbuf, 0, surround
+                            - skippedChars)));
                     sb.append(fragmentEnd);
                 }
             }
@@ -296,8 +296,8 @@ public class DefaultHighlighter {
             if (!sentenceStart) {
                 sb.append("... ");
             }
-            sb.append(Text.encodeIllegalXMLCharacters(
-                    new String(cbuf, skippedChars, cbuf.length - skippedChars)));
+            sb.append(escape(new String(cbuf, skippedChars, cbuf.length
+                    - skippedChars)));
 
             // iterate terms
             for (Iterator<TermVectorOffsetInfo> iter = fi.iterator(); iter.hasNext();) {
@@ -307,8 +307,7 @@ public class DefaultHighlighter {
                     cbuf = new char[nextStart - pos];
                     int charsRead = reader.read(cbuf, 0, nextStart - pos);
                     pos += (nextStart - pos);
-                    sb.append(Text.encodeIllegalXMLCharacters(
-                            new String(cbuf, 0, charsRead)));
+                    sb.append(escape(new String(cbuf, 0, charsRead)));
                 }
                 sb.append(hlStart);
                 nextStart = ti.getEndOffset();
@@ -316,8 +315,7 @@ public class DefaultHighlighter {
                 cbuf = new char[nextStart - pos];
                 reader.read(cbuf, 0, nextStart - pos);
                 pos += (nextStart - pos);
-                sb.append(Text.encodeIllegalXMLCharacters(
-                        new String(cbuf)));
+                sb.append(escape(new String(cbuf)));
                 sb.append(hlEnd);
             }
         }
@@ -343,8 +341,8 @@ public class DefaultHighlighter {
                 } else {
                     skippedChars = 0;
                 }
-                sb.append(Text.encodeIllegalXMLCharacters(
-                        new String(cbuf, 0, EOF ? skip : (surround - skippedChars))));
+                sb.append(escape(new String(cbuf, 0, EOF ? skip
+                        : (surround - skippedChars))));
                 if (!EOF) {
                     char lastChar = sb.charAt(sb.length() - 1);
                     if (lastChar != '.' && lastChar != '!' && lastChar != '?') {
@@ -364,7 +362,7 @@ public class DefaultHighlighter {
      * @param text the text.
      * @param excerptStart the excerpt start.
      * @param excerptEnd the excerpt end.
-     * @param fragmentStart the fragement start.
+     * @param fragmentStart the fragment start.
      * @param fragmentEnd the fragment end.
      * @param maxLength the maximum length of the fragment.
      * @return a default excerpt.
@@ -393,10 +391,24 @@ public class DefaultHighlighter {
                 }
             }
         }
-        excerpt.append(Text.encodeIllegalXMLCharacters(tmp.toString()));
+        excerpt.append(escape(tmp.toString()));
         excerpt.append(fragmentEnd).append(excerptEnd);
         return excerpt.toString();
     }
+    
+    
+    /**
+     * Escapes the input text so that it is suitable for the output format.
+     * <p>
+     * By default this performs XML escaping. Subclasses can override it
+     * for other output formats.
+     * 
+     * @param input raw text.
+     * @return text suitably escaped.
+     */
+    protected String escape(String input) {
+        return Text.encodeIllegalXMLCharacters(input);
+    }
 
     private static class FragmentInfo {
         List<TermVectorOffsetInfo> offsetInfosList;

Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java?rev=1173196&r1=1173195&r2=1173196&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/WeightedHighlighter.java Tue Sep 20 15:36:10 2011
@@ -195,14 +195,17 @@ public class WeightedHighlighter extends
                 TermVectorOffsetInfo oi = fIt.next();
                 if (lastOffsetInfo != null) {
                     // fill in text between terms
-                    sb.append(text.substring(lastOffsetInfo.getEndOffset(), oi.getStartOffset()));
+                    sb.append(escape(text.substring(
+                            lastOffsetInfo.getEndOffset(), oi.getStartOffset())));
                 }
                 sb.append(hlStart);
-                sb.append(text.substring(oi.getStartOffset(), oi.getEndOffset()));
+                sb.append(escape(text.substring(oi.getStartOffset(),
+                        oi.getEndOffset())));
                 sb.append(hlEnd);
                 lastOffsetInfo = oi;
             }
-            limit = Math.min(text.length(), fi.getStartOffset() - len + (surround * 2));
+            limit = Math.min(text.length(), fi.getStartOffset() - len
+                    + (surround * 2));
             endFragment(sb, text, fi.getEndOffset(), limit);
             sb.append(fragmentEnd);
         }
@@ -223,10 +226,10 @@ public class WeightedHighlighter extends
      * @return the length of the start fragment that was appended to
      *         <code>sb</code>.
      */
-    private static int startFragment(StringBuffer sb, String text, int offset, int limit) {
+    private int startFragment(StringBuffer sb, String text, int offset, int limit) {
         if (limit == 0) {
             // append all
-            sb.append(text.substring(0, offset));
+            sb.append(escape(text.substring(0, offset)));
             return offset;
         }
         String intro = "... ";
@@ -242,7 +245,7 @@ public class WeightedHighlighter extends
                 }
             }
         }
-        sb.append(intro).append(text.substring(start, offset));
+        sb.append(intro).append(escape(text.substring(start, offset)));
         return offset - start;
     }
 
@@ -256,10 +259,10 @@ public class WeightedHighlighter extends
      * @param offset the end offset of the last matching term in the fragment.
      * @param limit  do not go further than <code>limit</code>.
      */
-    private static void endFragment(StringBuffer sb, String text, int offset, int limit) {
+    private void endFragment(StringBuffer sb, String text, int offset, int limit) {
         if (limit == text.length()) {
             // append all
-            sb.append(text.substring(offset));
+            sb.append(escape(text.substring(offset)));
             return;
         }
         int end = offset;
@@ -269,7 +272,7 @@ public class WeightedHighlighter extends
                 end = i;
             }
         }
-        sb.append(text.substring(offset, end)).append(" ...");
+        sb.append(escape(text.substring(offset, end))).append(" ...");
     }
 
     private static class FragmentInfo {

Modified: jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java?rev=1173196&r1=1173195&r2=1173196&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/query/ExcerptTest.java Tue Sep 20 15:36:10 2011
@@ -119,6 +119,20 @@ public class ExcerptTest extends Abstrac
         assertEquals(excerpt, rows.nextRow().getValue("rep:excerpt(text)").getString());
     }
 
+    public void testEncodeIllegalCharsHighlights() throws RepositoryException {
+        String text = "bla <strong>bla</strong> foo";
+        String excerpt = createExcerpt("bla &lt;strong&gt;bla&lt;/strong&gt; <strong>foo</strong>");
+        Node n = testRootNode.addNode(nodeName1);
+        n.setProperty("text", text);
+        superuser.save();
+
+        String stmt = getStatement("foo");
+        QueryResult result = executeQuery(stmt);
+        RowIterator rows = result.getRows();
+        assertEquals(1, rows.getSize());
+        assertEquals(excerpt, rows.nextRow().getValue("rep:excerpt(text)").getString());
+    }
+
     private void checkExcerpt(String text, String fragmentText, String terms)
             throws RepositoryException {
         String excerpt = createExcerpt(fragmentText);