You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2013/08/30 00:01:47 UTC

svn commit: r1518839 - in /lucene/dev/trunk/lucene: CHANGES.txt highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java

Author: uschindler
Date: Thu Aug 29 22:01:46 2013
New Revision: 1518839

URL: http://svn.apache.org/r1518839
Log:
LUCENE-5191: Fix Unicode corruption in HTML escaping of Standard Highlighter and Fast Vector Highlighter.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java
    lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1518839&r1=1518838&r2=1518839&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Aug 29 22:01:46 2013
@@ -152,6 +152,11 @@ Bug Fixes
 * LUCENE-5192: IndexWriter could allow adding same field name with different
   DocValueTypes under some circumstances. (Shai Erera)
 
+* LUCENE-5191: SimpleHTMLEncoder in Highlighter module broke Unicode
+  outside BMP because it encoded UTF-16 chars instead of codepoints.
+  The escaping of codepoints > 127 was removed (not needed for valid HTML)
+  and missing escaping for ' and / was added.  (Uwe Schindler)
+  
 API Changes
 
 * LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.

Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java?rev=1518839&r1=1518838&r2=1518839&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java Thu Aug 29 22:01:46 2013
@@ -47,33 +47,27 @@ public class SimpleHTMLEncoder implement
     {
       char ch = plainText.charAt(index);
 
-      switch (ch)
-      {
+      switch (ch) {
       case '"':
         result.append("&quot;");
         break;
-
       case '&':
         result.append("&amp;");
         break;
-
       case '<':
         result.append("&lt;");
         break;
-
       case '>':
         result.append("&gt;");
         break;
-
+      case '\'':
+        result.append("&#x27;");
+        break;
+      case '/':
+        result.append("&#x2F;");
+        break;
       default:
-           if (ch < 128)
-           {
-                 result.append(ch);
-             }
-           else
-             {
-                 result.append("&#").append((int)ch).append(";");
-             }
+        result.append(ch);
       }
     }
 

Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java?rev=1518839&r1=1518838&r2=1518839&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java Thu Aug 29 22:01:46 2013
@@ -85,7 +85,7 @@ public class SimpleFragmentsBuilderTest 
     SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
     String[] preTags = { "[" };
     String[] postTags = { "]" };
-    assertEquals( "&lt;h1&gt; [a] &lt;/h1&gt;",
+    assertEquals( "&lt;h1&gt; [a] &lt;&#x2F;h1&gt;",
         sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
   }