You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2013/08/30 00:01:47 UTC
svn commit: r1518839 - in /lucene/dev/trunk/lucene: CHANGES.txt
highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java
highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
Author: uschindler
Date: Thu Aug 29 22:01:46 2013
New Revision: 1518839
URL: http://svn.apache.org/r1518839
Log:
LUCENE-5191: Fix Unicode corruption in HTML escaping of Standard Highlighter and Fast Vector Highlighter.
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1518839&r1=1518838&r2=1518839&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Aug 29 22:01:46 2013
@@ -152,6 +152,11 @@ Bug Fixes
* LUCENE-5192: IndexWriter could allow adding same field name with different
DocValueTypes under some circumstances. (Shai Erera)
+* LUCENE-5191: SimpleHTMLEncoder in Highlighter module broke Unicode
+ outside BMP because it encoded UTF-16 chars instead of codepoints.
+ The escaping of codepoints > 127 was removed (not needed for valid HTML)
+ and missing escaping for ' and / was added. (Uwe Schindler)
+
API Changes
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java?rev=1518839&r1=1518838&r2=1518839&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/SimpleHTMLEncoder.java Thu Aug 29 22:01:46 2013
@@ -47,33 +47,27 @@ public class SimpleHTMLEncoder implement
{
char ch = plainText.charAt(index);
- switch (ch)
- {
+ switch (ch) {
case '"':
result.append("&quot;");
break;
-
case '&':
result.append("&amp;");
break;
-
case '<':
result.append("&lt;");
break;
-
case '>':
result.append("&gt;");
break;
-
+ case '\'':
+ result.append("&#x27;");
+ break;
+ case '/':
+ result.append("&#x2F;");
+ break;
default:
- if (ch < 128)
- {
- result.append(ch);
- }
- else
- {
- result.append("&#").append((int)ch).append(";");
- }
+ result.append(ch);
}
}
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java?rev=1518839&r1=1518838&r2=1518839&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java Thu Aug 29 22:01:46 2013
@@ -85,7 +85,7 @@ public class SimpleFragmentsBuilderTest
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
String[] preTags = { "[" };
String[] postTags = { "]" };
- assertEquals( "&lt;h1&gt; [a] &lt;/h1&gt;",
+ assertEquals( "&lt;h1&gt; [a] &lt;&#x2F;h1&gt;",
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new SimpleHTMLEncoder() ) );
}