You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2010/07/26 21:31:34 UTC
svn commit: r979415 -
/lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
Author: mikemccand
Date: Mon Jul 26 19:31:34 2010
New Revision: 979415
URL: http://svn.apache.org/viewvc?rev=979415&view=rev
Log:
LUCENE-2554: add comment explaining why we can't assert valid UTF8 when dancing
Modified:
lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
Modified: lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java?rev=979415&r1=979414&r2=979415&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java Mon Jul 26 19:31:34 2010
@@ -290,9 +290,10 @@ public class PreFlexFields extends Field
// unicode character:
assert isHighBMPChar(term.bytes, pos);
- // TODO: understand why this assert sometimes (rarely)
- // trips!
- // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3);
+ // NOTE: we cannot make this assert, because
+ // AutomatonQuery legitimately sends us malformed UTF-8
+ // (e.g. a term consisting of just the byte 0xee)
+ // assert term.length >= pos + 3: "term.length=" + term.length + " pos+3=" + (pos+3) + " byte=" + Integer.toHexString(term.bytes[pos]) + " term=" + term.toString();
// Save the bytes and length, since we need to
// restore this if seek "back" finds no matching