You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@lucene.apache.org by rm...@apache.org on 2010/07/23 17:41:43 UTC

svn commit: r967137 - /lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/util/BytesRef.java

Author: rmuir
Date: Fri Jul 23 15:41:43 2010
New Revision: 967137

URL: http://svn.apache.org/viewvc?rev=967137&view=rev
Log:
remove nocommits, this isn't really evil

Modified:
    lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/util/BytesRef.java

Modified: lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/util/BytesRef.java?rev=967137&r1=967136&r2=967137&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/util/BytesRef.java Fri Jul 23 15:41:43 2010
@@ -331,13 +331,16 @@ public final class BytesRef implements C
           // We know the terms are not equal, but, we may
           // have to carefully fixup the bytes at the
           // difference to match UTF16's sort order:
+          
+          // NOTE: instead of moving lead bytes 0xee and 0xef (U+E000..U+FFFF, which UTF-16 sorts after
+          // supplementary code points) to the unused 0xfe and 0xff, we move them to the unused 0xfc and 0xfd
+          // [reserved for future 6-byte character sequences]. This reserves 0xff for preflex's term reordering
+          // (surrogate dance), and if Unicode grows such that 6-byte sequences are needed we have much bigger problems anyway.
           if (aByte >= 0xee && bByte >= 0xee) {
             if ((aByte & 0xfe) == 0xee) {
-              // nocommit
               aByte += 0xe;
             }
             if ((bByte&0xfe) == 0xee) {
-              // nocommit
               bByte += 0xe;
             }
           }