You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/07/23 17:41:43 UTC
svn commit: r967137 -
/lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/util/BytesRef.java
Author: rmuir
Date: Fri Jul 23 15:41:43 2010
New Revision: 967137
URL: http://svn.apache.org/viewvc?rev=967137&view=rev
Log:
remove nocommits, this isn't really evil
Modified:
lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/util/BytesRef.java
Modified: lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/util/BytesRef.java?rev=967137&r1=967136&r2=967137&view=diff
==============================================================================
--- lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/branches/preflexfixes/lucene/src/java/org/apache/lucene/util/BytesRef.java Fri Jul 23 15:41:43 2010
@@ -331,13 +331,16 @@ public final class BytesRef implements C
// We know the terms are not equal, but, we may
// have to carefully fixup the bytes at the
// difference to match UTF16's sort order:
+
+ // NOTE: instead of moving 0xee and 0xef (lead bytes of U+E000-U+FFFF, which UTF-16 sorts above
+ // supplementary code points) to the unused 0xfe and 0xff, we move them to the unused 0xfc and
+ // 0xfd [reserved for future 6-byte character sequences]. This reserves 0xff for preflex's term
+ // reordering (surrogate dance), and if Unicode grows such that 6-byte sequences are needed, we have much bigger problems anyway.
if (aByte >= 0xee && bByte >= 0xee) {
if ((aByte & 0xfe) == 0xee) {
- // nocommit
aByte += 0xe;
}
if ((bByte&0xfe) == 0xee) {
- // nocommit
bByte += 0xe;
}
}