You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/11/25 20:49:06 UTC
svn commit: r884240 - in /lucene/java/branches/flex_1458/src/java/org/apache/lucene: index/codecs/standard/StandardTermsDictReader.java util/UnicodeUtil.java

Author: mikemccand
Date: Wed Nov 25 19:49:05 2009
New Revision: 884240

URL: http://svn.apache.org/viewvc?rev=884240&view=rev
Log:
LUCENE-1458 (on flex branch): added some nocommits; removed 0xffff logic from UnicodeUtil

Modified:
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/UnicodeUtil.java

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=884240&r1=884239&r2=884240&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java Wed Nov 25 19:49:05 2009
@@ -302,6 +302,7 @@
        *  was found, SeekStatus.END if we hit EOF */
       @Override
       public SeekStatus seek(TermRef term) throws IOException {
+
         ReuseLRUCache<TermRef, CacheEntry> cache = null;
         CacheEntry entry = null;
         TermRef entryKey = null;
@@ -327,6 +328,13 @@
           System.out.println(Thread.currentThread().getName() + ":stdr.seek(text=" + fieldInfo.name + ":" + term + ") seg=" + segment);
         }
 
+        // nocommit -- test whether this check really
+        // helps or is necessary: compareTerm isn't that
+        // cheap, and how often do callers really seek to
+        // the term they are already on (it's silly to do
+        // so)?  I'd prefer such silly apps take the hit,
+        // not well-behaved apps.
+
         if (bytesReader.started && termUpto < numTerms && bytesReader.term.compareTerm(term) == 0) {
           // nocommit -- not right if text is ""?
           // mxx
@@ -341,6 +349,11 @@
         // here, that avoids the binary search if the seek
         // is w/in the current index block
 
+        // nocommit -- also, not sure it'll help, but we
+        // can bound this binary search, since we know the
+        // term ord we are now on, and we can check how
+        // this new term compares to our current term
+
         // Find latest index term that's <= our text:
         indexReader.getIndexOffset(term, indexResult);
 
@@ -379,6 +392,10 @@
               //new Throwable().printStackTrace(System.out);
             }
 
+            // nocommit -- see how often a term that was
+            // already NOT_FOUND is then sent back here;
+            // silly for apps to do so... but we should
+            // see if Lucene does
             if (docs.canCaptureState()) {
               // Store in cache
               if (cache.eldest != null) {

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/UnicodeUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/UnicodeUtil.java?rev=884240&r1=884239&r2=884240&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/UnicodeUtil.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/UnicodeUtil.java Wed Nov 25 19:49:05 2009
@@ -120,7 +120,10 @@
 
   /** Encode characters from a char[] source, starting at
    *  offset and stopping when the character 0xffff is seen.
-   *  Returns the number of bytes written to bytesOut. */
+   *  Returns the number of bytes written to bytesOut.
+   *
+   * @deprecated Use {@link #UTF16toUTF8(char[], int, int,
+   * UTF8Result)} instead. */
   public static void UTF16toUTF8(final char[] source, final int offset, UTF8Result result) {
 
     int upto = 0;
@@ -208,7 +211,12 @@
       } else {
         // surrogate pair
         // confirm valid high surrogate
-        if (code < 0xDC00 && i < end && source[i] != 0xffff) {
+        // nocommit -- I removed the 0xffff check, here, but
+        // technically that's a break in back-compat, though
+        // it seems crazy that any external apps would rely
+        // on this?
+        //if (code < 0xDC00 && i < end && source[i] != 0xffff) {
+        if (code < 0xDC00 && i < end) {
           int utf32 = (int) source[i];
           // confirm valid low surrogate and write pair
           if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) {