You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/11/25 20:49:06 UTC
svn commit: r884240 - in
/lucene/java/branches/flex_1458/src/java/org/apache/lucene:
index/codecs/standard/StandardTermsDictReader.java util/UnicodeUtil.java
Author: mikemccand
Date: Wed Nov 25 19:49:05 2009
New Revision: 884240
URL: http://svn.apache.org/viewvc?rev=884240&view=rev
Log:
LUCENE-1458 (on flex branch): added some nocommits; removed 0xffff logic from UnicodeUtil
Modified:
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/UnicodeUtil.java
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=884240&r1=884239&r2=884240&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java Wed Nov 25 19:49:05 2009
@@ -302,6 +302,7 @@
* was found, SeekStatus.END if we hit EOF */
@Override
public SeekStatus seek(TermRef term) throws IOException {
+
ReuseLRUCache<TermRef, CacheEntry> cache = null;
CacheEntry entry = null;
TermRef entryKey = null;
@@ -327,6 +328,13 @@
System.out.println(Thread.currentThread().getName() + ":stdr.seek(text=" + fieldInfo.name + ":" + term + ") seg=" + segment);
}
+ // nocommit -- test if this is really
+ // helping/necessary -- that compareTerm isn't that
+ // cheap, and, how often do callers really seek to
+ // the term they are already on (it's silly to do
+ // so) -- I'd prefer such silly apps take the hit,
+ // not well behaved apps?
+
if (bytesReader.started && termUpto < numTerms && bytesReader.term.compareTerm(term) == 0) {
// nocommit -- not right if text is ""?
// mxx
@@ -341,6 +349,11 @@
// here, that avoids the binary search if the seek
// is w/in the current index block
+ // nocommit -- also, not sure it'll help, but, we
+ // can bound this binary search, since we know the
+ // term ord we are now on, and we can compare how
+ // this new term compares to our current term
+
// Find latest index term that's <= our text:
indexReader.getIndexOffset(term, indexResult);
@@ -379,6 +392,10 @@
//new Throwable().printStackTrace(System.out);
}
+ // nocommit -- see how often an already
+ // NOT_FOUND is then sent back here? silly for
+ // apps to do so... but we should see if Lucene
+ // does
if (docs.canCaptureState()) {
// Store in cache
if (cache.eldest != null) {
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/UnicodeUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/UnicodeUtil.java?rev=884240&r1=884239&r2=884240&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/UnicodeUtil.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/util/UnicodeUtil.java Wed Nov 25 19:49:05 2009
@@ -120,7 +120,10 @@
/** Encode characters from a char[] source, starting at
* offset and stopping when the character 0xffff is seen.
- * Returns the number of bytes written to bytesOut. */
+ * Returns the number of bytes written to bytesOut.
+ *
+ * @deprecated Use {@link #UTF16toUTF8(char[], int, int,
+ * UTF8Result)} instead. */
public static void UTF16toUTF8(final char[] source, final int offset, UTF8Result result) {
int upto = 0;
@@ -208,7 +211,12 @@
} else {
// surrogate pair
// confirm valid high surrogate
- if (code < 0xDC00 && i < end && source[i] != 0xffff) {
+ // nocommit -- I removed the 0xffff check, here, but
+ // technically that's a break in back-compat, though
+ // it seems crazy that any external apps would rely
+ // on this?
+ //if (code < 0xDC00 && i < end && source[i] != 0xffff) {
+ if (code < 0xDC00 && i < end) {
int utf32 = (int) source[i];
// confirm valid low surrogate and write pair
if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) {