You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2010/07/01 15:41:26 UTC
svn commit: r959657 - in /lucene/dev/trunk/lucene: CHANGES.txt
src/java/org/apache/lucene/document/FieldSelectorResult.java
src/java/org/apache/lucene/index/FieldsReader.java
src/test/org/apache/lucene/index/TestFieldsReader.java
Author: gsingers
Date: Thu Jul 1 13:41:26 2010
New Revision: 959657
URL: http://svn.apache.org/viewvc?rev=959657&view=rev
Log:
LUCENE-1810: added LATENT field selector option
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=959657&r1=959656&r2=959657&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Jul 1 13:41:26 2010
@@ -184,6 +184,9 @@ New features
* LUCENE-2489: Added PerFieldCodecWrapper (in oal.index.codecs) which
lets you set the Codec per field (Mike McCandless)
+* LUCENE-1810: Added FieldSelectorResult.LATENT to not cache lazy loaded fields
+ (Tim Smith, Grant Ingersoll)
+
Optimizations
* LUCENE-2410: ~2.5X speedup on exact (slop=0) PhraseQuery matching.
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java?rev=959657&r1=959656&r2=959657&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java Thu Jul 1 13:41:26 2010
@@ -63,5 +63,14 @@ public enum FieldSelectorResult {
SIZE,
/** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded */
- SIZE_AND_BREAK
+ SIZE_AND_BREAK,
+
+ /**
+ * Lazily load this {@link Field}, but do not cache the result. This means the {@link Field} is valid, but it does not hold on to its data: the value is
+ * read again each time it is requested. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should
+ * return a valid instance of a {@link Fieldable}.
+ *<p/>
+ * {@link Document#add(Fieldable)} should be called by the Reader.
+ */
+ LATENT
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java?rev=959657&r1=959656&r2=959657&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java Thu Jul 1 13:41:26 2010
@@ -219,7 +219,9 @@ final class FieldsReader implements Clon
break;//Get out of this loop
}
else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
- addFieldLazy(doc, fi, binary, tokenize);
+ addFieldLazy(doc, fi, binary, tokenize, true);
+ } else if (acceptField.equals(FieldSelectorResult.LATENT)) {
+ addFieldLazy(doc, fi, binary, tokenize, false);
}
else if (acceptField.equals(FieldSelectorResult.SIZE)){
skipField(addFieldSize(doc, fi, binary));
@@ -274,12 +276,12 @@ final class FieldsReader implements Clon
fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
}
- private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws IOException {
+ private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult) throws IOException {
if (binary) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
- doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
+ doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult));
//Need to move the pointer ahead by toRead positions
fieldsStream.seek(pointer + toRead);
} else {
@@ -292,7 +294,7 @@ final class FieldsReader implements Clon
long pointer = fieldsStream.getFilePointer();
//Skip ahead of where we are by the length of what is stored
fieldsStream.seek(pointer+length);
- f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
+ f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, cacheResult);
f.setOmitNorms(fi.omitNorms);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
@@ -349,22 +351,25 @@ final class FieldsReader implements Clon
private class LazyField extends AbstractField implements Fieldable {
private int toRead;
private long pointer;
+ private final boolean cacheResult;
- public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary) {
+ public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean cacheResult) {
super(name, store, Field.Index.NO, Field.TermVector.NO);
this.toRead = toRead;
this.pointer = pointer;
this.isBinary = isBinary;
+ this.cacheResult = cacheResult;
if (isBinary)
binaryLength = toRead;
lazy = true;
}
- public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary) {
+ public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean cacheResult) {
super(name, store, index, termVector);
this.toRead = toRead;
this.pointer = pointer;
this.isBinary = isBinary;
+ this.cacheResult = cacheResult;
if (isBinary)
binaryLength = toRead;
lazy = true;
@@ -404,17 +409,23 @@ final class FieldsReader implements Clon
return null;
else {
if (fieldsData == null) {
+ String result = null;
IndexInput localFieldsStream = getFieldStream();
try {
localFieldsStream.seek(pointer);
byte[] bytes = new byte[toRead];
localFieldsStream.readBytes(bytes, 0, toRead);
- fieldsData = new String(bytes, "UTF-8");
+ result = new String(bytes, "UTF-8");
} catch (IOException e) {
throw new FieldReaderException(e);
}
+ if (cacheResult == true){
+ fieldsData = result;
+ }
+ return result;
+ } else {
+ return (String) fieldsData;
}
- return (String) fieldsData;
}
}
@@ -438,16 +449,19 @@ final class FieldsReader implements Clon
try {
localFieldsStream.seek(pointer);
localFieldsStream.readBytes(b, 0, toRead);
- fieldsData = b;
} catch (IOException e) {
throw new FieldReaderException(e);
}
binaryOffset = 0;
binaryLength = toRead;
+ if (cacheResult == true){
+ fieldsData = b;
+ }
+ return b;
+ } else {
+ return (byte[]) fieldsData;
}
-
- return (byte[]) fieldsData;
} else
return null;
}
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java?rev=959657&r1=959656&r2=959657&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java Thu Jul 1 13:41:26 2010
@@ -128,6 +128,8 @@ public class TestFieldsReader extends Lu
String value = field.stringValue();
assertTrue("value is null and it shouldn't be", value != null);
assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
+ assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
+
field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("Field is lazy and it should not be", field.isLazy() == false);
@@ -148,12 +150,85 @@ public class TestFieldsReader extends Lu
byte [] bytes = field.getBinaryValue();
assertTrue("bytes is null and it shouldn't be", bytes != null);
assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
+ assertTrue("calling binaryValue() twice should give same reference", field.getBinaryValue() == field.getBinaryValue());
for (int i = 0; i < bytes.length; i++) {
assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
}
}
+ public void testLatentFields() throws Exception {
+ assertTrue(dir != null);
+ assertTrue(fieldInfos != null);
+ FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
+ assertTrue(reader != null);
+ assertTrue(reader.size() == 1);
+ Set loadFieldNames = new HashSet();
+ loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
+ loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
+ Set lazyFieldNames = new HashSet();
+ //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
+ lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
+ lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
+ lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
+ lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
+
+ // Use LATENT instead of LAZY
+ SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames) {
+ public FieldSelectorResult accept(String fieldName) {
+ final FieldSelectorResult result = super.accept(fieldName);
+ if (result.equals(FieldSelectorResult.LAZY_LOAD)) {
+ return FieldSelectorResult.LATENT;
+ } else {
+ return result;
+ }
+ }
+ };
+
+ Document doc = reader.doc(0, fieldSelector);
+ assertTrue("doc is null and it shouldn't be", doc != null);
+ Fieldable field = doc.getFieldable(DocHelper.LAZY_FIELD_KEY);
+ assertTrue("field is null and it shouldn't be", field != null);
+ assertTrue("field is not lazy and it should be", field.isLazy());
+ String value = field.stringValue();
+ assertTrue("value is null and it shouldn't be", value != null);
+ assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
+ assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());
+
+ field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
+ assertTrue("field is null and it shouldn't be", field != null);
+ assertTrue("Field is lazy and it should not be", field.isLazy() == false);
+ assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
+
+ field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
+ assertTrue("field is null and it shouldn't be", field != null);
+ assertTrue("Field is lazy and it should not be", field.isLazy() == false);
+ assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);
+ assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
+
+ field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
+ assertTrue("field is null and it shouldn't be", field != null);
+ assertTrue("Field is lazy and it should not be", field.isLazy() == true);
+ assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);
+ assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());
+
+ field = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
+ assertTrue("field is null and it shouldn't be", field != null);
+ assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);
+ assertTrue("calling binaryValue() twice should give different references", field.getBinaryValue() != field.getBinaryValue());
+
+ byte [] bytes = field.getBinaryValue();
+ assertTrue("bytes is null and it shouldn't be", bytes != null);
+ assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
+ for (int i = 0; i < bytes.length; i++) {
+ assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
+
+ }
+ }
+
+
+
+
public void testLazyFieldsAfterClose() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);