You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2010/07/01 16:09:33 UTC
svn commit: r959674 - in /lucene/dev/branches/branch_3x/lucene/src:
java/org/apache/lucene/document/FieldSelectorResult.java
java/org/apache/lucene/index/FieldsReader.java
test/org/apache/lucene/index/TestFieldsReader.java
Author: gsingers
Date: Thu Jul 1 14:09:33 2010
New Revision: 959674
URL: http://svn.apache.org/viewvc?rev=959674&view=rev
Log:
LUCENE-1810: added LATENT field selector option. Backported to 3.x
Modified:
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldsReader.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java?rev=959674&r1=959673&r2=959674&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java Thu Jul 1 14:09:33 2010
@@ -63,5 +63,14 @@ public enum FieldSelectorResult {
SIZE,
/** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded */
- SIZE_AND_BREAK
+ SIZE_AND_BREAK,
+
+ /**
+ * Lazily load this {@link Field}, but do not cache the result. This means the {@link Field} is valid, but it may not actually contain its data until
+ * invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should
+ * return a valid instance of a {@link Fieldable}.
+ *<p/>
+ * {@link Document#add(Fieldable)} should be called by the Reader.
+ */
+ LATENT
}
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldsReader.java?rev=959674&r1=959673&r2=959674&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/index/FieldsReader.java Thu Jul 1 14:09:33 2010
@@ -243,8 +243,10 @@ final class FieldsReader implements Clon
break;//Get out of this loop
}
else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
- addFieldLazy(doc, fi, binary, compressed, tokenize);
- }
+ addFieldLazy(doc, fi, binary, compressed, tokenize, true);
+ } else if (acceptField.equals(FieldSelectorResult.LATENT)) {
+ addFieldLazy(doc, fi, binary, compressed, tokenize, false);
+ }
else if (acceptField.equals(FieldSelectorResult.SIZE)){
skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
}
@@ -303,12 +305,12 @@ final class FieldsReader implements Clon
}
}
- private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize) throws IOException {
+ private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean compressed, boolean tokenize, boolean cacheResult) throws IOException {
if (binary) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
- doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, compressed));
+ doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, compressed, cacheResult));
//Need to move the pointer ahead by toRead positions
fieldsStream.seek(pointer + toRead);
} else {
@@ -320,7 +322,7 @@ final class FieldsReader implements Clon
if (compressed) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
- f = new LazyField(fi.name, store, toRead, pointer, binary, compressed);
+ f = new LazyField(fi.name, store, toRead, pointer, binary, compressed, cacheResult);
//skip over the part that we aren't loading
fieldsStream.seek(pointer + toRead);
f.setOmitNorms(fi.omitNorms);
@@ -334,7 +336,7 @@ final class FieldsReader implements Clon
} else {
fieldsStream.skipChars(length);
}
- f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, compressed);
+ f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, compressed, cacheResult);
f.setOmitNorms(fi.omitNorms);
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
}
@@ -414,23 +416,26 @@ final class FieldsReader implements Clon
 /** @deprecated Only kept for backward-compatibility with <3.0 indexes. Will be removed in 4.0. */
@Deprecated
private boolean isCompressed;
+ private boolean cacheResult;
- public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean isCompressed) {
+ public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean isCompressed, boolean cacheResult) {
super(name, store, Field.Index.NO, Field.TermVector.NO);
this.toRead = toRead;
this.pointer = pointer;
this.isBinary = isBinary;
+ this.cacheResult = cacheResult;
if (isBinary)
binaryLength = toRead;
lazy = true;
this.isCompressed = isCompressed;
}
- public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean isCompressed) {
+ public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean isCompressed, boolean cacheResult) {
super(name, store, index, termVector);
this.toRead = toRead;
this.pointer = pointer;
this.isBinary = isBinary;
+ this.cacheResult = cacheResult;
if (isBinary)
binaryLength = toRead;
lazy = true;
@@ -472,29 +477,36 @@ final class FieldsReader implements Clon
else {
if (fieldsData == null) {
IndexInput localFieldsStream = getFieldStream();
+ String value;
try {
localFieldsStream.seek(pointer);
if (isCompressed) {
final byte[] b = new byte[toRead];
localFieldsStream.readBytes(b, 0, b.length);
- fieldsData = new String(uncompress(b), "UTF-8");
+ value = new String(uncompress(b), "UTF-8");
} else {
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) {
byte[] bytes = new byte[toRead];
localFieldsStream.readBytes(bytes, 0, toRead);
- fieldsData = new String(bytes, "UTF-8");
+ value = new String(bytes, "UTF-8");
} else {
//read in chars b/c we already know the length we need to read
char[] chars = new char[toRead];
localFieldsStream.readChars(chars, 0, toRead);
- fieldsData = new String(chars);
+ value = new String(chars);
}
}
} catch (IOException e) {
throw new FieldReaderException(e);
}
- }
- return (String) fieldsData;
+ if (cacheResult == true){
+ fieldsData = value;
+ }
+ return value;
+ } else{
+ return (String) fieldsData;
+ }
+
}
}
@@ -526,6 +538,7 @@ final class FieldsReader implements Clon
if (fieldsData == null) {
// Allocate new buffer if result is null or too small
final byte[] b;
+ byte[] value;
if (result == null || result.length < toRead)
b = new byte[toRead];
else
@@ -539,19 +552,25 @@ final class FieldsReader implements Clon
localFieldsStream.seek(pointer);
localFieldsStream.readBytes(b, 0, toRead);
if (isCompressed == true) {
- fieldsData = uncompress(b);
+ value = uncompress(b);
} else {
- fieldsData = b;
- }
+ value = b;
+ }
} catch (IOException e) {
throw new FieldReaderException(e);
}
binaryOffset = 0;
binaryLength = toRead;
- }
+ if (cacheResult == true){
+ fieldsData = value;
+ }
+ return value;
+ } else{
+ return (byte[]) fieldsData;
+ }
- return (byte[]) fieldsData;
+
} else
return null;
}
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java?rev=959674&r1=959673&r2=959674&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java Thu Jul 1 14:09:33 2010
@@ -128,6 +128,8 @@ public class TestFieldsReader extends Lu
String value = field.stringValue();
assertTrue("value is null and it shouldn't be", value != null);
assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
+ assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
+
field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("Field is lazy and it should not be", field.isLazy() == false);
@@ -148,12 +150,85 @@ public class TestFieldsReader extends Lu
byte [] bytes = field.getBinaryValue();
assertTrue("bytes is null and it shouldn't be", bytes != null);
assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
+ assertTrue("calling binaryValue() twice should give same reference", field.getBinaryValue() == field.getBinaryValue());
for (int i = 0; i < bytes.length; i++) {
assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
}
}
+ /**
+  * Checks the behaviour of fields selected with {@link FieldSelectorResult#LATENT}:
+  * such fields must report themselves as lazy, must return the expected value, and
+  * every accessor call must hand back a fresh object (nothing is cached). Fields that
+  * are loaded eagerly are expected to keep returning the very same reference.
+  */
+ public void testLatentFields() throws Exception {
+   assertTrue(dir != null);
+   assertTrue(fieldInfos != null);
+   FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
+   assertTrue(reader != null);
+   assertTrue(reader.size() == 1);
+   Set<String> loadFieldNames = new HashSet<String>();
+   loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
+   loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
+   // The base selector answers LAZY_LOAD for these names; the subclass below turns that into LATENT.
+   Set<String> lazyFieldNames = new HashSet<String>();
+   lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
+   lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
+   lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
+   lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
+
+   // Use LATENT instead of LAZY
+   SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames) {
+     @Override
+     public FieldSelectorResult accept(String fieldName) {
+       final FieldSelectorResult result = super.accept(fieldName);
+       if (result.equals(FieldSelectorResult.LAZY_LOAD)) {
+         return FieldSelectorResult.LATENT;
+       } else {
+         return result;
+       }
+     }
+   };
+
+   Document doc = reader.doc(0, fieldSelector);
+   assertTrue("doc is null and it shouldn't be", doc != null);
+   Fieldable field = doc.getFieldable(DocHelper.LAZY_FIELD_KEY);
+   assertTrue("field is null and it shouldn't be", field != null);
+   assertTrue("field is not lazy and it should be", field.isLazy());
+   String value = field.stringValue();
+   assertTrue("value is null and it shouldn't be", value != null);
+   assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
+   // Reference comparison is intentional: a latent field must not cache its value.
+   assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());
+
+   field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
+   assertTrue("field is null and it shouldn't be", field != null);
+   assertTrue("Field is lazy and it should not be", field.isLazy() == false);
+   assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
+
+   field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
+   assertTrue("field is null and it shouldn't be", field != null);
+   assertTrue("Field is lazy and it should not be", field.isLazy() == false);
+   assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);
+   assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
+
+   field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
+   assertTrue("field is null and it shouldn't be", field != null);
+   // This field is in the lazy set, so it is expected to be lazy; the message now matches the check.
+   assertTrue("Field is not lazy and it should be", field.isLazy() == true);
+   assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);
+   assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());
+
+   field = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
+   assertTrue("field is null and it shouldn't be", field != null);
+   assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);
+   assertTrue("calling binaryValue() twice should give different references", field.getBinaryValue() != field.getBinaryValue());
+
+   byte [] bytes = field.getBinaryValue();
+   assertTrue("bytes is null and it shouldn't be", bytes != null);
+   assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
+   for (int i = 0; i < bytes.length; i++) {
+     assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
+   }
+ }
+
+
+
+
public void testLazyFieldsAfterClose() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);