You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2010/07/01 15:41:26 UTC

svn commit: r959657 - in /lucene/dev/trunk/lucene: CHANGES.txt src/java/org/apache/lucene/document/FieldSelectorResult.java src/java/org/apache/lucene/index/FieldsReader.java src/test/org/apache/lucene/index/TestFieldsReader.java

Author: gsingers
Date: Thu Jul  1 13:41:26 2010
New Revision: 959657

URL: http://svn.apache.org/viewvc?rev=959657&view=rev
Log:
LUCENE-1810: added LATENT field selector option

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=959657&r1=959656&r2=959657&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Jul  1 13:41:26 2010
@@ -184,6 +184,9 @@ New features
 * LUCENE-2489: Added PerFieldCodecWrapper (in oal.index.codecs) which
   lets you set the Codec per field (Mike McCandless)
 
+* LUCENE-1810: Added FieldSelectorResult.LATENT to not cache lazy loaded fields
+  (Tim Smith, Grant Ingersoll)
+
 Optimizations
 
 * LUCENE-2410: ~2.5X speedup on exact (slop=0) PhraseQuery matching.

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java?rev=959657&r1=959656&r2=959657&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java Thu Jul  1 13:41:26 2010
@@ -63,5 +63,14 @@ public enum FieldSelectorResult {
   SIZE,
 
     /** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded */         
-  SIZE_AND_BREAK
+  SIZE_AND_BREAK,
+
+  /**
+     * Lazily load this {@link Field}, but do not cache the result.  This means the {@link Field} is valid, but its data is not read until a
+     * value is requested, and it is read again on every request.  {@link Document#getField(String)} SHOULD NOT BE USED.  {@link Document#getFieldable(String)} is safe to use and should
+     * return a valid instance of a {@link Fieldable}.
+     *<p/>
+     * {@link Document#add(Fieldable)} should be called by the Reader.
+     */
+  LATENT
 }

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java?rev=959657&r1=959656&r2=959657&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FieldsReader.java Thu Jul  1 13:41:26 2010
@@ -219,7 +219,9 @@ final class FieldsReader implements Clon
         break;//Get out of this loop
       }
       else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
-        addFieldLazy(doc, fi, binary, tokenize);
+        addFieldLazy(doc, fi, binary, tokenize, true);
+      } else if (acceptField.equals(FieldSelectorResult.LATENT)) {
+        addFieldLazy(doc, fi, binary, tokenize, false);
       }
       else if (acceptField.equals(FieldSelectorResult.SIZE)){
         skipField(addFieldSize(doc, fi, binary));
@@ -274,12 +276,12 @@ final class FieldsReader implements Clon
     fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
   }
 
-  private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws IOException {
+  private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult) throws IOException {
     if (binary) {
       int toRead = fieldsStream.readVInt();
       long pointer = fieldsStream.getFilePointer();
       //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
-      doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
+      doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult));
       //Need to move the pointer ahead by toRead positions
       fieldsStream.seek(pointer + toRead);
     } else {
@@ -292,7 +294,7 @@ final class FieldsReader implements Clon
       long pointer = fieldsStream.getFilePointer();
       //Skip ahead of where we are by the length of what is stored
       fieldsStream.seek(pointer+length);
-      f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
+      f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, cacheResult);
       f.setOmitNorms(fi.omitNorms);
       f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
 
@@ -349,22 +351,25 @@ final class FieldsReader implements Clon
   private class LazyField extends AbstractField implements Fieldable {
     private int toRead;
     private long pointer;
+    private final boolean cacheResult;
 
-    public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary) {
+    public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean cacheResult) {
       super(name, store, Field.Index.NO, Field.TermVector.NO);
       this.toRead = toRead;
       this.pointer = pointer;
       this.isBinary = isBinary;
+      this.cacheResult = cacheResult;
       if (isBinary)
         binaryLength = toRead;
       lazy = true;
     }
 
-    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary) {
+    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean cacheResult) {
       super(name, store, index, termVector);
       this.toRead = toRead;
       this.pointer = pointer;
       this.isBinary = isBinary;
+      this.cacheResult = cacheResult;
       if (isBinary)
         binaryLength = toRead;
       lazy = true;
@@ -404,17 +409,23 @@ final class FieldsReader implements Clon
         return null;
       else {
         if (fieldsData == null) {
+          String result = null;
           IndexInput localFieldsStream = getFieldStream();
           try {
             localFieldsStream.seek(pointer);
             byte[] bytes = new byte[toRead];
             localFieldsStream.readBytes(bytes, 0, toRead);
-            fieldsData = new String(bytes, "UTF-8");
+            result = new String(bytes, "UTF-8");
           } catch (IOException e) {
             throw new FieldReaderException(e);
           }
+          if (cacheResult == true){
+            fieldsData = result;
+          }
+          return result;
+        } else {
+          return (String) fieldsData;
         }
-        return (String) fieldsData;
       }
     }
 
@@ -438,16 +449,19 @@ final class FieldsReader implements Clon
           try {
             localFieldsStream.seek(pointer);
             localFieldsStream.readBytes(b, 0, toRead);
-            fieldsData = b;
           } catch (IOException e) {
             throw new FieldReaderException(e);
           }
 
           binaryOffset = 0;
           binaryLength = toRead;
+          if (cacheResult == true){
+            fieldsData = b;
+          }
+          return b;
+        } else {
+          return (byte[]) fieldsData;
         }
-
-        return (byte[]) fieldsData;
       } else
         return null;     
     }

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java?rev=959657&r1=959656&r2=959657&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java Thu Jul  1 13:41:26 2010
@@ -128,6 +128,8 @@ public class TestFieldsReader extends Lu
     String value = field.stringValue();
     assertTrue("value is null and it shouldn't be", value != null);
     assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
+    assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
+
     field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
     assertTrue("field is null and it shouldn't be", field != null);
     assertTrue("Field is lazy and it should not be", field.isLazy() == false);
@@ -148,12 +150,85 @@ public class TestFieldsReader extends Lu
     byte [] bytes = field.getBinaryValue();
     assertTrue("bytes is null and it shouldn't be", bytes != null);
     assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
+    assertTrue("calling binaryValue() twice should give same reference", field.getBinaryValue() == field.getBinaryValue());
     for (int i = 0; i < bytes.length; i++) {
       assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
 
     }
   }
 
+  /**
+   * Verifies that fields selected as {@link FieldSelectorResult#LATENT} are lazy but not cached: repeated value
+   * lookups return distinct instances, while eagerly loaded fields keep returning the same reference.
+   */
+  public void testLatentFields() throws Exception {
+    assertTrue(dir != null);
+    assertTrue(fieldInfos != null);
+    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
+    assertTrue(reader != null);
+    assertTrue(reader.size() == 1);
+    Set<String> loadFieldNames = new HashSet<String>();
+    loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
+    loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
+    Set<String> lazyFieldNames = new HashSet<String>();
+    lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
+    lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
+    lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
+    lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
+
+    // Remap the selector's LAZY_LOAD answers to LATENT so the "lazy" set is loaded without caching
+    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames) {
+        @Override
+        public FieldSelectorResult accept(String fieldName) {
+          final FieldSelectorResult result = super.accept(fieldName);
+          return result.equals(FieldSelectorResult.LAZY_LOAD) ? FieldSelectorResult.LATENT : result;
+        }
+      };
+
+    Document doc = reader.doc(0, fieldSelector);
+    assertTrue("doc is null and it shouldn't be", doc != null);
+    Fieldable field = doc.getFieldable(DocHelper.LAZY_FIELD_KEY);
+    assertTrue("field is null and it shouldn't be", field != null);
+    assertTrue("field is not lazy and it should be", field.isLazy());
+    String value = field.stringValue();
+    assertTrue("value is null and it shouldn't be", value != null);
+    assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
+    // Nothing is cached for a LATENT field, so each call has to build a new String
+    assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());
+
+    field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
+    assertTrue("field is null and it shouldn't be", field != null);
+    assertTrue("Field is lazy and it should not be", field.isLazy() == false);
+    assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
+
+    field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
+    assertTrue("field is null and it shouldn't be", field != null);
+    assertTrue("Field is lazy and it should not be", field.isLazy() == false);
+    assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);
+    assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
+
+    field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
+    assertTrue("field is null and it shouldn't be", field != null);
+    assertTrue("Field is not lazy and it should be", field.isLazy() == true);
+    assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);
+    assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());
+
+    field = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
+    assertTrue("field is null and it shouldn't be", field != null);
+    assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);
+    assertTrue("calling binaryValue() twice should give different references", field.getBinaryValue() != field.getBinaryValue());
+
+    byte [] bytes = field.getBinaryValue();
+    assertTrue("bytes is null and it shouldn't be", bytes != null);
+    assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
+    for (int i = 0; i < bytes.length; i++) {
+      assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
+    }
+  }
+
+
+
+
   public void testLazyFieldsAfterClose() throws Exception {
     assertTrue(dir != null);
     assertTrue(fieldInfos != null);