You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/11/19 05:00:30 UTC

svn commit: r1411059 - in /lucene/dev/branches/lucene4547/lucene: codecs/src/java/org/apache/lucene/codecs/simpletext/ core/src/java/org/apache/lucene/codecs/ core/src/java/org/apache/lucene/index/ core/src/java/org/apache/lucene/search/

Author: rmuir
Date: Mon Nov 19 04:00:29 2012
New Revision: 1411059

URL: http://svn.apache.org/viewvc?rev=1411059&view=rev
Log:
fieldcache=caching, codec=datastructure/encoding (TODO: fix sorted/numeric too)

Modified:
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java

Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java?rev=1411059&r1=1411058&r2=1411059&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java Mon Nov 19 04:00:29 2012
@@ -29,9 +29,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.codecs.BinaryDocValuesConsumer;
-import org.apache.lucene.codecs.DocValuesArraySource;
 import org.apache.lucene.codecs.NumericDocValuesConsumer;
-import org.apache.lucene.codecs.PerDocProducer;
 import org.apache.lucene.codecs.SimpleDVConsumer;
 import org.apache.lucene.codecs.SimpleDVProducer;
 import org.apache.lucene.codecs.SimpleDocValuesFormat;
@@ -54,7 +52,6 @@ import org.apache.lucene.store.IndexOutp
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.packed.PackedInts;
 
 
 /**
@@ -70,6 +67,7 @@ public class SimpleTextSimpleDocValuesFo
   final static BytesRef MINVALUE = new BytesRef("  minvalue ");
   final static BytesRef PATTERN  = new BytesRef("  pattern ");
   // used for bytes
+  final static BytesRef FIXEDLENGTH = new BytesRef("  fixedlength ");
   final static BytesRef MAXLENGTH = new BytesRef("  maxlength ");
   final static BytesRef LENGTH = new BytesRef("length ");
   // used for sorted bytes
@@ -103,6 +101,7 @@ public class SimpleTextSimpleDocValuesFo
    *  for bytes this is also a "fixed-width" file, for example:
    *  <pre>
    *  field myField
+   *    fixedlength false
    *    maxlength 8
    *    pattern 0
    *  length 6
@@ -142,8 +141,7 @@ public class SimpleTextSimpleDocValuesFo
   static class SimpleTextDocValuesWriter extends SimpleDVConsumer {
     final IndexOutput data;
     final BytesRef scratch = new BytesRef();
-
-    final int numDocs; // for asserting
+    final int numDocs;
     private final Set<String> fieldsSeen = new HashSet<String>(); // for asserting
     
     SimpleTextDocValuesWriter(Directory dir, SegmentInfo si, IOContext context) throws IOException {
@@ -215,6 +213,10 @@ public class SimpleTextSimpleDocValuesFo
     public BinaryDocValuesConsumer addBinaryField(FieldInfo field, boolean fixedLength, final int maxLength) throws IOException {
       assert fieldSeen(field.name);
       writeFieldEntry(field);
+      // write fixedlength
+      SimpleTextUtil.write(data, FIXEDLENGTH);
+      SimpleTextUtil.write(data, Boolean.toString(fixedLength), scratch);
+      SimpleTextUtil.writeNewline(data);
       // write maxLength
       SimpleTextUtil.write(data, MAXLENGTH);
       SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
@@ -377,6 +379,7 @@ public class SimpleTextSimpleDocValuesFo
       String pattern;
       String ordPattern;
       int maxLength;
+      boolean fixedLength;
       long minValue;
       int numValues;
     };
@@ -420,6 +423,9 @@ public class SimpleTextSimpleDocValuesFo
           data.seek(data.getFilePointer() + (1+field.pattern.length()) * maxDoc);
         } else if (DocValues.isBytes(dvType)) {
           readLine();
+          assert startsWith(FIXEDLENGTH);
+          field.fixedLength = Boolean.parseBoolean(stripPrefix(FIXEDLENGTH));
+          readLine();
           assert startsWith(MAXLENGTH);
           field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
           readLine();
@@ -529,6 +535,21 @@ public class SimpleTextSimpleDocValuesFo
             throw new RuntimeException(ioe);
           }
         }
+
+        @Override
+        public int size() {
+          return maxDoc;
+        }
+
+        @Override
+        public boolean isFixedLength() {
+          return field.fixedLength;
+        }
+
+        @Override
+        public int maxLength() {
+          return field.maxLength;
+        }
       };
     }
 

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java?rev=1411059&r1=1411058&r2=1411059&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BinaryDocValuesConsumer.java Mon Nov 19 04:00:29 2012
@@ -21,8 +21,6 @@ import java.io.IOException;
 
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.BinaryDocValues;
-import org.apache.lucene.index.DocValues.Source;
-import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.MergeState;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -40,7 +38,7 @@ public abstract class BinaryDocValuesCon
 
       BinaryDocValues source = reader.getBinaryDocValues(mergeState.fieldInfo.name);
       if (source == null) {
-        source = BinaryDocValues.DEFAULT;
+        source = new BinaryDocValues.EMPTY(maxDoc);
       }
 
       for (int i = 0; i < maxDoc; i++) {

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java?rev=1411059&r1=1411058&r2=1411059&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java Mon Nov 19 04:00:29 2012
@@ -104,6 +104,7 @@ public abstract class SimpleDVConsumer i
   // dead simple impl: codec can optimize
   protected void mergeBinaryField(MergeState mergeState) throws IOException {
     // first compute fixedLength and maxLength of live ones to be merged.
+    // nocommit: messy; in many cases this can be simplified by using docValues.maxLength()/isFixedLength().
     boolean fixedLength = true;
     int maxLength = -1;
     BytesRef bytes = new BytesRef();
@@ -112,7 +113,7 @@ public abstract class SimpleDVConsumer i
       final Bits liveDocs = reader.getLiveDocs();
       BinaryDocValues docValues = reader.getBinaryDocValues(mergeState.fieldInfo.name);
       if (docValues == null) {
-        docValues = BinaryDocValues.DEFAULT;
+        docValues = new BinaryDocValues.EMPTY(maxDoc);
       }
       for (int i = 0; i < maxDoc; i++) {
         if (liveDocs == null || liveDocs.get(i)) {

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java?rev=1411059&r1=1411058&r2=1411059&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java Mon Nov 19 04:00:29 2012
@@ -23,11 +23,77 @@ import org.apache.lucene.util.BytesRef;
 public abstract class BinaryDocValues {
   // nocommit throws IOE or not?
   public abstract void get(int docID, BytesRef result);
+  
+  public abstract int size();
+  
+  public abstract boolean isFixedLength();
+  public abstract int maxLength();
+  
+  public BinaryDocValues newRAMInstance() {
+    // TODO: optimize this default impl with e.g. isFixedLength/maxLength and so on
+    // nocommit use packed ints/pagedbytes and so on
+    final int maxDoc = size();
+    final int maxLength = maxLength();
+    final boolean fixedLength = isFixedLength();
+    final byte[][] values = new byte[maxDoc][];
+    BytesRef scratch = new BytesRef();
+    for(int docID=0;docID<maxDoc;docID++) {
+      get(docID, scratch);
+      values[docID] = new byte[scratch.length];
+      System.arraycopy(scratch.bytes, scratch.offset, values[docID], 0, scratch.length);
+    }
+    
+    return new BinaryDocValues() {
 
-  public static final BinaryDocValues DEFAULT = new BinaryDocValues() {
       @Override
-      public void get(int docID, BytesRef ret) {
-        ret.length = 0;
+      public void get(int docID, BytesRef result) {
+        result.bytes = values[docID];
+        result.offset = 0;
+        result.length = result.bytes.length;
+      }
+
+      @Override
+      public int size() {
+        return maxDoc;
+      }
+
+      @Override
+      public boolean isFixedLength() {
+        return fixedLength;
+      }
+
+      @Override
+      public int maxLength() {
+        return maxLength;
       }
     };
+  }
+  
+  public static class EMPTY extends BinaryDocValues {
+    private final int size;
+    
+    public EMPTY(int size) {
+      this.size = size;
+    }
+    
+    @Override
+    public void get(int docID, BytesRef result) {
+      result.length = 0;
+    }
+
+    @Override
+    public int size() {
+      return size;
+    }
+
+    @Override
+    public boolean isFixedLength() {
+      return true;
+    }
+
+    @Override
+    public int maxLength() {
+      return 0;
+    }
+  };
 }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1411059&r1=1411058&r2=1411059&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java Mon Nov 19 04:00:29 2012
@@ -1334,22 +1334,15 @@ class FieldCacheImpl implements FieldCac
     protected Object createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */)
         throws IOException {
 
-      final int maxDoc = reader.maxDoc();
       BinaryDocValues valuesIn = reader.getBinaryDocValues(key.field);
       if (valuesIn != null) {
-        // nocommit used packed ints like below!
-        final byte[][] values = new byte[maxDoc][];
-        BytesRef scratch = new BytesRef();
-        for(int docID=0;docID<maxDoc;docID++) {
-          valuesIn.get(docID, scratch);
-          values[docID] = new byte[scratch.length];
-          System.arraycopy(scratch.bytes, scratch.offset, values[docID], 0, scratch.length);
-        }
-
+        final BinaryDocValues ramInstance = valuesIn.newRAMInstance();
         return new DocTerms() {
+
           @Override
-          public int size() {
-            return maxDoc;
+          public BytesRef getTerm(int docID, BytesRef ret) {
+            ramInstance.get(docID, ret);
+            return ret;
           }
 
           @Override
@@ -1359,15 +1352,12 @@ class FieldCacheImpl implements FieldCac
           }
 
           @Override
-          public BytesRef getTerm(int docID, BytesRef ret) {
-            ret.bytes = values[docID];
-            ret.length = ret.bytes.length;
-            ret.offset = 0;
-            return ret;
-          }      
+          public int size() {
+            return ramInstance.size();
+          }     
         };
       } else {
-
+        final int maxDoc = reader.maxDoc();
         Terms terms = reader.terms(key.field);
 
         final float acceptableOverheadRatio = ((Float) key.custom).floatValue();