You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2010/11/16 19:34:16 UTC

svn commit: r1035731 - in /lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index: ./ values/

Author: simonw
Date: Tue Nov 16 18:34:16 2010
New Revision: 1035731

URL: http://svn.apache.org/viewvc?rev=1035731&view=rev
Log:
Use PagedBytes in the DocValues bytes variants instead of direct byte arrays

Modified:
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1035731&r1=1035730&r2=1035731&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Tue Nov 16 18:34:16 2010
@@ -601,7 +601,7 @@ final class DocumentsWriter {
   }
 
   synchronized private void initFlushState(boolean onlyDocStore) {
-    initSegmentName(onlyDocStore); // nocommit - this does not work for docValues
+    initSegmentName(onlyDocStore);
     final SegmentCodecs info = SegmentCodecs.build(docFieldProcessor.fieldInfos, writer.codecs);
     flushState = new SegmentWriteState(infoStream, directory, segment, docFieldProcessor.fieldInfos,
                                        docStoreSegment, numDocsInRAM, numDocsInStore, writer.getConfig().getTermIndexInterval(), info);

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java?rev=1035731&r1=1035730&r2=1035731&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Bytes.java Tue Nov 16 18:34:16 2010
@@ -32,6 +32,7 @@ import org.apache.lucene.store.IndexOutp
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.PagedBytes;
 
 /**
  * Provides concrete Writer/Reader impls for byte[] value per document. There
@@ -46,7 +47,7 @@ import org.apache.lucene.util.CodecUtil;
  * NOTE: Each byte[] must be <= 32768 bytes in length
  * </p>
  */
-//TODO - add bulk copy where possible
+// TODO - add bulk copy where possible
 public final class Bytes {
 
   // don't instantiate!
@@ -57,7 +58,6 @@ public final class Bytes {
     STRAIGHT, DEREF, SORTED
   };
 
-  
   // TODO -- i shouldn't have to specify fixed? can
   // track itself & do the write thing at write time?
   public static Writer getWriter(Directory dir, String id, Mode mode,
@@ -124,29 +124,65 @@ public final class Bytes {
     protected final IndexInput datIn;
     protected final IndexInput idxIn;
     protected final BytesRef defaultValue = new BytesRef();
-
-    protected BytesBaseSource(IndexInput datIn, IndexInput idxIn) {
+    protected final static int PAGED_BYTES_BITS = 15;
+    private final PagedBytes pagedBytes;
+    protected final PagedBytes.Reader data;
+    protected final long totalLengthInBytes;
+
+    protected BytesBaseSource(IndexInput datIn, IndexInput idxIn, PagedBytes pagedBytes, long bytesToRead)
+        throws IOException {
+      assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: " + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
       this.datIn = datIn;
+      this.totalLengthInBytes = bytesToRead;
+      this.pagedBytes = pagedBytes;
+      this.pagedBytes.copy(datIn, bytesToRead);
+      data = pagedBytes.freeze(true);
       this.idxIn = idxIn;
     }
 
     public void close() throws IOException {
-      if (datIn != null)
-        datIn.close();
-      if (idxIn != null) // if straight
-        idxIn.close();
-
+      data.close();
+      try {
+        if (datIn != null)
+          datIn.close();
+      } finally {
+        if (idxIn != null) // if straight
+          idxIn.close();
+      }
+    }
+    public long ramBytesUsed() {
+      return 0; // TODO
     }
+
   }
 
   static abstract class BytesBaseSortedSource extends SortedSource {
     protected final IndexInput datIn;
     protected final IndexInput idxIn;
     protected final BytesRef defaultValue = new BytesRef();
+    protected final static int PAGED_BYTES_BITS = 15;
+    private final PagedBytes pagedBytes;
+    protected final PagedBytes.Reader data;
+    protected final BytesRef bytesRef = new BytesRef();
+    protected final LookupResult lookupResult = new LookupResult();
+    private final Comparator<BytesRef> comp;
+
 
-    protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn) {
+    protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn, Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead) throws IOException {
+      assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: " + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
       this.datIn = datIn;
+      this.pagedBytes = pagedBytes;
+      this.pagedBytes.copy(datIn, bytesToRead);
+      data = pagedBytes.freeze(true);
       this.idxIn = idxIn;
+      this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
+          : comp;
+      
+    }
+    
+    @Override
+    public BytesRef getByOrd(int ord) {
+      return ord == 0 ? defaultValue : deref(--ord);
     }
 
     public void close() throws IOException {
@@ -154,13 +190,35 @@ public final class Bytes {
         datIn.close();
       if (idxIn != null) // if straight
         idxIn.close();
+    }
+    
+    protected abstract BytesRef deref(int ord);
 
+    
+    protected LookupResult binarySearch(BytesRef b, int low, int high) {
+      while (low <= high) {
+        int mid = (low + high) >>> 1;
+        deref(mid);
+        final int cmp = comp.compare(bytesRef, b);
+        if (cmp < 0) {
+          low = mid + 1;
+        } else if (cmp > 0) {
+          high = mid - 1;
+        } else {
+          lookupResult.ord = mid + 1;
+          lookupResult.found = true;
+          return lookupResult;
+        }
+      }
+      assert comp.compare(bytesRef, b) != 0;
+      lookupResult.ord = low;
+      lookupResult.found = false;
+      return lookupResult;
     }
   }
 
   static abstract class BytesWriterBase extends Writer {
 
-
     private final Directory dir;
     private final String id;
     protected IndexOutput idxOut;
@@ -172,7 +230,8 @@ public final class Bytes {
     protected final AtomicLong bytesUsed;
 
     protected BytesWriterBase(Directory dir, String id, String codecName,
-        int version, boolean initIndex, boolean initData, ByteBlockPool pool, AtomicLong bytesUsed) throws IOException {
+        int version, boolean initIndex, boolean initData, ByteBlockPool pool,
+        AtomicLong bytesUsed) throws IOException {
       this.dir = dir;
       this.id = id;
       this.codecName = codecName;
@@ -214,7 +273,7 @@ public final class Bytes {
         datOut.close();
       if (idxOut != null)
         idxOut.close();
-      if(pool != null)
+      if (pool != null)
         pool.reset();
     }
 
@@ -228,11 +287,11 @@ public final class Bytes {
       bytesRef = attr.bytes();
       assert bytesRef != null;
     }
-    
+
     @Override
     public void add(int docID, ValuesAttribute attr) throws IOException {
       final BytesRef ref;
-      if((ref = attr.bytes()) != null) {
+      if ((ref = attr.bytes()) != null) {
         add(docID, ref);
       }
     }
@@ -242,9 +301,10 @@ public final class Bytes {
       assert datOut != null;
       files.add(IndexFileNames.segmentFileName(id, "",
           IndexFileNames.CSF_DATA_EXTENSION));
-      if(idxOut != null) { // called after flush - so this must be initialized if needed or present
+      if (idxOut != null) { // called after flush - so this must be initialized
+                            // if needed or present
         final String idxFile = IndexFileNames.segmentFileName(id, "",
-          IndexFileNames.CSF_INDEX_EXTENSION);
+            IndexFileNames.CSF_INDEX_EXTENSION);
         files.add(idxFile);
       }
     }
@@ -254,7 +314,7 @@ public final class Bytes {
    * Opens all necessary files, but does not read any data in until you call
    * {@link #load}.
    */
-   static abstract class BytesReaderBase extends DocValues {
+  static abstract class BytesReaderBase extends DocValues {
     protected final IndexInput idxIn;
     protected final IndexInput datIn;
     protected final int version;
@@ -276,6 +336,7 @@ public final class Bytes {
       } else {
         idxIn = null;
       }
+
     }
 
     protected final IndexInput cloneData() {
@@ -283,7 +344,8 @@ public final class Bytes {
       return (IndexInput) datIn.clone();
     }
 
-    protected final IndexInput cloneIndex() { // TODO assert here for null rather than return null
+    protected final IndexInput cloneIndex() { // TODO assert here for null
+                                              // rather than return null
       return idxIn == null ? null : (IndexInput) idxIn.clone();
     }
 

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java?rev=1035731&r1=1035730&r2=1035731&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java Tue Nov 16 18:34:16 2010
@@ -30,6 +30,7 @@ import org.apache.lucene.util.AttributeS
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.ByteBlockPool.Allocator;
 import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
@@ -133,27 +134,22 @@ class FixedDerefBytesImpl {
 
     @Override
     public Source load() throws IOException {
-      return new Source(cloneData(), cloneIndex(), size);
+      final IndexInput index = cloneIndex();
+      return new Source(cloneData(), index , size, index.readInt());
     }
 
     private static class Source extends BytesBaseSource {
-      // TODO: paged data or mmap?
-      private final byte[] data;
       private final BytesRef bytesRef = new BytesRef();
       private final PackedInts.Reader index;
-      private final int numValue;
       private final int size;
+      private final int numValues;
 
-      protected Source(IndexInput datIn, IndexInput idxIn, int size)
+      protected Source(IndexInput datIn, IndexInput idxIn, int size, int numValues)
           throws IOException {
-        super(datIn, idxIn);
+        super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size * numValues);
         this.size = size;
-        numValue = idxIn.readInt();
-        data = new byte[size * numValue];
-        datIn.readBytes(data, 0, size * numValue);
+        this.numValues = numValues;
         index = PackedInts.getReader(idxIn);
-        bytesRef.bytes = data;
-        bytesRef.length = size;
       }
 
       @Override
@@ -162,22 +158,13 @@ class FixedDerefBytesImpl {
         if (id == 0) {
           return defaultValue;
         }
-        bytesRef.offset = ((id - 1) * size);
-        return bytesRef;
-      }
-
-      public long ramBytesUsed() {
-        // TODO(simonw): move ram calculation to PackedInts?!
-        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
-            + data.length
-            + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index
-                .getBitsPerValue()
-                * index.size());
+        return data.fill(bytesRef, ((id - 1) * size), size);
       }
 
+      
       @Override
       public int getValueCount() {
-        return numValue;
+        return numValues;
       }
     }
 

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java?rev=1035731&r1=1035730&r2=1035731&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java Tue Nov 16 18:34:16 2010
@@ -33,6 +33,7 @@ import org.apache.lucene.util.ByteBlockP
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.ByteBlockPool.Allocator;
 import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
@@ -52,88 +53,93 @@ class FixedSortedBytesImpl {
     private int[] docToEntry;
     private final Comparator<BytesRef> comp;
 
-   
     private final BytesRefHash hash = new BytesRefHash(pool);
-    
-    public Writer(Directory dir, String id,  Comparator<BytesRef> comp) throws IOException {
+
+    public Writer(Directory dir, String id, Comparator<BytesRef> comp)
+        throws IOException {
       this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
           new AtomicLong());
     }
 
-    public Writer(Directory dir, String id,  Comparator<BytesRef> comp, Allocator allocator, AtomicLong bytesUsed) throws IOException {
-      super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, new ByteBlockPool(allocator), bytesUsed);
+    public Writer(Directory dir, String id, Comparator<BytesRef> comp,
+        Allocator allocator, AtomicLong bytesUsed) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false,
+          new ByteBlockPool(allocator), bytesUsed);
       docToEntry = new int[1];
-//      docToEntry[0] = -1;
+      // docToEntry[0] = -1;
       bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
       this.comp = comp;
     }
 
     @Override
     synchronized public void add(int docID, BytesRef bytes) throws IOException {
-      if(bytes.length == 0)
+      if (bytes.length == 0)
         return; // default - skip it
       if (size == -1) {
         size = bytes.length;
         initDataOut();
         datOut.writeInt(size);
       } else if (bytes.length != size) {
-        throw new IllegalArgumentException("expected bytes size=" + size + " but got " + bytes.length);
+        throw new IllegalArgumentException("expected bytes size=" + size
+            + " but got " + bytes.length);
       }
       if (docID >= docToEntry.length) {
         int[] newArray = new int[ArrayUtil.oversize(1 + docID,
             RamUsageEstimator.NUM_BYTES_INT)];
         System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
-//        Arrays.fill(newArray, docToEntry.length, newArray.length, -1);
+        // Arrays.fill(newArray, docToEntry.length, newArray.length, -1);
 
-        bytesUsed.addAndGet((newArray.length - docToEntry.length) * RamUsageEstimator.NUM_BYTES_INT);
+        bytesUsed.addAndGet((newArray.length - docToEntry.length)
+            * RamUsageEstimator.NUM_BYTES_INT);
         docToEntry = newArray;
       }
       int e = hash.add(bytes);
-      docToEntry[docID] = 1+(e < 0? (-e)-1: e);
+      docToEntry[docID] = 1 + (e < 0 ? (-e) - 1 : e);
     }
 
-
     // Important that we get docCount, in case there were
     // some last docs that we didn't see
     @Override
     synchronized public void finish(int docCount) throws IOException {
-      if(datOut == null)// no data added
+      if (datOut == null)// no data added
         return;
       initIndexOut();
       final int[] sortedEntries = hash.sort(comp);
       final int count = hash.size();
-      int[] address= new int[count];
+      int[] address = new int[count];
       // first dump bytes data, recording address as we go
-      for(int i=0;i<count;i++) {
+      for (int i = 0; i < count; i++) {
         final int e = sortedEntries[i];
         final BytesRef bytes = hash.get(e, new BytesRef());
         assert bytes.length == size;
         datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
-        address[e] = 1+i;
+        address[e] = 1 + i;
       }
 
       idxOut.writeInt(count);
 
       // next write index
-      PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(count));
+      PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts
+          .bitsRequired(count));
       final int limit;
       if (docCount > docToEntry.length) {
         limit = docToEntry.length;
       } else {
         limit = docCount;
       }
-      for(int i=0;i<limit;i++) {
+      for (int i = 0; i < limit; i++) {
         final int e = docToEntry[i];
         if (e == 0) {
           // null is encoded as zero
           w.add(0);
         } else {
-          assert e > 0 && e <= count: "index must  0 > && <= " + count + " was: " + e;
-          w.add(address[e-1]);
+          assert e > 0 && e <= count : "index must  0 > && <= " + count
+              + " was: " + e;
+          w.add(address[e - 1]);
         }
       }
 
-      for(int i=limit;i<docCount;i++) {
+      for (int i = limit; i < docCount; i++) {
         w.add(0);
       }
       w.finish();
@@ -144,56 +150,45 @@ class FixedSortedBytesImpl {
       docToEntry = null;
     }
   }
-  
+
   public static class Reader extends BytesReaderBase {
-    // nocommit -- allow/require byte[] paging here?
     private final int size;
 
-    public Reader(Directory dir, String id, int maxDoc)
-      throws IOException {
+    public Reader(Directory dir, String id, int maxDoc) throws IOException {
       super(dir, id, CODEC_NAME, VERSION_START, true);
       size = datIn.readInt();
     }
 
     @Override
-    public org.apache.lucene.index.values.DocValues.Source load() throws IOException {
+    public org.apache.lucene.index.values.DocValues.Source load()
+        throws IOException {
       return loadSorted(null);
     }
 
     @Override
-    public SortedSource loadSorted(Comparator<BytesRef> comp) throws IOException {
-      return new Source(cloneData(), cloneIndex(), size, comp);
+    public SortedSource loadSorted(Comparator<BytesRef> comp)
+        throws IOException {
+      final IndexInput idxInput = cloneIndex();
+      final IndexInput datInput = cloneData();
+      datInput.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
+      idxInput.seek(CodecUtil.headerLength(CODEC_NAME));
+      return new Source(datInput, idxInput, size, idxInput.readInt(), comp);
     }
 
     private static class Source extends BytesBaseSortedSource {
 
-      // TODO: paged data
-      private final byte[] data;
-      private final BytesRef bytesRef = new BytesRef();
       private final PackedInts.Reader index;
-      private final LookupResult lookupResult = new LookupResult();
       private final int numValue;
-      private final Comparator<BytesRef> comp;
       private final int size;
 
-      public Source(IndexInput datIn, IndexInput idxIn, int size, Comparator<BytesRef> comp) throws IOException {
-        super(datIn, idxIn);
+      public Source(IndexInput datIn, IndexInput idxIn, int size, int numValues,
+          Comparator<BytesRef> comp) throws IOException {
+        super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), size*numValues );
         this.size = size;
-        datIn.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
-        idxIn.seek(CodecUtil.headerLength(CODEC_NAME));
-
-        numValue = idxIn.readInt();
-        data = new byte[size*numValue];
-        datIn.readBytes(data, 0, size*numValue);
-        datIn.close();
-
+        this.numValue = numValues;
         index = PackedInts.getReader(idxIn);
-        idxIn.close(); // do we need to close that here?
 
-        bytesRef.bytes = data;
         bytesRef.length = size;
-        // default byte sort order 
-        this.comp = comp==null?BytesRef.getUTF8SortedAsUnicodeComparator():comp;
       }
 
       @Override
@@ -202,59 +197,37 @@ class FixedSortedBytesImpl {
       }
 
       @Override
-      public BytesRef getByOrd(int ord) {
-        if (ord == 0) {
-          return defaultValue;
-        } else {
-          bytesRef.offset = ((ord-1) * size);
-          return bytesRef;
-        }
-      }
-
-      @Override
       public LookupResult getByValue(BytesRef bytes) {
-        return binarySearch(bytes, 0, numValue-1);
+        return binarySearch(bytes, 0, numValue - 1);
       }
 
       public long ramBytesUsed() {
         // TODO(simonw): move ram calcultation to PackedInts?
-        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length +
-            (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index.getBitsPerValue() * index.size());
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+            + size
+            * numValue
+            + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index
+                .getBitsPerValue()
+                * index.size());
       }
 
       @Override
       public int getValueCount() {
         return numValue;
       }
-
-      private LookupResult binarySearch(BytesRef b, int low, int high) {
-        
-        while (low <= high) {
-          int mid = (low + high) >>> 1;
-          bytesRef.offset = mid * size;
-          int cmp = comp.compare(bytesRef, b);
-          if (cmp < 0) {
-            low = mid + 1;
-          } else if (cmp > 0) {
-            high = mid - 1;
-          } else {
-            lookupResult.ord = mid+1;
-            lookupResult.found = true;
-            return lookupResult;
-          }
-        }
-        lookupResult.ord = low;
-        lookupResult.found = false;
-        return lookupResult;
+      @Override
+      protected BytesRef deref(int ord) {
+        return data.fill(bytesRef, (ord* size), size);
       }
     }
 
     @Override
     public ValuesEnum getEnum(AttributeSource source) throws IOException {
-        // do unsorted
-        return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME, size);
+      // do unsorted
+      return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME,
+          size);
     }
-    
+
     @Override
     public Values type() {
       return Values.BYTES_FIXED_SORTED;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java?rev=1035731&r1=1035730&r2=1035731&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java Tue Nov 16 18:34:16 2010
@@ -26,7 +26,7 @@ import org.apache.lucene.store.Directory
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.PagedBytes;
 
 // Simplest storage: stores fixed length byte[] per
 // document, with no dedup and no sorting.
@@ -133,30 +133,17 @@ class FixedStraightBytesImpl {
     }
 
     private static class Source extends BytesBaseSource {
-      // TODO: paged data
-      private final byte[] data;
       private final BytesRef bytesRef = new BytesRef();
       private final int size;
 
       public Source(IndexInput datIn, IndexInput idxIn, int size, int maxDoc) throws IOException {
-        super(datIn, idxIn);
+        super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), size*maxDoc);
         this.size = size;
-        final int sizeInBytes = size*maxDoc;
-        data = new byte[sizeInBytes];
-        assert data.length <= datIn.length() : " file size is less than the expected size diff: " + (data.length - datIn.length()) + " size: " + size + " maxDoc " + maxDoc + " pos: " + datIn.getFilePointer();
-        datIn.readBytes(data, 0, sizeInBytes);
-        bytesRef.bytes = data;
-        bytesRef.length = size;
       }
-
+      
       @Override
-      public BytesRef getBytes(int docID) {
-        bytesRef.offset = docID * size;
-        return bytesRef;
-      }
-
-      public long ramBytesUsed() {
-        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length;
+      public BytesRef getBytes(int docID) { 
+        return data.fill(bytesRef, docID * size, size);
       }
 
       @Override

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java?rev=1035731&r1=1035730&r2=1035731&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java Tue Nov 16 18:34:16 2010
@@ -33,6 +33,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.ByteBlockPool.Allocator;
 import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
@@ -165,44 +166,31 @@ class VarDerefBytesImpl {
 
     @Override
     public Source load() throws IOException {
-      return new Source(cloneData(), cloneIndex());
+      final IndexInput data = cloneData();
+      final IndexInput index = cloneIndex();
+      data.seek(CodecUtil.headerLength(CODEC_NAME));
+      index.seek(CodecUtil.headerLength(CODEC_NAME));
+      final long totalBytes = index.readInt(); // should be long
+      return new Source(data,index, totalBytes);
     }
 
     private static class Source extends BytesBaseSource {
-      // TODO: paged data
-      private final byte[] data;
       private final BytesRef bytesRef = new BytesRef();
       private final PackedInts.Reader index;
 
-      public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
-        super(datIn, idxIn);
-        datIn.seek(CodecUtil.headerLength(CODEC_NAME));
-        idxIn.seek(CodecUtil.headerLength(CODEC_NAME));
-
-        final int totBytes = idxIn.readInt();
-        data = new byte[totBytes];
-        datIn.readBytes(data, 0, totBytes);
-
+      public Source(IndexInput datIn, IndexInput idxIn, long totalBytes) throws IOException {
+        super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), totalBytes);
         index = PackedInts.getReader(idxIn);
-        bytesRef.bytes = data;
       }
 
       @Override
       public BytesRef getBytes(int docID) {
-        int address = (int) index.get(docID);
+        long address =  index.get(docID);
         if (address == 0) {
           assert defaultValue.length == 0: " default value manipulated";
           return defaultValue;
         } else {
-          address--;
-          if ((data[address] & 0x80) == 0) {
-            // length is 1 byte
-            bytesRef.length = data[address];
-            bytesRef.offset = address+1;
-          } else {
-            bytesRef.length = (data[address]&0x7f) + ((data[address+1]&0xff)<<7);
-            bytesRef.offset = address+2;
-          }
+          data.fillUsingLengthPrefix2(bytesRef, --address);
           return bytesRef;
         }
       }
@@ -211,12 +199,6 @@ class VarDerefBytesImpl {
       public int getValueCount() {
         return index.size();
       }
-
-      public long ramBytesUsed() {
-        // TODO(simonw): move address ram usage to PackedInts? 
-        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length + 
-        (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index.getBitsPerValue() * index.size());
-      }
     }
 
     @Override

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java?rev=1035731&r1=1035730&r2=1035731&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarSortedBytesImpl.java Tue Nov 16 18:34:16 2010
@@ -32,6 +32,7 @@ import org.apache.lucene.util.AttributeS
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.ByteBlockPool.Allocator;
 import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
@@ -165,42 +166,29 @@ class VarSortedBytesImpl {
     @Override
     public SortedSource loadSorted(Comparator<BytesRef> comp)
         throws IOException {
-      return new Source(cloneData(), cloneIndex(), comp);
+      IndexInput indexIn = cloneIndex();
+      return new Source(cloneData(), indexIn , comp, indexIn.readLong());
     }
 
     private static class Source extends BytesBaseSortedSource {
       // TODO: paged data
-      private final byte[] data;
-      private final BytesRef bytesRef = new BytesRef();
       private final PackedInts.Reader docToOrdIndex;
       private final PackedInts.Reader ordToOffsetIndex; // 0-based
       private final long totBytes;
       private final int valueCount;
-      private final LookupResult lookupResult = new LookupResult();
-      private final Comparator<BytesRef> comp;
 
       public Source(IndexInput datIn, IndexInput idxIn,
-          Comparator<BytesRef> comp) throws IOException {
-        super(datIn, idxIn);
-        totBytes = idxIn.readLong();
-        data = new byte[(int) totBytes];
-        datIn.readBytes(data, 0, (int) totBytes);
+          Comparator<BytesRef> comp,  long dataLength) throws IOException {
+        super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), dataLength);
+        totBytes = dataLength;
         docToOrdIndex = PackedInts.getReader(idxIn);
         ordToOffsetIndex = PackedInts.getReader(idxIn);
         valueCount = ordToOffsetIndex.size();
-        bytesRef.bytes = data;
         // default byte sort order
-        this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
-            : comp;
 
       }
 
       @Override
-      public BytesRef getByOrd(int ord) {
-        return ord == 0 ? defaultValue : deref(--ord);
-      }
-
-      @Override
       public int ord(int docID) {
         return (int) docToOrdIndex.get(docID);
       }
@@ -213,7 +201,7 @@ class VarSortedBytesImpl {
       public long ramBytesUsed() {
         // TODO(simonw): move ram usage to PackedInts?
         return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
-            + data.length
+            + totBytes
             + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToOrdIndex
                 .getBitsPerValue()
                 * docToOrdIndex.getBitsPerValue())
@@ -228,40 +216,21 @@ class VarSortedBytesImpl {
       }
 
       // ord is 0-based
-      private BytesRef deref(int ord) {
-        bytesRef.offset = (int) ordToOffsetIndex.get(ord);
+      @Override
+      protected BytesRef deref(int ord) {
+        
         final long nextOffset;
         if (ord == valueCount - 1) {
           nextOffset = totBytes;
         } else {
           nextOffset = ordToOffsetIndex.get(1 + ord);
         }
-        bytesRef.length = (int) (nextOffset - bytesRef.offset);
+        final long offset = ordToOffsetIndex.get(ord);
+        data.fill(bytesRef, offset , (int)(nextOffset - offset));
         return bytesRef;
       }
 
-      // TODO: share w/ FixedSortedBytesValues?
-      private LookupResult binarySearch(BytesRef b, int low, int high) {
-
-        while (low <= high) {
-          int mid = (low + high) >>> 1;
-          deref(mid);
-          final int cmp = comp.compare(bytesRef, b);
-          if (cmp < 0) {
-            low = mid + 1;
-          } else if (cmp > 0) {
-            high = mid - 1;
-          } else {
-            lookupResult.ord = mid + 1;
-            lookupResult.found = true;
-            return lookupResult;
-          }
-        }
-        assert comp.compare(bytesRef, b) != 0;
-        lookupResult.ord = low;
-        lookupResult.found = false;
-        return lookupResult;
-      }
+      
     }
 
     @Override

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java?rev=1035731&r1=1035730&r2=1035731&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarStraightBytesImpl.java Tue Nov 16 18:34:16 2010
@@ -28,6 +28,7 @@ import org.apache.lucene.store.IndexInpu
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.packed.PackedInts;
 
@@ -44,8 +45,9 @@ class VarStraightBytesImpl {
     // start at -1 if the first added value is > 0
     private int lastDocID = -1;
     private int[] docToAddress;
-    
-    public Writer(Directory dir, String id, AtomicLong bytesUsed) throws IOException {
+
+    public Writer(Directory dir, String id, AtomicLong bytesUsed)
+        throws IOException {
       super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, null, bytesUsed);
       docToAddress = new int[1];
       bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
@@ -60,7 +62,8 @@ class VarStraightBytesImpl {
       if (docID >= docToAddress.length) {
         int oldSize = docToAddress.length;
         docToAddress = ArrayUtil.grow(docToAddress, 1 + docID);
-        bytesUsed.addAndGet(-(docToAddress.length-oldSize)*RamUsageEstimator.NUM_BYTES_INT);
+        bytesUsed.addAndGet(-(docToAddress.length - oldSize)
+            * RamUsageEstimator.NUM_BYTES_INT);
       }
       for (int i = lastDocID + 1; i < docID; i++) {
         docToAddress[i] = address;
@@ -70,7 +73,7 @@ class VarStraightBytesImpl {
 
     @Override
     synchronized public void add(int docID, BytesRef bytes) throws IOException {
-      if(bytes.length == 0)
+      if (bytes.length == 0)
         return; // default
       if (datOut == null)
         initDataOut();
@@ -97,7 +100,8 @@ class VarStraightBytesImpl {
         w.add(docToAddress[i]);
       }
       w.finish();
-      bytesUsed.addAndGet(-(docToAddress.length)*RamUsageEstimator.NUM_BYTES_INT);
+      bytesUsed.addAndGet(-(docToAddress.length)
+          * RamUsageEstimator.NUM_BYTES_INT);
       docToAddress = null;
       super.finish(docCount);
     }
@@ -121,46 +125,29 @@ class VarStraightBytesImpl {
     }
 
     private class Source extends BytesBaseSource {
-      private final int totBytes;
-      // TODO: paged data
-      private final byte[] data;
       private final BytesRef bytesRef = new BytesRef();
       private final PackedInts.Reader addresses;
 
       public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
-        super(datIn, idxIn);
-        totBytes = idxIn.readVInt();
-        data = new byte[totBytes];
-        datIn.readBytes(data, 0, totBytes);
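+        // TODO: the total data length is read as a VInt below and is
+        // therefore limited to the int range; it should be stored and read
+        // as a long.
+        super(datIn, idxIn, new PagedBytes(PAGED_BYTES_BITS), idxIn.readVInt());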
         addresses = PackedInts.getReader(idxIn);
-        bytesRef.bytes = data;
       }
 
       @Override
       public BytesRef getBytes(int docID) {
         final int address = (int) addresses.get(docID);
-        bytesRef.offset = address;
-        if (docID == maxDoc - 1) {
-          bytesRef.length = totBytes - bytesRef.offset;
-        } else {
-          bytesRef.length = (int) addresses.get(1 + docID) - bytesRef.offset;
-        }
-        return bytesRef;
+        final int length = docID == maxDoc - 1 ? (int) (totalLengthInBytes - address)
+            : (int) (addresses.get(1 + docID) - address);
+        return data.fill(bytesRef, address, length);
       }
 
       @Override
       public int getValueCount() {
         throw new UnsupportedOperationException();
       }
-
-      public long ramBytesUsed() {
-        // TODO(simonw): move address ram usage to PackedInts?
-        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
-            + data.length
-            + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + addresses
-                .getBitsPerValue()
-                * addresses.size());
-      }
     }
 
     @Override
@@ -226,10 +213,10 @@ class VarStraightBytesImpl {
 
       @Override
       public int nextDoc() throws IOException {
-        return advance(pos+1);
+        return advance(pos + 1);
       }
     }
-    
+
     @Override
     public Values type() {
       return Values.BYTES_VAR_STRAIGHT;