You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2010/10/12 08:15:05 UTC
svn commit: r1021636 [2/3] - in /lucene/dev/branches/docvalues/lucene: contrib/benchmark/conf/ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ src/java/org/apache...

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java?rev=1021636&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java Tue Oct 12 06:15:03 2010
@@ -0,0 +1,262 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.index.values.Bytes.BytesBaseSource;
+import org.apache.lucene.index.values.Bytes.BytesReaderBase;
+import org.apache.lucene.index.values.Bytes.BytesWriterBase;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.ByteBlockPool.Allocator;
+import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
+import org.apache.lucene.util.packed.PackedInts;
+
+// Stores fixed-length byte[] by deref, ie when two docs
+// have the same value, they store only 1 byte[]
+
+class FixedDerefBytesImpl {
+
+  static final String CODEC_NAME = "FixedDerefBytes";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static class Writer extends BytesWriterBase {
+    private int size = -1;
+    private int[] docToID;
+    private final BytesRefHash hash = new BytesRefHash(pool);
+    
+    public Writer(Directory dir, String id) throws IOException {
+      this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
+          new AtomicLong());
+    }
+
+    public Writer(Directory dir, String id, Allocator allocator,
+        AtomicLong bytesUsed) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false,
+          new ByteBlockPool(allocator), bytesUsed);
+      docToID = new int[1];
+      bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
+    }
+
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if(bytes.length == 0) // default value - skip it
+        return;
+      if (size == -1) {
+        size = bytes.length;
+        initDataOut();
+        datOut.writeInt(size);
+      } else if (bytes.length != size) {
+        throw new IllegalArgumentException("expected bytes size=" + size
+            + " but got " + bytes.length);
+      }
+      int ord = hash.add(bytes);
+
+      if (ord >= 0) {
+        // new added entry
+        datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+      } else {
+        ord = (-ord)-1;
+      }
+
+      if (docID >= docToID.length) {
+        int size = docToID.length;
+        docToID = ArrayUtil.grow(docToID, 1 + docID);
+        bytesUsed.addAndGet((docToID.length - size) * RamUsageEstimator.NUM_BYTES_INT);
+      }
+      docToID[docID] = 1+ord;
+    }
+
+
+    // Important that we get docCount, in case there were
+    // some last docs that we didn't see
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      if (datOut == null) // no added data
+        return;
+      initIndexOut();
+      final int count = 1+hash.size();
+      idxOut.writeInt(count - 1);
+      // write index
+      final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
+          PackedInts.bitsRequired(count - 1));
+      final int limit = docCount > docToID.length ? docToID.length : docCount;
+      for (int i = 0; i < limit; i++) {
+        w.add(docToID[i]);
+      }
+      // fill up remaining doc with zeros
+      for (int i = limit; i < docCount; i++) {
+        w.add(0);
+      }
+      w.finish();
+      hash.clear();
+
+      super.finish(docCount);
+    }
+  }
+
+  public static class Reader extends BytesReaderBase {
+    private final int size;
+
+    Reader(Directory dir, String id, int maxDoc) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, true);
+      try {
+        size = datIn.readInt();
+      } catch (IOException e) {
+        throw e;
+      }
+    }
+
+    @Override
+    public Source load() throws IOException {
+      return new Source(cloneData(), cloneIndex(), size);
+    }
+
+    private static class Source extends BytesBaseSource {
+      // TODO: paged data or mmap?
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+      private final PackedInts.Reader index;
+      private final int numValue;
+      private final int size;
+
+      protected Source(IndexInput datIn, IndexInput idxIn, int size)
+          throws IOException {
+        super(datIn, idxIn);
+        this.size = size;
+        numValue = idxIn.readInt();
+        data = new byte[size * numValue];
+        datIn.readBytes(data, 0, size * numValue);
+        index = PackedInts.getReader(idxIn);
+        bytesRef.bytes = data;
+        bytesRef.length = size;
+      }
+
+      @Override
+      public BytesRef bytes(int docID) {
+        final int id = (int) index.get(docID);
+        if (id == 0) {
+          return defaultValue;
+        }
+        bytesRef.offset = ((id - 1) * size);
+        return bytesRef;
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move ram calculation to PackedInts?!
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+            + data.length
+            + (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index
+                .getBitsPerValue()
+                * index.size());
+      }
+
+      @Override
+      public int getValueCount() {
+        return numValue;
+      }
+    }
+
+    @Override
+    public ValuesEnum getEnum(AttributeSource source) throws IOException {
+      return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME,
+          size);
+    }
+
+    static class DerefBytesEnum extends ValuesEnum {
+      protected final IndexInput datIn;
+      private final PackedInts.ReaderIterator idx;
+      protected final long fp;
+      private final int size;
+      protected final BytesRef ref;
+      private final int valueCount;
+      private int pos = -1;
+
+      public DerefBytesEnum(AttributeSource source, IndexInput datIn,
+          IndexInput idxIn, String codecName, int size) throws IOException {
+        this(source, datIn, idxIn, codecName, size, Values.BYTES_FIXED_DEREF);
+      }
+
+      protected DerefBytesEnum(AttributeSource source, IndexInput datIn,
+          IndexInput idxIn, String codecName, int size, Values enumType)
+          throws IOException {
+        super(source, enumType);
+        ref = attr.bytes();
+        this.datIn = datIn;
+        this.size = size == -1 ? 128 : size;
+        idxIn.readInt();// read valueCount
+        idx = PackedInts.getReaderIterator(idxIn);
+        fp = datIn.getFilePointer();
+        ref.grow(this.size);
+        ref.length = this.size;
+        ref.offset = 0;
+        valueCount = idx.size();
+      }
+
+      @Override
+      public int advance(int target) throws IOException {
+        if (target < valueCount) {
+          final long address = idx.advance(target);
+          pos = idx.ord();
+          if(address == 0) {
+            // default is empty
+            ref.length = 0;
+            ref.offset = 0;
+            return pos;
+          }
+          fill(address, ref);
+          return pos;
+        }
+        return pos = NO_MORE_DOCS;
+      }
+
+      @Override
+      public int nextDoc() throws IOException {
+        return advance(pos + 1);
+      }
+
+      public void close() throws IOException {
+        datIn.close();
+        idx.close();
+      }
+
+      protected void fill(long address, BytesRef ref) throws IOException {
+        datIn.seek(fp + ((address - 1) * size));
+        datIn.readBytes(ref.bytes, 0, size);
+        ref.length = size;
+        ref.offset = 0;
+      }
+
+      @Override
+      public int docID() {
+        return pos;
+      }
+
+    }
+  }
+
+}

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedDerefBytesImpl.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java?rev=1021636&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java Tue Oct 12 06:15:03 2010
@@ -0,0 +1,258 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.index.values.Bytes.BytesBaseSortedSource;
+import org.apache.lucene.index.values.Bytes.BytesReaderBase;
+import org.apache.lucene.index.values.Bytes.BytesWriterBase;
+import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.ByteBlockPool.Allocator;
+import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
+import org.apache.lucene.util.packed.PackedInts;
+
+// Stores fixed-length byte[] by deref, ie when two docs
+// have the same value, they store only 1 byte[]
+
+class FixedSortedBytesImpl {
+
+  static final String CODEC_NAME = "FixedSortedBytes";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static class Writer extends BytesWriterBase {
+    private int size = -1;
+    private int[] docToEntry;
+    private final Comparator<BytesRef> comp;
+
+   
+    private final BytesRefHash hash = new BytesRefHash(pool);
+    
+    public Writer(Directory dir, String id,  Comparator<BytesRef> comp) throws IOException {
+      this(dir, id, comp, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
+          new AtomicLong());
+    }
+
+    public Writer(Directory dir, String id,  Comparator<BytesRef> comp, Allocator allocator, AtomicLong bytesUsed) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, new ByteBlockPool(allocator), bytesUsed);
+      docToEntry = new int[1];
+//      docToEntry[0] = -1;
+      bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
+      this.comp = comp;
+    }
+
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if(bytes.length == 0)
+        return; // default - skip it
+      if (size == -1) {
+        size = bytes.length;
+        initDataOut();
+        datOut.writeInt(size);
+      } else if (bytes.length != size) {
+        throw new IllegalArgumentException("expected bytes size=" + size + " but got " + bytes.length);
+      }
+      if (docID >= docToEntry.length) {
+        int[] newArray = new int[ArrayUtil.oversize(1 + docID,
+            RamUsageEstimator.NUM_BYTES_INT)];
+        System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
+//        Arrays.fill(newArray, docToEntry.length, newArray.length, -1);
+
+        bytesUsed.addAndGet((newArray.length - docToEntry.length) * RamUsageEstimator.NUM_BYTES_INT);
+        docToEntry = newArray;
+      }
+      int e = hash.add(bytes);
+      docToEntry[docID] = 1+(e < 0? (-e)-1: e);
+    }
+
+
+    // Important that we get docCount, in case there were
+    // some last docs that we didn't see
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      if(datOut == null)// no data added
+        return;
+      initIndexOut();
+      final int[] sortedEntries = hash.sort(comp);
+      final int count = hash.size();
+      int[] address= new int[count];
+      // first dump bytes data, recording address as we go
+      for(int i=0;i<count;i++) {
+        final int e = sortedEntries[i];
+        final BytesRef bytes = hash.get(e);
+        assert bytes.length == size;
+        datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+        address[e] = 1+i;
+      }
+
+      idxOut.writeInt(count);
+
+      // next write index
+      PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(count));
+      final int limit;
+      if (docCount > docToEntry.length) {
+        limit = docToEntry.length;
+      } else {
+        limit = docCount;
+      }
+      for(int i=0;i<limit;i++) {
+        final int e = docToEntry[i];
+        if (e == 0) {
+          // null is encoded as zero
+          w.add(0);
+        } else {
+          assert e > 0 && e <= count: "index must  0 > && <= " + count + " was: " + e;
+          w.add(address[e-1]);
+        }
+      }
+
+      for(int i=limit;i<docCount;i++) {
+        w.add(0);
+      }
+      w.finish();
+
+      super.finish(docCount);
+      bytesUsed.addAndGet((-docToEntry.length)
+          * RamUsageEstimator.NUM_BYTES_INT);
+      docToEntry = null;
+    }
+  }
+  
+  public static class Reader extends BytesReaderBase {
+    // nocommit -- allow/require byte[] paging here?
+    private final int size;
+
+    public Reader(Directory dir, String id, int maxDoc)
+      throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, true);
+      size = datIn.readInt();
+    }
+
+    @Override
+    public org.apache.lucene.index.values.Reader.Source load() throws IOException {
+      return loadSorted(null);
+    }
+
+    @Override
+    public SortedSource loadSorted(Comparator<BytesRef> comp) throws IOException {
+      return new Source(cloneData(), cloneIndex(), size, comp);
+    }
+
+    private static class Source extends BytesBaseSortedSource {
+
+      // TODO: paged data
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+      private final PackedInts.Reader index;
+      private final LookupResult lookupResult = new LookupResult();
+      private final int numValue;
+      private final Comparator<BytesRef> comp;
+      private final int size;
+
+      public Source(IndexInput datIn, IndexInput idxIn, int size, Comparator<BytesRef> comp) throws IOException {
+        super(datIn, idxIn);
+        this.size = size;
+        datIn.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
+        idxIn.seek(CodecUtil.headerLength(CODEC_NAME));
+
+        numValue = idxIn.readInt();
+        data = new byte[size*numValue];
+        datIn.readBytes(data, 0, size*numValue);
+        datIn.close();
+
+        index = PackedInts.getReader(idxIn);
+        idxIn.close(); // do we need to close that here?
+
+        bytesRef.bytes = data;
+        bytesRef.length = size;
+        // default byte sort order 
+        this.comp = comp==null?BytesRef.getUTF8SortedAsUnicodeComparator():comp;
+      }
+
+      @Override
+      public int ord(int docID) {
+        return (int) index.get(docID);
+      }
+
+      @Override
+      public BytesRef getByOrd(int ord) {
+        if (ord == 0) {
+          return defaultValue;
+        } else {
+          bytesRef.offset = ((ord-1) * size);
+          return bytesRef;
+        }
+      }
+
+      @Override
+      public LookupResult getByValue(BytesRef bytes) {
+        return binarySearch(bytes, 0, numValue-1);
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move ram calcultation to PackedInts?
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length +
+            (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index.getBitsPerValue() * index.size());
+      }
+
+      @Override
+      public int getValueCount() {
+        return numValue;
+      }
+
+      private LookupResult binarySearch(BytesRef b, int low, int high) {
+        
+        while (low <= high) {
+          int mid = (low + high) >>> 1;
+          bytesRef.offset = mid * size;
+          int cmp = comp.compare(bytesRef, b);
+          if (cmp < 0) {
+            low = mid + 1;
+          } else if (cmp > 0) {
+            high = mid - 1;
+          } else {
+            lookupResult.ord = mid+1;
+            lookupResult.found = true;
+            return lookupResult;
+          }
+        }
+        lookupResult.ord = low;
+        lookupResult.found = false;
+        return lookupResult;
+      }
+    }
+
+    @Override
+    public ValuesEnum getEnum(AttributeSource source) throws IOException {
+        // do unsorted
+        return new DerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME, size);
+    }
+  }
+}

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java?rev=1021636&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java Tue Oct 12 06:15:03 2010
@@ -0,0 +1,221 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.values.Bytes.BytesBaseSource;
+import org.apache.lucene.index.values.Bytes.BytesReaderBase;
+import org.apache.lucene.index.values.Bytes.BytesWriterBase;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
+
+// Simplest storage: stores fixed length byte[] per
+// document, with no dedup and no sorting.
+
+class FixedStraightBytesImpl {
+
+  static final String CODEC_NAME = "FixedStraightBytes";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static class Writer extends BytesWriterBase {
+    private int size = -1;
+    // start at -1 if the first added value is > 0
+    private int lastDocID = -1;
+    private byte[] oneRecord;
+
+    protected Writer(Directory dir, String id) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, null, null);
+    }
+    
+    // nocommit - impl bulk copy here!
+
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if (size == -1) {
+        size = bytes.length;
+        initDataOut();
+        datOut.writeInt(size);
+        oneRecord = new byte[size];
+      } else if (bytes.length != size) {
+        throw new IllegalArgumentException("expected bytes size=" + size + " but got " + bytes.length);
+      }
+      fill(docID);
+      assert bytes.bytes.length >= bytes.length;
+      datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.lucene.index.values.Writer#merge(org.apache.lucene.index.values.Writer.MergeState)
+     */
+    @Override
+    protected void merge(MergeState state) throws IOException {
+      if(state.bits == null && state.reader instanceof Reader){
+        Reader reader = (Reader) state.reader;
+        final int maxDocs = reader.maxDoc;
+        if(maxDocs == 0)
+          return;
+        if(size == -1) {
+          size = reader.size;
+          initDataOut();
+          datOut.writeInt(size);
+          oneRecord = new byte[size];
+        }
+       fill(state.docBase);
+       // nocommit should we add a transfer to API to each reader?
+       datOut.copyBytes(reader.cloneData(), size * maxDocs);
+       lastDocID += maxDocs-1;
+      } else
+        super.merge(state);
+    }
+
+    // Fills up to but not including this docID
+    private void fill(int docID) throws IOException {
+      assert size >= 0;
+      for(int i=lastDocID+1;i<docID;i++) {
+        datOut.writeBytes(oneRecord, size);
+      }
+      lastDocID = docID;
+    }
+
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      if(datOut == null) // no data added
+        return;
+      fill(docCount);
+      super.finish(docCount);
+    }
+
+    public long ramBytesUsed() {
+      return 0;
+    }
+    
+  }
+
+  public static class Reader extends BytesReaderBase {
+    private final int size;
+    private final int maxDoc;
+
+    Reader(Directory dir, String id, int maxDoc)
+      throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, false);
+      size = datIn.readInt();
+      this.maxDoc = maxDoc;
+    }
+
+    @Override
+    public Source load() throws IOException {
+      return new Source(cloneData(), cloneIndex(), size, maxDoc);
+    }
+
+    @Override
+    public void close() throws IOException {
+      datIn.close();
+    }
+
+    private static class Source extends BytesBaseSource {
+      // TODO: paged data
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+      private final int size;
+
+      public Source(IndexInput datIn, IndexInput idxIn, int size, int maxDoc) throws IOException {
+        super(datIn, idxIn);
+        this.size = size;
+        final int sizeInBytes = size*maxDoc;
+        data = new byte[sizeInBytes];
+        assert data.length <= datIn.length() : " file size is less than the expected size diff: " + (data.length - datIn.length()) + " size: " + size + " maxDoc " + maxDoc + " pos: " + datIn.getFilePointer();
+        datIn.readBytes(data, 0, sizeInBytes);
+        bytesRef.bytes = data;
+        bytesRef.length = size;
+      }
+
+      @Override
+      public BytesRef bytes(int docID) {
+        bytesRef.offset = docID * size;
+        return bytesRef;
+      }
+
+      public long ramBytesUsed() {
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length;
+      }
+
+      @Override
+      public int getValueCount() {
+        throw new UnsupportedOperationException();
+      }
+    }
+
+    @Override
+    public ValuesEnum getEnum(AttributeSource source) throws IOException {
+      return new FixedStraightBytesEnum(source, cloneData(), size, maxDoc);
+    }
+    
+    private static final class FixedStraightBytesEnum extends ValuesEnum {
+      private final IndexInput datIn;
+      private final int size;
+      private final int maxDoc;
+      private int pos = -1;
+      private final long fp;
+      private final BytesRef ref;
+
+      public FixedStraightBytesEnum(AttributeSource source, IndexInput datIn, int size, int maxDoc) throws IOException{
+        super(source, Values.BYTES_FIXED_STRAIGHT);
+        this.datIn = datIn;
+        this.size = size;
+        this.maxDoc = maxDoc;
+        ref = attr.bytes();
+        ref.grow(size);
+        ref.length = size;
+        ref.offset = 0;
+        fp = datIn.getFilePointer();
+      }
+     
+      public void close() throws IOException {
+        datIn.close();
+      }
+  
+      @Override
+      public int advance(int target) throws IOException {
+        if(target >= maxDoc){
+          ref.length = 0;
+          ref.offset = 0;
+          return pos = NO_MORE_DOCS;
+        }
+        if((target-1) != pos) // pos inc == 1
+          datIn.seek(fp + target * size);
+        datIn.readBytes(ref.bytes, 0, size);
+        return pos = target;
+      }
+      
+      @Override
+      public int docID() {
+        return pos;
+      }
+      
+      @Override
+      public int nextDoc() throws IOException {
+        return advance(pos+1);
+      }
+    }
+  }
+}

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/FixedStraightBytesImpl.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java?rev=1021636&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java Tue Oct 12 06:15:03 2010
@@ -0,0 +1,389 @@
+package org.apache.lucene.index.values;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.DoubleBuffer;
+import java.nio.FloatBuffer;
+import java.util.Collection;
+
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Exposes writer/reader for floating point values. You can specify 4 (java
+ * float) or 8 (java double) byte precision.
+ */
+//nocommit - add mmap version
+//nocommti - add bulk copy where possible
+public class Floats {
+  private static final String CODEC_NAME = "SimpleFloats";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+  private static final int INT_ZERO = Float.floatToRawIntBits(0.0f);
+  private static final long LONG_ZERO = Double.doubleToRawLongBits(0.0);
+
+  public static void files(String id, Collection<String> files) {
+    files.add(id + "." + IndexFileNames.CSF_DATA_EXTENSION);
+  }
+
+  public static Writer getWriter(Directory dir, String id, int precisionBytes)
+      throws IOException {
+    if (precisionBytes != 4 && precisionBytes != 8) {
+      throw new IllegalArgumentException("precisionBytes must be 4 or 8; got "
+          + precisionBytes);
+    }
+    if (precisionBytes == 4) {
+      return new Float4Writer(dir, id);
+    } else {
+      return new Float8Writer(dir, id);
+    }
+  }
+
+  public static Reader getReader(Directory dir, String id, int maxDoc)
+      throws IOException {
+    return new FloatsReader(dir, id, maxDoc);
+  }
+
+  abstract static class FloatsWriter extends Writer {
+    private final Directory dir;
+    private final String id;
+    private FloatsRef floatsRef;
+    protected int lastDocId = -1;
+    protected IndexOutput datOut;
+    private final byte precision;
+
+    protected FloatsWriter(Directory dir, String id, int precision)
+        throws IOException {
+      this.dir = dir;
+      this.id = id;
+      this.precision = (byte) precision;
+    }
+
+    protected void initDatOut() throws IOException {
+      datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
+          IndexFileNames.CSF_DATA_EXTENSION));
+      CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
+      assert datOut.getFilePointer() == CodecUtil.headerLength(CODEC_NAME);
+      datOut.writeByte(precision);
+    }
+
+    public long ramBytesUsed() {
+      return 0;
+    }
+
+    @Override
+    protected void add(int docID) throws IOException {
+      add(docID, floatsRef.get());
+    }
+
+    @Override
+    protected void setNextAttribute(ValuesAttribute attr) {
+      floatsRef = attr.floats();
+    }
+
+    protected abstract int fillDefault(int num) throws IOException;
+
+    @Override
+    protected void merge(MergeState state) throws IOException {
+      if (state.bits == null && state.reader instanceof FloatsReader) {
+        // no deletes - bulk copy
+        // nocommit - should be do bulks with deletes too?
+        final FloatsReader reader = (FloatsReader) state.reader;
+        assert reader.precisionBytes == (int) precision;
+        if (reader.maxDoc == 0)
+          return;
+        if (datOut == null)
+          initDatOut();
+        final int docBase = state.docBase;
+        if (docBase - lastDocId > 1) {
+          // fill with default values
+          lastDocId += fillDefault(docBase - lastDocId - 1);
+        }
+        lastDocId += reader.transferTo(datOut);
+      } else
+        super.merge(state);
+    }
+
+  }
+
+  // Writes 4 bytes (float) per value
+  static class Float4Writer extends FloatsWriter {
+
+    protected Float4Writer(Directory dir, String id) throws IOException {
+      super(dir, id, 4);
+    }
+
+    @Override
+    synchronized public void add(final int docID, final double v)
+        throws IOException {
+      assert docID > lastDocId : "docID: " + docID
+          + " must be greater than the last added doc id: " + lastDocId;
+      if (datOut == null) {
+        initDatOut();
+      }
+      if (docID - lastDocId > 1) {
+        // fill with default values
+        lastDocId += fillDefault(docID - lastDocId - 1);
+      }
+      assert datOut != null;
+      datOut.writeInt(Float.floatToRawIntBits((float) v));
+      ++lastDocId;
+    }
+
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      if (datOut == null)
+        return; // no data added - don't create file!
+      if (docCount > lastDocId + 1)
+        for (int i = lastDocId; i < docCount; i++) {
+          datOut.writeInt(INT_ZERO); // default value
+        }
+      datOut.close();
+    }
+
+    @Override
+    protected int fillDefault(int numValues) throws IOException {
+      for (int i = 0; i < numValues; i++) {
+        datOut.writeInt(INT_ZERO);
+      }
+      return numValues;
+    }
+  }
+
+  // Writes 8 bytes (double) per value
+  static class Float8Writer extends FloatsWriter {
+
+    protected Float8Writer(Directory dir, String id) throws IOException {
+      super(dir, id, 8);
+    }
+
+    @Override
+    synchronized public void add(int docID, double v) throws IOException {
+      assert docID > lastDocId : "docID: " + docID
+          + " must be greater than the last added doc id: " + lastDocId;
+      if (datOut == null) {
+        initDatOut();
+      }
+      if (docID - lastDocId > 1) {
+        // fill with default values
+        lastDocId += fillDefault(docID - lastDocId - 1);
+      }
+      assert datOut != null;
+      datOut.writeLong(Double.doubleToRawLongBits(v));
+      ++lastDocId;
+    }
+
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      if (datOut == null)
+        return; // no data added - don't create file!
+      if (docCount > lastDocId + 1)
+        for (int i = lastDocId; i < docCount; i++) {
+          datOut.writeLong(LONG_ZERO); // default value
+        }
+      datOut.close();
+    }
+
+    @Override
+    protected int fillDefault(int numValues) throws IOException {
+      for (int i = 0; i < numValues; i++) {
+        datOut.writeLong(LONG_ZERO);
+      }
+      return numValues;
+    }
+  }
+
+  /**
+   * Opens all necessary files, but does not read any data in until you call
+   * {@link #load}.
+   */
+  static class FloatsReader extends Reader {
+
+    private final IndexInput datIn;
+    private final int precisionBytes;
+    // TODO(simonw) is ByteBuffer the way to go here?
+    private final int maxDoc;
+
+    protected FloatsReader(Directory dir, String id, int maxDoc)
+        throws IOException {
+      datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
+          IndexFileNames.CSF_DATA_EXTENSION));
+      CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
+      precisionBytes = datIn.readByte();
+      assert precisionBytes == 4 || precisionBytes == 8;
+      this.maxDoc = maxDoc;
+    }
+
+    int transferTo(IndexOutput out) throws IOException {
+      IndexInput indexInput = (IndexInput) datIn.clone();
+      try {
+        indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
+        // skip precision:
+        indexInput.readByte();
+        out.copyBytes(indexInput, precisionBytes * maxDoc);
+      } finally {
+        indexInput.close();
+      }
+      return maxDoc;
+    }
+
+    /**
+     * Loads the actual values. You may call this more than once, eg if you
+     * already previously loaded but then discarded the Source.
+     */
+    @Override
+    public Source load() throws IOException {
+      ByteBuffer buffer = ByteBuffer.allocate(precisionBytes * maxDoc);
+      IndexInput indexInput = (IndexInput) datIn.clone();
+      indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
+      // skip precision:
+      indexInput.readByte();
+      assert buffer.hasArray() : "Buffer must support Array";
+      final byte[] arr = buffer.array();
+      indexInput.readBytes(arr, 0, arr.length);
+      return precisionBytes == 4 ? new Source4(buffer) : new Source8(buffer);
+    }
+
+    private class Source4 extends Source {
+      private final FloatBuffer values;
+
+      Source4(ByteBuffer buffer) {
+        values = buffer.asFloatBuffer();
+      }
+
+      @Override
+      public double floats(int docID) {
+        final float f = values.get(docID);
+        // nocommit should we return NaN as default instead of 0.0?
+        return Float.isNaN(f) ? 0.0f : f;
+      }
+
+      public long ramBytesUsed() {
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.limit()
+            * RamUsageEstimator.NUM_BYTES_FLOAT;
+      }
+    }
+
+    private class Source8 extends Source {
+      private final DoubleBuffer values;
+
+      Source8(ByteBuffer buffer) {
+        values = buffer.asDoubleBuffer();
+      }
+
+      @Override
+      public double floats(int docID) {
+        final double d = values.get(docID);
+        // nocommit should we return NaN as default instead of 0.0?
+        return Double.isNaN(d) ? 0.0d : d;
+      }
+
+      public long ramBytesUsed() {
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + values.limit()
+            * RamUsageEstimator.NUM_BYTES_DOUBLE;
+      }
+    }
+
+    public void close() throws IOException {
+      datIn.close();
+    }
+
+    @Override
+    public ValuesEnum getEnum(AttributeSource source) throws IOException {
+      IndexInput indexInput = (IndexInput) datIn.clone();
+      indexInput.seek(CodecUtil.headerLength(CODEC_NAME));
+      // skip precision:
+      indexInput.readByte();
+      return precisionBytes == 4 ? new Floats4Enum(source, indexInput, maxDoc)
+          : new Floats8EnumImpl(source, indexInput, maxDoc);
+    }
+  }
+
+  static final class Floats4Enum extends FloatsEnumImpl {
+
+    Floats4Enum(AttributeSource source, IndexInput dataIn, int maxDoc)
+        throws IOException {
+      super(source, dataIn, 4, maxDoc, Values.SIMPLE_FLOAT_4BYTE);
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      if (target >= maxDoc)
+        return pos = NO_MORE_DOCS;
+      dataIn.seek(fp + (target * precision));
+      ref.floats[0] = Float.intBitsToFloat(dataIn.readInt());
+      ref.offset = 0; // nocommit -- can we igore this?
+      return pos = target;
+    }
+
+    @Override
+    public int docID() {
+      return pos;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      return advance(pos + 1);
+    }
+  }
+
+  private static final class Floats8EnumImpl extends FloatsEnumImpl {
+
+    Floats8EnumImpl(AttributeSource source, IndexInput dataIn, int maxDoc)
+        throws IOException {
+      super(source, dataIn, 8, maxDoc, Values.SIMPLE_FLOAT_8BYTE);
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      if (target >= maxDoc)
+        return pos = NO_MORE_DOCS;
+      dataIn.seek(fp + (target * precision));
+      ref.floats[0] = Double.longBitsToDouble(dataIn.readLong());
+      ref.offset = 0; // nocommit -- can we igore this?
+      return pos = target;
+    }
+
+    @Override
+    public int docID() {
+      return pos;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      return advance(pos + 1);
+    }
+  }
+
+  static abstract class FloatsEnumImpl extends ValuesEnum {
+    protected final IndexInput dataIn;
+    protected int pos = -1;
+    protected final int precision;
+    protected final int maxDoc;
+    protected final long fp;
+    protected final FloatsRef ref;
+
+    FloatsEnumImpl(AttributeSource source, IndexInput dataIn, int precision,
+        int maxDoc, Values type) throws IOException {
+      super(source, precision == 4 ? Values.SIMPLE_FLOAT_4BYTE
+          : Values.SIMPLE_FLOAT_8BYTE);
+      this.dataIn = dataIn;
+      this.precision = precision;
+      this.maxDoc = maxDoc;
+      fp = dataIn.getFilePointer();
+      this.ref = attr.floats();
+      this.ref.offset = 0;
+    }
+
+    @Override
+    public void close() throws IOException {
+      dataIn.close();
+    }
+  }
+}
\ No newline at end of file

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Floats.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Ints.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Ints.java?rev=1021636&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Ints.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Ints.java Tue Oct 12 06:15:03 2010
@@ -0,0 +1,32 @@
+package org.apache.lucene.index.values;
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.values.PackedIntsImpl.IntsReader;
+import org.apache.lucene.index.values.PackedIntsImpl.IntsWriter;
+import org.apache.lucene.store.Directory;
+//nocommit - add mmap version 
+//nocommti - add bulk copy where possible
+public class Ints {
+
+  private Ints() {
+  }
+  
+  public static void files(String id, Collection<String> files)
+      throws IOException {
+    files.add(IndexFileNames.segmentFileName(id, "",
+        IndexFileNames.CSF_DATA_EXTENSION));
+  }
+
+  public static Writer getWriter(Directory dir, String id, boolean useFixedArray)
+      throws IOException {
+     //nocommit - implement fixed?!
+    return new IntsWriter(dir, id);
+  }
+
+  public static Reader getReader(Directory dir, String id, boolean useFixedArray) throws IOException {
+    return new IntsReader(dir, id);
+  }
+}

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Ints.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Ints.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java?rev=1021636&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java Tue Oct 12 06:15:03 2010
@@ -0,0 +1,240 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.LongsRef;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.PackedInts;
+
+/** Stores ints packed with fixed-bit precision. */
+class PackedIntsImpl {
+
+  private static final String CODEC_NAME = "PackedInts";
+
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+
+  static class IntsWriter extends Writer {
+    // nocommit - can we bulkcopy this on a merge?
+    private LongsRef intsRef;
+    private long[] docToValue;
+    private long minValue;
+    private long maxValue;
+    private boolean started;
+    private final Directory dir;
+    private final String id;
+    private int maxDocID;
+    private int minDocID;
+
+    protected IntsWriter(Directory dir, String id) throws IOException {
+      this.dir = dir;
+      this.id = id;
+      docToValue = new long[1];
+    }
+
+    @Override
+    synchronized public void add(int docID, long v) throws IOException {
+
+      if (!started) {
+        minValue = maxValue = v;
+        minDocID = maxDocID = docID;
+        started = true;
+
+      } else {
+        if (v < minValue) {
+          minValue = v;
+        } else if (v > maxValue) {
+          maxValue = v;
+        }
+        if (docID < minDocID) {
+          minDocID = docID;
+        } else if (docID > maxDocID) {
+          maxDocID = docID;
+        }
+      }
+      if (docID >= docToValue.length) {
+        docToValue = ArrayUtil.grow(docToValue, 1 + docID);
+      }
+      docToValue[docID] = v;
+    }
+
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      if(!started)
+        return;
+      final IndexOutput datOut = dir.createOutput(IndexFileNames
+          .segmentFileName(id, "", IndexFileNames.CSF_DATA_EXTENSION));
+      CodecUtil.writeHeader(datOut, CODEC_NAME, VERSION_CURRENT);
+
+      // nocommit -- long can't work right since it's signed
+      datOut.writeLong(minValue);
+      // write a default value to recognize docs without a value for that field
+      final long defaultValue = ++maxValue - minValue;
+      datOut.writeLong(defaultValue);
+      PackedInts.Writer w = PackedInts.getWriter(datOut, docCount, PackedInts.bitsRequired(maxValue-minValue));
+         
+      final int limit = maxDocID + 1;
+      for (int i = 0; i < minDocID; i++) {
+        w.add(defaultValue);
+      }
+      for (int i = minDocID; i < limit; i++) {
+        w.add(docToValue[i] - minValue);
+      }
+      for (int i = limit; i < docCount; i++) {
+        w.add(defaultValue);
+      }
+      w.finish();
+
+      datOut.close();
+    }
+
+    public long ramBytesUsed() {
+      return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + docToValue.length
+          * RamUsageEstimator.NUM_BYTES_LONG;
+    }
+
+    @Override
+    protected void add(int docID) throws IOException {
+      add(docID, intsRef.get());
+    }
+
+    @Override
+    protected void setNextAttribute(ValuesAttribute attr) {
+      intsRef = attr.ints();
+    }
+  }
+
+  /**
+   * Opens all necessary files, but does not read any data in until you call
+   * {@link #load}.
+   */
+  static class IntsReader extends Reader {
+    private final IndexInput datIn;
+
+    protected IntsReader(Directory dir, String id) throws IOException {
+      datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
+          IndexFileNames.CSF_DATA_EXTENSION));
+      CodecUtil.checkHeader(datIn, CODEC_NAME, VERSION_START, VERSION_START);
+    }
+
+    /**
+     * Loads the actual values. You may call this more than once, eg if you
+     * already previously loaded but then discarded the Source.
+     */
+    @Override
+    public Source load() throws IOException {
+      return new IntsSource((IndexInput) datIn.clone());
+    }
+
+    private static class IntsSource extends Source {
+      private final long minValue;
+      private final long defaultValue;
+      private final PackedInts.Reader values;
+
+      public IntsSource(IndexInput dataIn) throws IOException {
+        dataIn.seek(CodecUtil.headerLength(CODEC_NAME));
+        minValue = dataIn.readLong();
+        defaultValue = dataIn.readLong();
+        values = PackedInts.getReader(dataIn);
+      }
+
+      @Override
+      public long ints(int docID) {
+        // nocommit -- can we somehow avoid 2X method calls
+        // on each get? must push minValue down, and make
+        // PackedInts implement Ints.Source
+        final long val = values.get(docID);
+        // docs not having a value for that field must return a default value
+        return val == defaultValue ? 0 : minValue + val;
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move that to PackedInts?
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER
+            + values.getBitsPerValue() * values.size();
+      }
+    }
+
+    public void close() throws IOException {
+      datIn.close();
+    }
+
+    @Override
+    public ValuesEnum getEnum(AttributeSource source) throws IOException {
+      return new IntsEnumImpl(source, (IndexInput) datIn.clone());
+    }
+
+  }
+
+  private static final class IntsEnumImpl extends ValuesEnum {
+    private final PackedInts.ReaderIterator ints;
+    private long minValue;
+    private final IndexInput dataIn;
+    private final long defaultValue;
+    private LongsRef ref;
+    private final int maxDoc;
+    private int pos = -1;
+
+    private IntsEnumImpl(AttributeSource source, IndexInput dataIn)
+        throws IOException {
+      super(source, Values.PACKED_INTS);
+      this.ref = attr.ints();
+      this.ref.offset = 0;
+      this.dataIn = dataIn;
+      dataIn.seek(CodecUtil.headerLength(CODEC_NAME));
+      minValue = dataIn.readLong();
+      defaultValue = dataIn.readLong();
+      this.ints = PackedInts.getReaderIterator(dataIn);
+      maxDoc = ints.size();
+    }
+
+    @Override
+    public void close() throws IOException {
+      ints.close();
+      dataIn.close();
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      if (target >= maxDoc)
+        return pos = NO_MORE_DOCS;
+      final long val = ints.advance(target);
+      ref.ints[0] = val == defaultValue? 0:minValue + val;
+      ref.offset = 0; // can we skip this?
+      return pos = target;
+    }
+
+    @Override
+    public int docID() {
+      return pos;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      return advance(pos+1);
+    }
+  }
+}
\ No newline at end of file

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/PackedIntsImpl.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Reader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Reader.java?rev=1021636&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Reader.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Reader.java Tue Oct 12 06:15:03 2010
@@ -0,0 +1,109 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Comparator;
+
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
+
+public abstract class Reader implements Closeable {
+  
+  
+  public ValuesEnum getEnum() throws IOException{
+    return getEnum(null);
+  }
+
+  public abstract ValuesEnum getEnum(AttributeSource attrSource) throws IOException;
+
+  public abstract Source load() throws IOException;
+
+  public SortedSource loadSorted(Comparator<BytesRef> comparator) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+  
+
+  /**
+   * Source of integer (returned as java long), per document. The underlying
+   * implementation may use different numbers of bits per value; long is only
+   * used since it can handle all precisions.
+   */
+  public static abstract class Source {
+
+    public long ints(int docID) {
+      throw new UnsupportedOperationException("ints are not supported");
+    }
+
+    public double floats(int docID) {
+      throw new UnsupportedOperationException("floats are not supported");
+    }
+
+    public BytesRef bytes(int docID) {
+      throw new UnsupportedOperationException("bytes are not supported");
+    }
+    
+    /** Returns number of unique values.  Some impls may
+     * throw UnsupportedOperationException. */
+    public int getValueCount() {
+      throw new UnsupportedOperationException();
+    }
+    
+    public ValuesEnum getEnum() throws IOException{
+      return getEnum(null);
+    }
+    
+    // nocommit - enable obtaining enum from source since this is already in memory
+    public /*abstract*/ ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    public abstract long ramBytesUsed();
+  }
+
+  public static abstract class SortedSource extends Source {
+
+    @Override
+    public BytesRef bytes(int docID) {
+      return getByOrd(ord(docID));
+    }
+
+    /**
+     * Returns ord for specified docID. If this docID had not been added to the
+     * Writer, the ord is 0. Ord is dense, ie, starts at 0, then increments by 1
+     * for the next (as defined by {@link Comparator} value.
+     */
+    public abstract int ord(int docID);
+
+    /** Returns value for specified ord. */
+    public abstract BytesRef getByOrd(int ord);
+
+    public static class LookupResult {
+      public boolean found;
+      public int ord;
+    }
+
+    /**
+     * Finds the largest ord whose value is <= the requested value. If
+     * {@link LookupResult#found} is true, then ord is an exact match. The
+     * returned {@link LookupResult} may be reused across calls.
+     */
+    public abstract LookupResult getByValue(BytesRef value);
+  }
+  
+}

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Reader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Reader.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Values.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Values.java?rev=1021636&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Values.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Values.java Tue Oct 12 06:15:03 2010
@@ -0,0 +1,48 @@
+package org.apache.lucene.index.values;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Controls whether per-field values are stored into
+ *  index.  This storage is non-sparse, so it's best to
+ *  use this when all docs have the field, and loads all
+ *  values into RAM, exposing a random access API, when
+ *  loaded.
+ *
+ * <p><b>NOTE</b>: This feature is experimental and the
+ * API is free to change in non-backwards-compatible ways.  */
+public enum Values {
+
+  /** Integral value is stored as packed ints.  The bit
+   *  precision is fixed across the segment, and
+   *  determined by the min/max values in the field. */
+  PACKED_INTS,
+  PACKED_INTS_FIXED,
+  SIMPLE_FLOAT_4BYTE,
+  SIMPLE_FLOAT_8BYTE,
+
+  // nocommit -- shouldn't lucene decide/detect straight vs
+  // deref, as well fixed vs var?
+  BYTES_FIXED_STRAIGHT,
+  BYTES_FIXED_DEREF,
+  BYTES_FIXED_SORTED,
+
+  BYTES_VAR_STRAIGHT,
+  BYTES_VAR_DEREF,
+  BYTES_VAR_SORTED
+
+  // nocommit -- need STRING variants as well
+}

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Values.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/Values.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java?rev=1021636&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java Tue Oct 12 06:15:03 2010
@@ -0,0 +1,34 @@
+package org.apache.lucene.index.values;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.Comparator;
+
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.LongsRef;
+
+public interface ValuesAttribute extends Attribute {
+  public Values type();
+  public BytesRef bytes();
+  public FloatsRef floats();
+  public LongsRef ints();
+  public void setType(Values type);
+  public Comparator<BytesRef> bytesComparator();
+  public void setBytesComparator(Comparator<BytesRef> comp);
+  
+}
\ No newline at end of file

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesAttribute.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java?rev=1021636&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java Tue Oct 12 06:15:03 2010
@@ -0,0 +1,151 @@
+package org.apache.lucene.index.values;
+
+import java.util.Comparator;
+
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FloatsRef;
+import org.apache.lucene.util.LongsRef;
+import org.apache.lucene.util.SetOnce;
+
+public class ValuesAttributeImpl extends AttributeImpl implements ValuesAttribute {
+  private Values type;
+  private BytesRef bytes = null;
+  private FloatsRef floats = null;
+  private LongsRef ints = null;
+  private Comparator<BytesRef> bytesComp;
+
+  public BytesRef bytes() {
+    return bytes;
+  }
+
+  public FloatsRef floats() {
+    return floats;
+  }
+
+  public LongsRef ints() {
+    return ints;
+  }
+
+  public Values type() {
+    return type;
+  }
+
+  public void setType(Values type) {
+    this.type = type;
+    switch (type) {
+    case BYTES_FIXED_DEREF:
+    case BYTES_FIXED_SORTED:
+    case BYTES_FIXED_STRAIGHT:
+    case BYTES_VAR_DEREF:
+    case BYTES_VAR_SORTED:
+    case BYTES_VAR_STRAIGHT:
+      bytes = new BytesRef();
+      ints = null;
+      floats = null;
+      break;
+    case PACKED_INTS:
+    case PACKED_INTS_FIXED:
+      ints = new LongsRef(new long[1], 0, 1);
+      bytes = null;
+      floats = null;
+      break;
+    case SIMPLE_FLOAT_4BYTE:
+    case SIMPLE_FLOAT_8BYTE:
+      floats = new FloatsRef(new double[1], 0, 1);
+      ints = null;
+      bytes = null;
+      break;
+
+    }
+  }
+
+  @Override
+  public void clear() {
+    // TODO
+  }
+
+  @Override
+  public void copyTo(AttributeImpl target) {
+    ValuesAttributeImpl other = (ValuesAttributeImpl)target;
+    other.setType(type);
+    
+    switch (type) {
+    case BYTES_FIXED_DEREF:
+    case BYTES_FIXED_SORTED:
+    case BYTES_FIXED_STRAIGHT:
+    case BYTES_VAR_DEREF:
+    case BYTES_VAR_SORTED:
+    case BYTES_VAR_STRAIGHT:
+      bytes = (BytesRef) other.bytes.clone();
+      break;
+    case PACKED_INTS:
+    case PACKED_INTS_FIXED:
+      ints = (LongsRef) other.ints.clone();
+      break;
+    case SIMPLE_FLOAT_4BYTE:
+    case SIMPLE_FLOAT_8BYTE:
+      floats = (FloatsRef) other.floats.clone();
+      break;
+
+    }
+  }
+
+  /* (non-Javadoc)
+   * @see java.lang.Object#hashCode()
+   */
+  @Override
+  public int hashCode() {
+    final int prime = 31;
+    int result = 0;
+    result = prime * result + ((bytes == null) ? 0 : bytes.hashCode());
+    result = prime * result + ((floats == null) ? 0 : floats.hashCode());
+    result = prime * result + ((ints == null) ? 0 : ints.hashCode());
+    result = prime * result + ((type == null) ? 0 : type.hashCode());
+    return result;
+  }
+
+  /* (non-Javadoc)
+   * @see java.lang.Object#equals(java.lang.Object)
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (getClass() != obj.getClass())
+      return false;
+    ValuesAttributeImpl other = (ValuesAttributeImpl) obj;
+    if (bytes == null) {
+      if (other.bytes != null)
+        return false;
+    } else if (!bytes.equals(other.bytes))
+      return false;
+    if (floats == null) {
+      if (other.floats != null)
+        return false;
+    } else if (!floats.equals(other.floats))
+      return false;
+    if (ints == null) {
+      if (other.ints != null)
+        return false;
+    } else if (!ints.equals(other.ints))
+      return false;
+    if (type == null) {
+      if (other.type != null)
+        return false;
+    } else if (!type.equals(other.type))
+      return false;
+    return true;
+  }
+
+  public Comparator<BytesRef> bytesComparator() {
+    return bytesComp;
+  }
+
+  public void setBytesComparator(Comparator<BytesRef> comp) {
+    bytesComp = comp;    
+  }
+
+
+
+}

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesAttributeImpl.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java?rev=1021636&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java Tue Oct 12 06:15:03 2010
@@ -0,0 +1,62 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeSource;
+
+public abstract class ValuesEnum extends DocIdSetIterator{
+  private AttributeSource source;
+  protected final ValuesAttribute attr;
+
+ 
+  protected ValuesEnum(Values enumType) {
+     this(null, enumType);
+  }
+
+  protected ValuesEnum(AttributeSource source, Values enumType) {
+    this.source = source;
+    boolean setType = !hasAttribute(ValuesAttribute.class);
+    attr = addAttribute(ValuesAttribute.class);
+    if (setType)
+      attr.setType(enumType);
+  }
+
+  public AttributeSource attributes() {
+    if (source == null)
+      source = new AttributeSource();
+    return source;
+  }
+
+  public <T extends Attribute> T addAttribute(Class<T> attr) {
+    return attributes().addAttribute(attr);
+  }
+
+  public <T extends Attribute> T getAttribute(Class<T> attr) {
+    return attributes().getAttribute(attr);
+  }
+
+  public <T extends Attribute> boolean hasAttribute(Class<T> attr) {
+    return attributes().hasAttribute(attr);
+  }
+
+  public abstract void close() throws IOException;
+
+}

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/ValuesEnum.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java?rev=1021636&view=auto
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java (added)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java Tue Oct 12 06:15:03 2010
@@ -0,0 +1,255 @@
+package org.apache.lucene.index.values;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.index.values.Bytes.BytesBaseSource;
+import org.apache.lucene.index.values.Bytes.BytesReaderBase;
+import org.apache.lucene.index.values.Bytes.BytesWriterBase;
+import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.ByteBlockPool.Allocator;
+import org.apache.lucene.util.ByteBlockPool.DirectAllocator;
+import org.apache.lucene.util.BytesRefHash.ParallelArrayBase;
+import org.apache.lucene.util.BytesRefHash.ParallelBytesStartArray;
+import org.apache.lucene.util.packed.PackedInts;
+
+// Stores variable-length byte[] by deref, ie when two docs
+// have the same value, they store only 1 byte[] and both
+// docs reference that single source
+
+class VarDerefBytesImpl {
+
+  static final String CODEC_NAME = "VarDerefBytes";
+  static final int VERSION_START = 0;
+  static final int VERSION_CURRENT = VERSION_START;
+  
+ 
+
+  private static class AddressParallelArray extends ParallelArrayBase<AddressParallelArray> {
+    final int[] address;
+    
+    AddressParallelArray(int size, AtomicLong bytesUsed) {
+      super(size, bytesUsed);
+      address = new int[size]; 
+    }
+    @Override
+    protected int bytesPerEntry() {
+      return RamUsageEstimator.NUM_BYTES_INT + super.bytesPerEntry();
+    }
+
+    @Override
+    protected void copyTo(AddressParallelArray toArray, int numToCopy) {
+      super.copyTo(toArray, numToCopy);
+      System.arraycopy(address, 0, toArray.address, 0, size);
+      
+    }
+
+    @Override
+    public AddressParallelArray newInstance(int size) {
+      return new AddressParallelArray(size, bytesUsed);
+    }
+    
+  }
+
+
+  static class Writer extends BytesWriterBase {
+    private int[] docToAddress;
+    private int address = 1;
+    
+    private final ParallelBytesStartArray<AddressParallelArray> array = new ParallelBytesStartArray<AddressParallelArray>(new AddressParallelArray(0, bytesUsed));
+    private final BytesRefHash hash  = new BytesRefHash(pool, 16, array) ;
+
+    public Writer(Directory dir, String id) throws IOException  {
+      this(dir, id, new DirectAllocator(ByteBlockPool.BYTE_BLOCK_SIZE),
+          new AtomicLong());
+    }
+    public Writer(Directory dir, String id, Allocator allocator, AtomicLong bytesUsed) throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_CURRENT, false, false, new ByteBlockPool(allocator), bytesUsed);
+      docToAddress = new int[1];
+      bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
+    }
+
+    @Override
+    synchronized public void add(int docID, BytesRef bytes) throws IOException {
+      if(bytes.length == 0)
+        return; // default
+      if(datOut == null)
+        initDataOut();
+      final int e = hash.add(bytes);
+
+      if (docID >= docToAddress.length) {
+        final int oldSize = docToAddress.length;
+        docToAddress = ArrayUtil.grow(docToAddress, 1+docID);
+        bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT * (docToAddress.length - oldSize));
+      }
+      final int docAddress;
+      if (e >= 0) {
+        docAddress = array.array.address[e] = address;
+        address += IOUtils.writeLength(datOut, bytes);
+        datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+        address += bytes.length;
+      } else {
+        docAddress = array.array.address[(-e)-1];
+      }
+      docToAddress[docID] = docAddress;
+    }
+
+    public long ramBytesUsed() {
+      return bytesUsed.get();
+    }
+
+    // Important that we get docCount, in case there were
+    // some last docs that we didn't see
+    @Override
+    synchronized public void finish(int docCount) throws IOException {
+      if(datOut == null)
+        return;
+      initIndexOut();
+      idxOut.writeInt(address-1);
+
+      // write index
+      // nocommit -- allow forcing fixed array (not -1)
+      // TODO(simonw): check the address calculation / make it more intuitive
+      final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount, PackedInts.bitsRequired(address-1));
+      final int limit;
+      if (docCount > docToAddress.length) {
+        limit = docToAddress.length;
+      } else {
+        limit = docCount;
+      }
+      for(int i=0;i<limit;i++) {
+        w.add(docToAddress[i]);
+      }
+      for(int i=limit;i<docCount;i++) {
+        w.add(0);
+      }
+      w.finish();
+      hash.clear(true);
+      super.finish(docCount);
+    }
+  }
+
+  public static class Reader extends BytesReaderBase {
+
+    Reader(Directory dir, String id, int maxDoc)
+      throws IOException {
+      super(dir, id, CODEC_NAME, VERSION_START, true);
+    }
+
+    @Override
+    public Source load() throws IOException {
+      return new Source(cloneData(), cloneIndex());
+    }
+
+    private static class Source extends BytesBaseSource {
+      // TODO: paged data
+      private final byte[] data;
+      private final BytesRef bytesRef = new BytesRef();
+      private final PackedInts.Reader index;
+
+      public Source(IndexInput datIn, IndexInput idxIn) throws IOException {
+        super(datIn, idxIn);
+        datIn.seek(CodecUtil.headerLength(CODEC_NAME));
+        idxIn.seek(CodecUtil.headerLength(CODEC_NAME));
+
+        final int totBytes = idxIn.readInt();
+        data = new byte[totBytes];
+        datIn.readBytes(data, 0, totBytes);
+
+        index = PackedInts.getReader(idxIn);
+        bytesRef.bytes = data;
+      }
+
+      @Override
+      public BytesRef bytes(int docID) {
+        int address = (int) index.get(docID);
+        if (address == 0) {
+          assert defaultValue.length == 0: " default value manipulated";
+          return defaultValue;
+        } else {
+          address--;
+          if ((data[address] & 0x80) == 0) {
+            // length is 1 byte
+            bytesRef.length = data[address];
+            bytesRef.offset = address+1;
+          } else {
+            bytesRef.length = (data[address]&0x7f) + ((data[address+1]&0xff)<<7);
+            bytesRef.offset = address+2;
+          }
+          return bytesRef;
+        }
+      }
+      
+      @Override
+      public int getValueCount() {
+        return index.size();
+      }
+
+      public long ramBytesUsed() {
+        // TODO(simonw): move address ram usage to PackedInts? 
+        return RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + data.length + 
+        (RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + index.getBitsPerValue() * index.size());
+      }
+    }
+
+    @Override
+    public ValuesEnum getEnum(AttributeSource source) throws IOException {
+      return new VarDerefBytesEnum(source, cloneData(), cloneIndex(), CODEC_NAME);
+    }
+    
+    static class VarDerefBytesEnum extends DerefBytesEnum {
+
+      public VarDerefBytesEnum(AttributeSource source, IndexInput datIn, IndexInput idxIn,
+          String codecName) throws IOException {
+        super(source, datIn, idxIn, codecName, -1, Values.BYTES_VAR_DEREF);
+      }
+
+    
+      @Override
+      protected void fill(long address, BytesRef ref) throws IOException {
+        // TODO(simonw): use pages here
+        datIn.seek(fp + --address);
+        final byte sizeByte = datIn.readByte();
+        final int size;
+        if ((sizeByte & 0x80) == 0) {
+          // length is 1 byte
+          size = sizeByte;
+        } else {
+          size = (sizeByte & 0x7f) + ((datIn.readByte() & 0xff) << 7);
+        }
+        if(ref.bytes.length < size)
+          ref.grow(size);
+        ref.length = size;
+        ref.offset = 0;
+        datIn.readBytes(ref.bytes, 0, size);
+      }
+    }
+  }
+}

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/values/VarDerefBytesImpl.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL