You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/01/08 00:41:09 UTC

svn commit: r1430090 - in /lucene/dev/branches/lucene4547/lucene: codecs/src/java/org/apache/lucene/codecs/memory/ codecs/src/java/org/apache/lucene/codecs/simpletext/ core/src/java/org/apache/lucene/codecs/ core/src/java/org/apache/lucene/codecs/lucen...

Author: mikemccand
Date: Mon Jan  7 23:41:09 2013
New Revision: 1430090

URL: http://svn.apache.org/viewvc?rev=1430090&view=rev
Log:
cutover to Iterable for SortedDV

Removed:
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SortedDocValuesConsumer.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/values/
Modified:
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java

Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryDocValuesFormat.java Mon Jan  7 23:41:09 2013
@@ -123,8 +123,6 @@ public class MemoryDocValuesFormat exten
       public SortedDocValues getSorted(FieldInfo field) throws IOException {
         SortedDocValues valuesIn = producer.getSorted(field);
         final int maxDoc = valuesIn.size();
-        final int maxLength = valuesIn.maxLength();
-        final boolean fixedLength = valuesIn.isFixedLength();
         final int valueCount = valuesIn.getValueCount();
 
         // nocommit used packed ints and so on
@@ -163,16 +161,6 @@ public class MemoryDocValuesFormat exten
           public int size() {
             return maxDoc;
           }
-
-          @Override
-          public boolean isFixedLength() {
-            return fixedLength;
-          }
-
-          @Override
-          public int maxLength() {
-            return maxLength;
-          }
         };
       }
 

Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java Mon Jan  7 23:41:09 2013
@@ -32,7 +32,6 @@ import java.util.Set;
 import org.apache.lucene.codecs.SimpleDVConsumer;
 import org.apache.lucene.codecs.SimpleDVProducer;
 import org.apache.lucene.codecs.SimpleDocValuesFormat;
-import org.apache.lucene.codecs.SortedDocValuesConsumer;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocValues;
@@ -268,21 +267,24 @@ public class SimpleTextSimpleDocValuesFo
     }
     
     @Override
-    public SortedDocValuesConsumer addSortedField(FieldInfo field, final int valueCount, boolean fixedLength, final int maxLength) throws IOException {
+    public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
       assert fieldSeen(field.name);
       assert DocValues.isSortedBytes(field.getDocValuesType());
       assert !isNorms;
       writeFieldEntry(field);
+
+      int valueCount = 0;
+      int maxLength = -1;
+      for(BytesRef value : values) {
+        maxLength = Math.max(maxLength, value.length);
+        valueCount++;
+      }
+
       // write numValues
       SimpleTextUtil.write(data, NUMVALUES);
       SimpleTextUtil.write(data, Integer.toString(valueCount), scratch);
       SimpleTextUtil.writeNewline(data);
       
-      // write fixedlength
-      SimpleTextUtil.write(data, FIXEDLENGTH);
-      SimpleTextUtil.write(data, Boolean.toString(fixedLength), scratch);
-      SimpleTextUtil.writeNewline(data);
-      
       // write maxLength
       SimpleTextUtil.write(data, MAXLENGTH);
       SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
@@ -312,40 +314,34 @@ public class SimpleTextSimpleDocValuesFo
       SimpleTextUtil.writeNewline(data);
       final DecimalFormat ordEncoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
 
-      return new SortedDocValuesConsumer() {
+      // for asserts:
+      int valuesSeen = 0;
 
-        // for asserts:
-        private int valuesSeen;
-        
-        @Override
-        public void addValue(BytesRef value) throws IOException {
-          // write length
-          SimpleTextUtil.write(data, LENGTH);
-          SimpleTextUtil.write(data, encoder.format(value.length), scratch);
-          SimpleTextUtil.writeNewline(data);
+      for(BytesRef value : values) {
+        // write length
+        SimpleTextUtil.write(data, LENGTH);
+        SimpleTextUtil.write(data, encoder.format(value.length), scratch);
+        SimpleTextUtil.writeNewline(data);
           
-          // write bytes -- don't use SimpleText.write
-          // because it escapes:
-          data.writeBytes(value.bytes, value.offset, value.length);
-
-          // pad to fit
-          for (int i = value.length; i < maxLength; i++) {
-            data.writeByte((byte)' ');
-          }
-          SimpleTextUtil.writeNewline(data);
-          valuesSeen++;
-          assert valuesSeen <= valueCount;
-        }
+        // write bytes -- don't use SimpleText.write
+        // because it escapes:
+        data.writeBytes(value.bytes, value.offset, value.length);
 
-        @Override
-        public void addDoc(int ord) throws IOException {
-          SimpleTextUtil.write(data, ordEncoder.format(ord), scratch);
-          SimpleTextUtil.writeNewline(data);
+        // pad to fit
+        for (int i = value.length; i < maxLength; i++) {
+          data.writeByte((byte)' ');
         }
+        SimpleTextUtil.writeNewline(data);
+        valuesSeen++;
+        assert valuesSeen <= valueCount;
+      }
 
-        @Override
-        public void finish() throws IOException {}
-      };
+      assert valuesSeen == valueCount;
+
+      for(Number ord : docToOrd) {
+        SimpleTextUtil.write(data, ordEncoder.format(ord.intValue()), scratch);
+        SimpleTextUtil.writeNewline(data);
+      }
     }
 
     /** write the header for this field */
@@ -450,9 +446,6 @@ public class SimpleTextSimpleDocValuesFo
           assert startsWith(NUMVALUES);
           field.numValues = Integer.parseInt(stripPrefix(NUMVALUES));
           readLine();
-          assert startsWith(FIXEDLENGTH);
-          field.fixedLength = Boolean.parseBoolean(stripPrefix(FIXEDLENGTH));
-          readLine();
           assert startsWith(MAXLENGTH);
           field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
           readLine();
@@ -635,16 +628,6 @@ public class SimpleTextSimpleDocValuesFo
         public int size() {
           return maxDoc;
         }
-
-        @Override
-        public boolean isFixedLength() {
-          return field.fixedLength;
-        }
-
-        @Override
-        public int maxLength() {
-          return field.maxLength;
-        }
       };
     }
 

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/SimpleDVConsumer.java Mon Jan  7 23:41:09 2013
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.NoSuchElementException;
@@ -31,6 +32,8 @@ import org.apache.lucene.index.NumericDo
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.PriorityQueue;
 
 // prototype streaming DV api
 public abstract class SimpleDVConsumer implements Closeable {
@@ -42,8 +45,7 @@ public abstract class SimpleDVConsumer i
 
   public abstract void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException;
 
-  // nocommit: figure out whats fair here.
-  public abstract SortedDocValuesConsumer addSortedField(FieldInfo field, int valueCount, boolean fixedLength, int maxLength) throws IOException;
+  public abstract void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException;
 
   // dead simple impl: codec can optimize
   public void mergeNumericField(FieldInfo fieldInfo, final MergeState mergeState, final List<NumericDocValues> toMerge) throws IOException {
@@ -183,10 +185,238 @@ public abstract class SimpleDVConsumer i
                    });
   }
 
-  public void mergeSortedField(FieldInfo fieldInfo, MergeState mergeState, List<SortedDocValues> toMerge) throws IOException {
-    SortedDocValuesConsumer.Merger merger = new SortedDocValuesConsumer.Merger();
+  public static class SortedBytesMerger {
+
+    public int numMergedTerms;
+
+    final List<BytesRef> mergedTerms = new ArrayList<BytesRef>();
+    final List<SegmentState> segStates = new ArrayList<SegmentState>();
+
+    /** Per-segment cursor used while merge-sorting the terms of all segments. */
+    private static class SegmentState {
+      AtomicReader reader;
+      // Marks ords referenced by a live doc; stays null when the reader has no deletions:
+      FixedBitSet liveTerms;
+      int ord = -1;
+      SortedDocValues values;
+      BytesRef scratch = new BytesRef();
+
+      // nocommit can we factor out the compressed fields
+      // compression?  ie we have a good idea "roughly" what
+      // the ord should be (linear projection) so we only
+      // need to encode the delta from that ...:
+      int[] segOrdToMergedOrd;
+
+      /** Advances to the next live ord; returns its term in the shared scratch, or null when exhausted. */
+      public BytesRef nextTerm() {
+        while (ord < values.getValueCount()-1) {
+          ord++;
+          // Skip "deleted" terms (ie, terms that were not
+          // referenced by any live docs) without loading them:
+          if (liveTerms == null || liveTerms.get(ord)) {
+            values.lookupOrd(ord, scratch);
+            return scratch;
+          }
+        }
+        return null;
+      }
+    }
+
+    /** Orders segments by their current term (scratch); equal terms may surface in any order. */
+    private static class TermMergeQueue extends PriorityQueue<SegmentState> {
+      public TermMergeQueue(int maxSize) {
+        super(maxSize);
+      }
+      @Override
+      protected boolean lessThan(SegmentState a, SegmentState b) {
+        return a.scratch.compareTo(b.scratch) < 0; // strict: an equal term is not "less than"
+      }
+    }
+
+    /** Merge-sorts the live terms of every segment into mergedTerms and records each segment's ord -> merged ord mapping. */
+    public void merge(MergeState mergeState, List<SortedDocValues> toMerge) throws IOException {
+      // First pass: mark "live" terms (ords referenced by at least one live doc)
+      for (int readerIDX=0;readerIDX<toMerge.size();readerIDX++) {
+        AtomicReader reader = mergeState.readers.get(readerIDX);      
+        // nocommit what if this is null...?  need default source?
+        int maxDoc = reader.maxDoc();
+
+        SegmentState state = new SegmentState();
+        state.reader = reader;
+        state.values = toMerge.get(readerIDX);
+
+        segStates.add(state);
+        assert state.values.getValueCount() < Integer.MAX_VALUE;
+        if (reader.hasDeletions()) {
+          state.liveTerms = new FixedBitSet(state.values.getValueCount());
+          Bits liveDocs = reader.getLiveDocs();
+          for(int docID=0;docID<maxDoc;docID++) {
+            if (liveDocs.get(docID)) {
+              state.liveTerms.set(state.values.getOrd(docID));
+            }
+          }
+        }
+
+        // nocommit we can unload the bits to disk to reduce
+        // transient ram spike...
+      }
+
+      // Second pass: merge only the live terms
+
+      TermMergeQueue q = new TermMergeQueue(segStates.size());
+      for(SegmentState segState : segStates) {
+        if (segState.nextTerm() != null) {
+
+          // nocommit we could defer this to 3rd pass (and
+          // reduce transient RAM spike) but then
+          // we'd spend more effort computing the mapping...:
+          segState.segOrdToMergedOrd = new int[segState.values.getValueCount()];
+          q.add(segState);
+        }
+      }
+
+      BytesRef lastTerm = null;
+      int ord = 0;
+      while (q.size() != 0) {
+        SegmentState top = q.top();
+        if (lastTerm == null || !lastTerm.equals(top.scratch)) {
+          lastTerm = BytesRef.deepCopyOf(top.scratch);
+          // nocommit we could spill this to disk instead of
+          // RAM, and replay on finish...
+          mergedTerms.add(lastTerm);
+          ord++;
+        }
+        // Segments positioned on the same term all map their current ord to the same merged ord:
+        top.segOrdToMergedOrd[top.ord] = ord-1;
+        if (top.nextTerm() == null) {
+          q.pop();
+        } else {
+          q.updateTop();
+        }
+      }
+
+      numMergedTerms = ord;
+    }
+
+    /*
+    public void finish(SortedDocValuesConsumer consumer) throws IOException {
+
+      // Third pass: write merged result
+      for(BytesRef term : mergedTerms) {
+        consumer.addValue(term);
+      }
+
+      for(SegmentState segState : segStates) {
+        Bits liveDocs = segState.reader.getLiveDocs();
+        int maxDoc = segState.reader.maxDoc();
+        for(int docID=0;docID<maxDoc;docID++) {
+          if (liveDocs == null || liveDocs.get(docID)) {
+            int segOrd = segState.values.getOrd(docID);
+            int mergedOrd = segState.segOrdToMergedOrd[segOrd];
+            consumer.addDoc(mergedOrd);
+          }
+        }
+      }
+    }
+    */
+  }
+
+  /** Merge-sorts the live terms of all segments, then streams the merged values and the per-live-doc ords to addSortedField. */
+  public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState, List<SortedDocValues> toMerge) throws IOException {
+    final SortedBytesMerger merger = new SortedBytesMerger();
+    // Does the heavy lifting to merge sort all "live" ords:
     merger.merge(mergeState, toMerge);
-    SortedDocValuesConsumer consumer = addSortedField(fieldInfo, merger.numMergedTerms, merger.fixedLength >= 0, merger.maxLength);
-    consumer.merge(mergeState, merger);
+    addSortedField(fieldInfo,
+                   // ord -> value
+                   new Iterable<BytesRef>() {
+                     @Override
+                     public Iterator<BytesRef> iterator() {
+                       return new Iterator<BytesRef>() {
+                         int ordUpto;
+
+                         @Override
+                         public boolean hasNext() {
+                           return ordUpto < merger.mergedTerms.size();
+                         }
+
+                         @Override
+                         public void remove() {
+                           throw new UnsupportedOperationException();
+                         }
+
+                         @Override
+                         public BytesRef next() {
+                           if (!hasNext()) {
+                             throw new NoSuchElementException();
+                           }
+                           return merger.mergedTerms.get(ordUpto++);
+                         }
+                       };
+                     }
+                   },
+
+                   // doc -> ord
+                    new Iterable<Number>() {
+                      @Override
+                      public Iterator<Number> iterator() {
+                        return new Iterator<Number>() {
+                          int readerUpto = -1;
+                          int docIDUpto;
+                          int nextValue;
+                          SortedBytesMerger.SegmentState currentReader;
+                          Bits currentLiveDocs;
+                          boolean nextIsSet;
+
+                          @Override
+                          public boolean hasNext() {
+                            return nextIsSet || setNext();
+                          }
+
+                          @Override
+                          public void remove() {
+                            throw new UnsupportedOperationException();
+                          }
+
+                          @Override
+                          public Number next() {
+                            if (!hasNext()) {
+                              throw new NoSuchElementException();
+                            }
+                            assert nextIsSet;
+                            nextIsSet = false;
+                            // nocommit make a mutable number
+                            return nextValue;
+                          }
+
+                          private boolean setNext() {
+                            while (true) {
+                              if (readerUpto == merger.segStates.size()) {
+                                return false;
+                              }
+                              // Move on to the next segment once the current one is used up:
+                              if (currentReader == null || docIDUpto == currentReader.reader.maxDoc()) {
+                                readerUpto++;
+                                if (readerUpto < merger.segStates.size()) {
+                                  currentReader = merger.segStates.get(readerUpto);
+                                  currentLiveDocs = currentReader.reader.getLiveDocs();
+                                }
+                                docIDUpto = 0;
+                                continue;
+                              }
+                              // Only live docs contribute an ord; deleted docs are skipped below:
+                              if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
+                                nextIsSet = true;
+                                int segOrd = currentReader.values.getOrd(docIDUpto);
+                                nextValue = currentReader.segOrdToMergedOrd[segOrd];
+                                docIDUpto++;
+                                return true;
+                              }
+
+                              docIDUpto++;
+                            }
+                          }
+                        };
+                      }
+                    });
   }
 }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesFormat.java Mon Jan  7 23:41:09 2013
@@ -25,7 +25,6 @@ import org.apache.lucene.codecs.CodecUti
 import org.apache.lucene.codecs.SimpleDVConsumer;
 import org.apache.lucene.codecs.SimpleDVProducer;
 import org.apache.lucene.codecs.SimpleDocValuesFormat;
-import org.apache.lucene.codecs.SortedDocValuesConsumer;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.FieldInfo;
@@ -128,8 +127,8 @@ public class Lucene41SimpleDocValuesForm
     }
 
     @Override
-    public SortedDocValuesConsumer addSortedField(FieldInfo field, int valueCount, boolean fixedLength, int maxLength) throws IOException {
-      return null;
+    public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+      // nocommit todo: not implemented yet -- values and docToOrd are ignored and nothing is written
     }
     
     @Override

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java Mon Jan  7 23:41:09 2013
@@ -29,7 +29,6 @@ import org.apache.lucene.codecs.Postings
 import org.apache.lucene.codecs.SimpleDVConsumer;
 import org.apache.lucene.codecs.SimpleDVProducer;
 import org.apache.lucene.codecs.SimpleDocValuesFormat;
-import org.apache.lucene.codecs.SortedDocValuesConsumer;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.NumericDocValues;
@@ -111,8 +110,8 @@ public abstract class PerFieldDocValuesF
     }
 
     @Override
-    public SortedDocValuesConsumer addSortedField(FieldInfo field, int valueCount, boolean fixedLength, int maxLength) throws IOException {
-      return getInstance(field).addSortedField(field, valueCount, fixedLength, maxLength);
+    public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+      getInstance(field).addSortedField(field, values, docToOrd); // delegate to the consumer getInstance returns for this field
     }
 
     private SimpleDVConsumer getInstance(FieldInfo field) throws IOException {

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedBytesDVWriter.java Mon Jan  7 23:41:09 2013
@@ -18,9 +18,9 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
+import java.util.Iterator;
 
 import org.apache.lucene.codecs.SimpleDVConsumer;
-import org.apache.lucene.codecs.SortedDocValuesConsumer;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
@@ -44,11 +44,6 @@ class SortedBytesDVWriter extends DocVal
   private static final BytesRef EMPTY = new BytesRef(BytesRef.EMPTY_BYTES);
   private static final int DEFAULT_PENDING_SIZE = 16;
 
-  // -2 means not set yet; -1 means length isn't fixed;
-  // -otherwise it's the fixed length seen so far:
-  int fixedLength = -2;
-  int maxLength;
-
   public SortedBytesDVWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
     this.fieldInfo = fieldInfo;
     this.iwBytesUsed = iwBytesUsed;
@@ -81,12 +76,10 @@ class SortedBytesDVWriter extends DocVal
   public void finish(int maxDoc) {
     if (pendingIndex < maxDoc) {
       addOneValue(EMPTY);
-      mergeLength(0);
     }
   }
 
   private void addOneValue(BytesRef value) {
-    mergeLength(value.length);
     int ord = hash.add(value);
     if (ord < 0) {
       ord = -ord-1;
@@ -100,54 +93,102 @@ class SortedBytesDVWriter extends DocVal
     pending[pendingIndex++] = ord;
   }
 
-  private void mergeLength(int length) {
-    if (fixedLength == -2) {
-      fixedLength = length;
-    } else if (fixedLength != length) {
-      fixedLength = -1;
-    }
-    maxLength = Math.max(maxLength, length);
-  }
-
   @Override
   public void flush(SegmentWriteState state, SimpleDVConsumer dvConsumer) throws IOException {
-    SortedDocValuesConsumer consumer = dvConsumer.addSortedField(fieldInfo,
-                                                                 hash.size(),
-                                                                 fixedLength >= 0,
-                                                                 maxLength);
     final int maxDoc = state.segmentInfo.getDocCount();
-    int emptyOrd = -1;
+
+    final int emptyOrd;
     if (pendingIndex < maxDoc) {
       // Make sure we added EMPTY value before sorting:
-      emptyOrd = hash.add(EMPTY);
-      if (emptyOrd < 0) {
-        emptyOrd = -emptyOrd-1;
+      int ord = hash.add(EMPTY);
+      if (ord < 0) {
+        emptyOrd = -ord-1;
+      } else {
+        emptyOrd = ord;
       }
+    } else {
+      emptyOrd = -1;
     }
 
-    int valueCount = hash.size();
+    final int valueCount = hash.size();
 
-    int[] sortedValues = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
+    final int[] sortedValues = hash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
     final int sortedValueRamUsage = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + RamUsageEstimator.NUM_BYTES_INT*valueCount;
     iwBytesUsed.addAndGet(sortedValueRamUsage);
     final int[] ordMap = new int[valueCount];
-    // Write values, in sorted order:
-    BytesRef scratch = new BytesRef();
+
     for(int ord=0;ord<valueCount;ord++) {
-      consumer.addValue(hash.get(sortedValues[ord], scratch));
       ordMap[sortedValues[ord]] = ord;
     }
+
     final int bufferedDocCount = pendingIndex;
 
-    for(int docID=0;docID<bufferedDocCount;docID++) {
-      consumer.addDoc(ordMap[pending[docID]]);
-    }
-    for(int docID=bufferedDocCount;docID<maxDoc;docID++) {
-      consumer.addDoc(ordMap[emptyOrd]);
-    }
+    dvConsumer.addSortedField(fieldInfo,
+
+                              // ord -> value
+                              new Iterable<BytesRef>() {
+                                @Override
+                                public Iterator<BytesRef> iterator() {
+                                  return new Iterator<BytesRef>() {
+                                    int ordUpto;
+                                    BytesRef scratch = new BytesRef();
+
+                                    @Override
+                                    public boolean hasNext() {
+                                      return ordUpto < valueCount;
+                                    }
+
+                                    @Override
+                                    public void remove() {
+                                      throw new UnsupportedOperationException();
+                                    }
+
+                                    @Override
+                                    public BytesRef next() {
+                                      hash.get(sortedValues[ordUpto], scratch);
+                                      ordUpto++;
+                                      return scratch;
+                                    }
+                                  };
+                                }
+                              },
+
+                              // doc -> ord
+                              new Iterable<Number>() {
+                                @Override
+                                public Iterator<Number> iterator() {
+                                  return new Iterator<Number>() {
+                                    int docUpto;
+
+                                    @Override
+                                    public boolean hasNext() {
+                                      return docUpto < maxDoc;
+                                    }
+
+                                    @Override
+                                    public void remove() {
+                                      throw new UnsupportedOperationException();
+                                    }
+
+                                    @Override
+                                    public Number next() {
+                                      int ord;
+                                      if (docUpto < bufferedDocCount) {
+                                        ord = pending[docUpto];
+                                      } else {
+                                        ord = emptyOrd;
+                                      }
+                                      docUpto++;
+                                      // nocommit make
+                                      // resuable Number?
+                                      return ordMap[ord];
+                                    }
+                                  };
+                                }
+                              });
+    
     iwBytesUsed.addAndGet(-sortedValueRamUsage);
     reset();
-    consumer.finish();
   }
 
   public void abort() {
@@ -159,7 +200,5 @@ class SortedBytesDVWriter extends DocVal
     pending = ArrayUtil.shrink(pending, DEFAULT_PENDING_SIZE);
     pendingIndex = 0;
     hash.clear();
-    fixedLength = -2;
-    maxLength = 0;
   }
 }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java Mon Jan  7 23:41:09 2013
@@ -30,10 +30,6 @@ public abstract class SortedDocValues ex
 
   public abstract int getValueCount();
 
-  public abstract boolean isFixedLength();
-
-  public abstract int maxLength();
-
   @Override
   public void get(int docID, BytesRef result) {
     int ord = getOrd(docID);
@@ -174,16 +170,6 @@ public abstract class SortedDocValues ex
     public int size() {
       return size;
     }
-
-    @Override
-    public boolean isFixedLength() {
-      return true;
-    }
-
-    @Override
-    public int maxLength() {
-      return 0;
-    }
   }
 
   /** If {@code key} exists, returns its ordinal, else

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java Mon Jan  7 23:41:09 2013
@@ -937,16 +937,12 @@ class FieldCacheImpl implements FieldCac
     private final PackedInts.Reader termOrdToBytesOffset;
     private final PackedInts.Reader docToTermOrd;
     private final int numOrd;
-    private final int maxLength;
-    private final boolean isFixedLength;
 
-    public SortedDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd, int maxLength, boolean isFixedLength) {
+    public SortedDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) {
       this.bytes = bytes;
       this.docToTermOrd = docToTermOrd;
       this.termOrdToBytesOffset = termOrdToBytesOffset;
       this.numOrd = numOrd;
-      this.maxLength = maxLength;
-      this.isFixedLength = isFixedLength;
     }
 
     @Override
@@ -976,16 +972,6 @@ class FieldCacheImpl implements FieldCac
     }
 
     @Override
-    public boolean isFixedLength() {
-      return isFixedLength;
-    }
-
-    @Override
-    public int maxLength() {
-      return maxLength;
-    }
-
-    @Override
     public TermsEnum getTermsEnum() {
       return this.new SortedDocValuesEnum();
     }
@@ -1207,9 +1193,6 @@ class FieldCacheImpl implements FieldCac
 
         int termOrd = 0;
 
-        int sameLength = -2;
-        int maxLength = -1;
-
         // TODO: use Uninvert?
 
         if (terms != null) {
@@ -1221,12 +1204,6 @@ class FieldCacheImpl implements FieldCac
             if (term == null) {
               break;
             }
-            if (sameLength == -2) {
-              sameLength = term.length;
-            } else if (sameLength != term.length) {
-              sameLength = -1;
-            }
-            maxLength = Math.max(maxLength, term.length);
             if (termOrd >= termCountHardLimit) {
               break;
             }
@@ -1256,7 +1233,7 @@ class FieldCacheImpl implements FieldCac
         }
 
         // maybe an int-only impl?
-        return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd, maxLength, sameLength >= 0);
+        return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd);
       }
     }
   }
@@ -1264,14 +1241,10 @@ class FieldCacheImpl implements FieldCac
   private static class BinaryDocValuesImpl extends BinaryDocValues {
     private final PagedBytes.Reader bytes;
     private final PackedInts.Reader docToOffset;
-    private final int maxLength;
-    private final boolean isFixedLength;
 
-    public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset, int maxLength, boolean isFixedLength) {
+    public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) {
       this.bytes = bytes;
       this.docToOffset = docToOffset;
-      this.maxLength = maxLength;
-      this.isFixedLength = isFixedLength;
     }
 
     @Override
@@ -1353,9 +1326,6 @@ class FieldCacheImpl implements FieldCac
         // pointer==0 means not set
         bytes.copyUsingLengthPrefix(new BytesRef());
 
-        int sameLength = -2;
-        int maxLength = -1;
-
         if (terms != null) {
           int termCount = 0;
           final TermsEnum termsEnum = terms.iterator(null);
@@ -1372,12 +1342,6 @@ class FieldCacheImpl implements FieldCac
             if (term == null) {
               break;
             }
-            if (sameLength == -2) {
-              sameLength = term.length;
-            } else if (sameLength != term.length) {
-              sameLength = -1;
-            }
-            maxLength = Math.max(maxLength, term.length);
             final long pointer = bytes.copyUsingLengthPrefix(term);
             docs = termsEnum.docs(null, docs, 0);
             while (true) {
@@ -1391,7 +1355,7 @@ class FieldCacheImpl implements FieldCac
         }
 
         // maybe an int-only impl?
-        return new BinaryDocValuesImpl(bytes.freeze(true), docToOffset.getMutable(), maxLength, sameLength >= 0);
+        return new BinaryDocValuesImpl(bytes.freeze(true), docToOffset.getMutable());
       }
     }
   }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java?rev=1430090&r1=1430089&r2=1430090&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/TestDemoDocValue.java Mon Jan  7 23:41:09 2013
@@ -44,7 +44,6 @@ import org.apache.lucene.store.Directory
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.LuceneTestCase;
-import org.junit.Ignore;
 
 /**
  * A very simple demo used in the API documentation (src/java/overview.html).
@@ -594,7 +593,6 @@ public class TestDemoDocValue extends Lu
     IndexReader ireader = DirectoryReader.open(directory); // read-only=true
     assert ireader.leaves().size() == 1;
     SortedDocValues dv = ireader.leaves().get(0).reader().getSortedDocValues("dv");
-    assertTrue(dv.isFixedLength()); // "hello world 1" length == "hello world 2" length
     assertEquals(2, dv.getValueCount()); // 2 ords
     BytesRef scratch = new BytesRef();
     dv.lookupOrd(0, scratch);