You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/21 22:50:20 UTC

svn commit: r1436640 - in /lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext: SimpleTextDocValuesFormat.java SimpleTextDocValuesReader.java SimpleTextDocValuesWriter.java SimpleTextNormsFormat.java

Author: rmuir
Date: Mon Jan 21 21:50:20 2013
New Revision: 1436640

URL: http://svn.apache.org/viewvc?rev=1436640&view=rev
Log:
split simpletext reader/writer out

Added:
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java   (with props)
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java   (with props)
Modified:
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java

Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java?rev=1436640&r1=1436639&r2=1436640&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java Mon Jan 21 21:50:20 2013
@@ -18,55 +18,73 @@ package org.apache.lucene.codecs.simplet
  */
 
 import java.io.IOException;
-import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.text.DecimalFormat;
-import java.text.DecimalFormatSymbols;
-import java.text.ParseException;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Set;
 
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.DocValuesFormat;
-import org.apache.lucene.index.BinaryDocValues;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfo.DocValuesType;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.StringHelper;
-
 
 /**
  * plain text doc values format.
  * <p>
  * <b><font color="red">FOR RECREATIONAL USE ONLY</font></B>
- * @lucene.experimental
+ * <p>
+ * the .dat file contains the data.
+ *  for numbers this is a "fixed-width" file, for example a single byte range:
+ *  <pre>
+ *  field myField
+ *    minvalue 0
+ *    pattern 000
+ *  005
+ *  234
+ *  123
+ *  ...
+ *  </pre>
+ *  so a document's value (delta encoded from minvalue) can be retrieved by 
+ *  seeking to startOffset + (1+pattern.length())*docid. The extra 1 is the newline.
+ *  
+ *  for bytes this is also a "fixed-width" file, for example:
+ *  <pre>
+ *  field myField
+ *    maxlength 8
+ *    pattern 0
+ *  length 6
+ *  foobar[space][space]
+ *  length 3
+ *  baz[space][space][space][space][space]
+ *  ...
+ *  </pre>
+ *  so a doc's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*doc
+ *  the extra 9 is 2 newlines, plus "length " itself.
+ *  
+ *  for sorted bytes this is a fixed-width file, for example:
+ *  <pre>
+ *  field myField
+ *    numvalues 10
+ *    maxlength 8
+ *    pattern 0
+ *    ordpattern 00
+ *  length 6
+ *  foobar[space][space]
+ *  length 3
+ *  baz[space][space][space][space][space]
+ *  ...
+ *  03
+ *  06
+ *  01
+ *  09
+ *  ...
+ *  </pre>
+ *  so the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numValues.
+ *  a document's ord can be retrieved by seeking to "ord section" + (1+ordpattern.length())*docid
+ *  an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord
+ *   
+ *  the reader can just scan this file when it opens, skipping over the data blocks
+ *  and saving the offset/etc for each field. 
+ *  @lucene.experimental
  */
 public class SimpleTextDocValuesFormat extends DocValuesFormat {
-  final static BytesRef END     = new BytesRef("END");
-  final static BytesRef FIELD   = new BytesRef("field ");
-  // used for numerics
-  final static BytesRef MINVALUE = new BytesRef("  minvalue ");
-  final static BytesRef PATTERN  = new BytesRef("  pattern ");
-  // used for bytes
-  final static BytesRef LENGTH = new BytesRef("length ");
-  final static BytesRef MAXLENGTH = new BytesRef("  maxlength ");
-  // used for sorted bytes
-  final static BytesRef FIXEDLENGTH = new BytesRef("  fixedlength ");
-  final static BytesRef NUMVALUES = new BytesRef("  numvalues ");
-  final static BytesRef ORDPATTERN = new BytesRef("  ordpattern ");
   
   public SimpleTextDocValuesFormat() {
     super("SimpleText");
@@ -81,553 +99,4 @@ public class SimpleTextDocValuesFormat e
   public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOException {
     return new SimpleTextDocValuesReader(state, "dat");
   }
-  
-  /** the .dat file contains the data.
-   *  for numbers this is a "fixed-width" file, for example a single byte range:
-   *  <pre>
-   *  field myField
-   *    minvalue 0
-   *    pattern 000
-   *  005
-   *  234
-   *  123
-   *  ...
-   *  </pre>
-   *  so a document's value (delta encoded from minvalue) can be retrieved by 
-   *  seeking to startOffset + (1+pattern.length())*docid. The extra 1 is the newline.
-   *  
-   *  for bytes this is also a "fixed-width" file, for example:
-   *  <pre>
-   *  field myField
-   *    maxlength 6
-   *    pattern 0
-   *  length 6
-   *  foobar[space][space]
-   *  length 3
-   *  baz[space][space][space][space][space]
-   *  ...
-   *  </pre>
-   *  so a doc's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*doc
-   *  the extra 9 is 2 newlines, plus "length " itself.
-   *  
-   *  for sorted bytes this is a fixed-width file, for example:
-   *  <pre>
-   *  field myField
-   *    numvalues 10
-   *    maxLength 8
-   *    pattern 0
-   *    ordpattern 00
-   *  length 6
-   *  foobar[space][space]
-   *  length 3
-   *  baz[space][space][space][space][space]
-   *  ...
-   *  03
-   *  06
-   *  01
-   *  10
-   *  ...
-   *  </pre>
-   *  so the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numValues.
-   *  a document's ord can be retrieved by seeking to "ord section" + (1+ordpattern.length())*docid
-   *  an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord
-   *   
-   *  the reader can just scan this file when it opens, skipping over the data blocks
-   *  and saving the offset/etc for each field. 
-   */
-  static class SimpleTextDocValuesWriter extends DocValuesConsumer {
-    final IndexOutput data;
-    final BytesRef scratch = new BytesRef();
-    final int numDocs;
-    // nocommit
-    final boolean isNorms;
-    private final Set<String> fieldsSeen = new HashSet<String>(); // for asserting
-    
-    public SimpleTextDocValuesWriter(SegmentWriteState state, String ext) throws IOException {
-      //System.out.println("WRITE: " + IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext) + " " + state.segmentInfo.getDocCount() + " docs");
-      data = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext), state.context);
-      numDocs = state.segmentInfo.getDocCount();
-      isNorms = ext.equals("len");
-    }
-
-    // for asserting
-    private boolean fieldSeen(String field) {
-      assert !fieldsSeen.contains(field): "field \"" + field + "\" was added more than once during flush";
-      fieldsSeen.add(field);
-      return true;
-    }
-
-    @Override
-    public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
-      assert fieldSeen(field.name);
-      // nocommit: this must be multiple asserts
-      //assert (field.getDocValuesType() != null && (DocValues.isNumber(field.getDocValuesType()) || DocValues.isFloat(field.getDocValuesType()))) ||
-      //  (field.getNormType() != null && (DocValues.isNumber(field.getNormType()) || DocValues.isFloat(field.getNormType()))): "field=" + field.name;
-      writeFieldEntry(field);
-
-      // first pass to find min/max
-      long minValue = Long.MAX_VALUE;
-      long maxValue = Long.MIN_VALUE;
-      for(Number n : values) {
-        long v = n.longValue();
-        minValue = Math.min(minValue, v);
-        maxValue = Math.max(maxValue, v);
-      }
-      
-      // write our minimum value to the .dat, all entries are deltas from that
-      SimpleTextUtil.write(data, MINVALUE);
-      SimpleTextUtil.write(data, Long.toString(minValue), scratch);
-      SimpleTextUtil.writeNewline(data);
-      
-      // build up our fixed-width "simple text packed ints"
-      // format
-      BigInteger maxBig = BigInteger.valueOf(maxValue);
-      BigInteger minBig = BigInteger.valueOf(minValue);
-      BigInteger diffBig = maxBig.subtract(minBig);
-      int maxBytesPerValue = diffBig.toString().length();
-      StringBuilder sb = new StringBuilder();
-      for (int i = 0; i < maxBytesPerValue; i++) {
-        sb.append('0');
-      }
-      
-      // write our pattern to the .dat
-      SimpleTextUtil.write(data, PATTERN);
-      SimpleTextUtil.write(data, sb.toString(), scratch);
-      SimpleTextUtil.writeNewline(data);
-
-      final String patternString = sb.toString();
-      
-      final DecimalFormat encoder = new DecimalFormat(patternString, new DecimalFormatSymbols(Locale.ROOT));
-      
-      int numDocsWritten = 0;
-
-      // second pass to write the values
-      for(Number n : values) {
-        long value = n.longValue();
-        assert value >= minValue;
-        Number delta = BigInteger.valueOf(value).subtract(BigInteger.valueOf(minValue));
-        String s = encoder.format(delta);
-        assert s.length() == patternString.length();
-        SimpleTextUtil.write(data, s, scratch);
-        SimpleTextUtil.writeNewline(data);
-        numDocsWritten++;
-        assert numDocsWritten <= numDocs;
-      }
-
-      assert numDocs == numDocsWritten: "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten;
-    }
-
-    @Override
-    public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
-      assert fieldSeen(field.name);
-      assert field.getDocValuesType() == DocValuesType.BINARY;
-      assert !isNorms;
-      int maxLength = 0;
-      for(BytesRef value : values) {
-        maxLength = Math.max(maxLength, value.length);
-      }
-      writeFieldEntry(field);
-
-      // write maxLength
-      SimpleTextUtil.write(data, MAXLENGTH);
-      SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
-      SimpleTextUtil.writeNewline(data);
-      
-      int maxBytesLength = Long.toString(maxLength).length();
-      StringBuilder sb = new StringBuilder();
-      for (int i = 0; i < maxBytesLength; i++) {
-        sb.append('0');
-      }
-      // write our pattern for encoding lengths
-      SimpleTextUtil.write(data, PATTERN);
-      SimpleTextUtil.write(data, sb.toString(), scratch);
-      SimpleTextUtil.writeNewline(data);
-      final DecimalFormat encoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
-
-      int numDocsWritten = 0;
-      for(BytesRef value : values) {
-        // write length
-        SimpleTextUtil.write(data, LENGTH);
-        SimpleTextUtil.write(data, encoder.format(value.length), scratch);
-        SimpleTextUtil.writeNewline(data);
-          
-        // write bytes -- don't use SimpleText.write
-        // because it escapes:
-        data.writeBytes(value.bytes, value.offset, value.length);
-
-        // pad to fit
-        for (int i = value.length; i < maxLength; i++) {
-          data.writeByte((byte)' ');
-        }
-        SimpleTextUtil.writeNewline(data);
-        numDocsWritten++;
-      }
-
-      assert numDocs == numDocsWritten;
-    }
-    
-    @Override
-    public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
-      assert fieldSeen(field.name);
-      assert field.getDocValuesType() == DocValuesType.SORTED;
-      assert !isNorms;
-      writeFieldEntry(field);
-
-      int valueCount = 0;
-      int maxLength = -1;
-      for(BytesRef value : values) {
-        maxLength = Math.max(maxLength, value.length);
-        valueCount++;
-      }
-
-      // write numValues
-      SimpleTextUtil.write(data, NUMVALUES);
-      SimpleTextUtil.write(data, Integer.toString(valueCount), scratch);
-      SimpleTextUtil.writeNewline(data);
-      
-      // write maxLength
-      SimpleTextUtil.write(data, MAXLENGTH);
-      SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
-      SimpleTextUtil.writeNewline(data);
-      
-      int maxBytesLength = Integer.toString(maxLength).length();
-      StringBuilder sb = new StringBuilder();
-      for (int i = 0; i < maxBytesLength; i++) {
-        sb.append('0');
-      }
-      
-      // write our pattern for encoding lengths
-      SimpleTextUtil.write(data, PATTERN);
-      SimpleTextUtil.write(data, sb.toString(), scratch);
-      SimpleTextUtil.writeNewline(data);
-      final DecimalFormat encoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
-      
-      int maxOrdBytes = Integer.toString(valueCount).length();
-      sb.setLength(0);
-      for (int i = 0; i < maxOrdBytes; i++) {
-        sb.append('0');
-      }
-      
-      // write our pattern for ords
-      SimpleTextUtil.write(data, ORDPATTERN);
-      SimpleTextUtil.write(data, sb.toString(), scratch);
-      SimpleTextUtil.writeNewline(data);
-      final DecimalFormat ordEncoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
-
-      // for asserts:
-      int valuesSeen = 0;
-
-      for(BytesRef value : values) {
-        // write length
-        SimpleTextUtil.write(data, LENGTH);
-        SimpleTextUtil.write(data, encoder.format(value.length), scratch);
-        SimpleTextUtil.writeNewline(data);
-          
-        // write bytes -- don't use SimpleText.write
-        // because it escapes:
-        data.writeBytes(value.bytes, value.offset, value.length);
-
-        // pad to fit
-        for (int i = value.length; i < maxLength; i++) {
-          data.writeByte((byte)' ');
-        }
-        SimpleTextUtil.writeNewline(data);
-        valuesSeen++;
-        assert valuesSeen <= valueCount;
-      }
-
-      assert valuesSeen == valueCount;
-
-      for(Number ord : docToOrd) {
-        SimpleTextUtil.write(data, ordEncoder.format(ord.intValue()), scratch);
-        SimpleTextUtil.writeNewline(data);
-      }
-    }
-
-    /** write the header for this field */
-    private void writeFieldEntry(FieldInfo field) throws IOException {
-      SimpleTextUtil.write(data, FIELD);
-      SimpleTextUtil.write(data, field.name, scratch);
-      SimpleTextUtil.writeNewline(data);
-    }
-    
-    @Override
-    public void close() throws IOException {
-      boolean success = false;
-      try {
-        assert !fieldsSeen.isEmpty();
-        // TODO: sheisty to do this here?
-        SimpleTextUtil.write(data, END);
-        SimpleTextUtil.writeNewline(data);
-        success = true;
-      } finally {
-        if (success) {
-          IOUtils.close(data);
-        } else {
-          IOUtils.closeWhileHandlingException(data);
-        }
-      }
-    }
-  };
-
-  // nocommit make sure we test "all docs have 0 value",
-  // "all docs have empty BytesREf"
-
-  static class SimpleTextDocValuesReader extends DocValuesProducer {
-
-    static class OneField {
-      FieldInfo fieldInfo;
-      long dataStartFilePointer;
-      String pattern;
-      String ordPattern;
-      int maxLength;
-      boolean fixedLength;
-      long minValue;
-      int numValues;
-    };
-
-    final int maxDoc;
-    final IndexInput data;
-    final BytesRef scratch = new BytesRef();
-    final Map<String,OneField> fields = new HashMap<String,OneField>();
-    
-    public SimpleTextDocValuesReader(SegmentReadState state, String ext) throws IOException {
-      //System.out.println("dir=" + state.directory + " seg=" + state.segmentInfo.name + " ext=" + ext);
-      data = state.directory.openInput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext), state.context);
-      maxDoc = state.segmentInfo.getDocCount();
-      while(true) {
-        readLine();
-        //System.out.println("READ field=" + scratch.utf8ToString());
-        if (scratch.equals(END)) {
-          break;
-        }
-        assert startsWith(FIELD) : scratch.utf8ToString();
-        String fieldName = stripPrefix(FIELD);
-        //System.out.println("  field=" + fieldName);
-        FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldName);
-        assert fieldInfo != null;
-
-        OneField field = new OneField();
-        fields.put(fieldName, field);
-
-        field.fieldInfo = fieldInfo;
-        //System.out.println("  field=" + fieldName);
-
-        // nocommit hack hack hack!!:
-        DocValuesType dvType = ext.equals("len") ? DocValuesType.NUMERIC : fieldInfo.getDocValuesType();
-        assert dvType != null;
-        if (dvType == DocValuesType.NUMERIC) {
-          readLine();
-          assert startsWith(MINVALUE): "got " + scratch.utf8ToString() + " field=" + fieldName + " ext=" + ext;
-          field.minValue = Long.parseLong(stripPrefix(MINVALUE));
-          readLine();
-          assert startsWith(PATTERN);
-          field.pattern = stripPrefix(PATTERN);
-          field.dataStartFilePointer = data.getFilePointer();
-          data.seek(data.getFilePointer() + (1+field.pattern.length()) * maxDoc);
-        } else if (dvType == DocValuesType.BINARY) {
-          readLine();
-          assert startsWith(MAXLENGTH);
-          field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
-          readLine();
-          assert startsWith(PATTERN);
-          field.pattern = stripPrefix(PATTERN);
-          field.dataStartFilePointer = data.getFilePointer();
-          data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * maxDoc);
-        } else if (dvType == DocValuesType.SORTED) {
-          readLine();
-          assert startsWith(NUMVALUES);
-          field.numValues = Integer.parseInt(stripPrefix(NUMVALUES));
-          readLine();
-          assert startsWith(MAXLENGTH);
-          field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
-          readLine();
-          assert startsWith(PATTERN);
-          field.pattern = stripPrefix(PATTERN);
-          readLine();
-          assert startsWith(ORDPATTERN);
-          field.ordPattern = stripPrefix(ORDPATTERN);
-          field.dataStartFilePointer = data.getFilePointer();
-          data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * field.numValues + (1+field.ordPattern.length())*maxDoc);
-        } else {
-          throw new AssertionError();
-        }
-      }
-
-      // We should only be called from above if at least one
-      // field has DVs:
-      assert !fields.isEmpty();
-    }
-
-    @Override
-    public NumericDocValues getNumeric(FieldInfo fieldInfo) throws IOException {
-      final OneField field = fields.get(fieldInfo.name);
-      assert field != null;
-
-      // SegmentCoreReaders already verifies this field is
-      // valid:
-      assert field != null: "field=" + fieldInfo.name + " fields=" + fields;
-
-      final IndexInput in = data.clone();
-      final BytesRef scratch = new BytesRef();
-      final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
-
-      decoder.setParseBigDecimal(true);
-
-      return new NumericDocValues() {
-        @Override
-        public long get(int docID) {
-          try {
-            //System.out.println(Thread.currentThread().getName() + ": get docID=" + docID + " in=" + in);
-            if (docID < 0 || docID >= maxDoc) {
-              throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
-            }
-            in.seek(field.dataStartFilePointer + (1+field.pattern.length())*docID);
-            SimpleTextUtil.readLine(in, scratch);
-            //System.out.println("parsing delta: " + scratch.utf8ToString());
-            BigDecimal bd;
-            try {
-              bd = (BigDecimal) decoder.parse(scratch.utf8ToString());
-            } catch (ParseException pe) {
-              CorruptIndexException e = new CorruptIndexException("failed to parse BigDecimal value");
-              e.initCause(pe);
-              throw e;
-            }
-            return BigInteger.valueOf(field.minValue).add(bd.toBigIntegerExact()).longValue();
-          } catch (IOException ioe) {
-            throw new RuntimeException(ioe);
-          }
-        }
-      };
-    }
-
-    @Override
-    public BinaryDocValues getBinary(FieldInfo fieldInfo) throws IOException {
-      final OneField field = fields.get(fieldInfo.name);
-
-      // SegmentCoreReaders already verifies this field is
-      // valid:
-      assert field != null;
-
-      final IndexInput in = data.clone();
-      final BytesRef scratch = new BytesRef();
-      final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
-
-      return new BinaryDocValues() {
-        @Override
-        public void get(int docID, BytesRef result) {
-          try {
-            if (docID < 0 || docID >= maxDoc) {
-              throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
-            }
-            in.seek(field.dataStartFilePointer + (9+field.pattern.length() + field.maxLength)*docID);
-            SimpleTextUtil.readLine(in, scratch);
-            assert StringHelper.startsWith(scratch, LENGTH);
-            int len;
-            try {
-              len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, "UTF-8")).intValue();
-            } catch (ParseException pe) {
-              CorruptIndexException e = new CorruptIndexException("failed to parse int length");
-              e.initCause(pe);
-              throw e;
-            }
-            result.bytes = new byte[len];
-            result.offset = 0;
-            result.length = len;
-            in.readBytes(result.bytes, 0, len);
-          } catch (IOException ioe) {
-            throw new RuntimeException(ioe);
-          }
-        }
-      };
-    }
-
-    @Override
-    public SortedDocValues getSorted(FieldInfo fieldInfo) throws IOException {
-      final OneField field = fields.get(fieldInfo.name);
-
-      // SegmentCoreReaders already verifies this field is
-      // valid:
-      assert field != null;
-
-      final IndexInput in = data.clone();
-      final BytesRef scratch = new BytesRef();
-      final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
-      final DecimalFormat ordDecoder = new DecimalFormat(field.ordPattern, new DecimalFormatSymbols(Locale.ROOT));
-
-      return new SortedDocValues() {
-        @Override
-        public int getOrd(int docID) {
-          if (docID < 0 || docID >= maxDoc) {
-            throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
-          }
-          try {
-            in.seek(field.dataStartFilePointer + field.numValues * (9 + field.pattern.length() + field.maxLength) + docID * (1 + field.ordPattern.length()));
-            SimpleTextUtil.readLine(in, scratch);
-            try {
-              return ordDecoder.parse(scratch.utf8ToString()).intValue();
-            } catch (ParseException pe) {
-              CorruptIndexException e = new CorruptIndexException("failed to parse ord");
-              e.initCause(pe);
-              throw e;
-            }
-          } catch (IOException ioe) {
-            throw new RuntimeException(ioe);
-          }
-        }
-
-        @Override
-        public void lookupOrd(int ord, BytesRef result) {
-          try {
-            if (ord < 0 || ord >= field.numValues) {
-              throw new IndexOutOfBoundsException("ord must be 0 .. " + (field.numValues-1) + "; got " + ord);
-            }
-            in.seek(field.dataStartFilePointer + ord * (9 + field.pattern.length() + field.maxLength));
-            SimpleTextUtil.readLine(in, scratch);
-            assert StringHelper.startsWith(scratch, LENGTH): "got " + scratch.utf8ToString() + " in=" + in;
-            int len;
-            try {
-              len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, "UTF-8")).intValue();
-            } catch (ParseException pe) {
-              CorruptIndexException e = new CorruptIndexException("failed to parse int length");
-              e.initCause(pe);
-              throw e;
-            }
-            result.bytes = new byte[len];
-            result.offset = 0;
-            result.length = len;
-            in.readBytes(result.bytes, 0, len);
-          } catch (IOException ioe) {
-            throw new RuntimeException(ioe);
-          }
-        }
-
-        @Override
-        public int getValueCount() {
-          return field.numValues;
-        }
-      };
-    }
-
-    @Override
-    public void close() throws IOException {
-      data.close();
-    }
-
-    /** Used only in ctor: */
-    private void readLine() throws IOException {
-      SimpleTextUtil.readLine(data, scratch);
-      //System.out.println("line: " + scratch.utf8ToString());
-    }
-
-    /** Used only in ctor: */
-    private boolean startsWith(BytesRef prefix) {
-      return StringHelper.startsWith(scratch, prefix);
-    }
-
-    /** Used only in ctor: */
-    private String stripPrefix(BytesRef prefix) throws IOException {
-      return new String(scratch.bytes, scratch.offset + prefix.length, scratch.length - prefix.length, "UTF-8");
-    }
-  }
 }

Added: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java?rev=1436640&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java (added)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java Mon Jan 21 21:50:20 2013
@@ -0,0 +1,312 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.text.ParseException;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.lucene.codecs.DocValuesProducer;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
+
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.END;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.FIELD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.LENGTH;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MAXLENGTH;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.MINVALUE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.NUMVALUES;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.ORDPATTERN;
+import static org.apache.lucene.codecs.simpletext.SimpleTextDocValuesWriter.PATTERN;
+
+
+// nocommit make sure we test "all docs have 0 value",
+// "all docs have empty BytesRef"
+
+class SimpleTextDocValuesReader extends DocValuesProducer {
+
+  static class OneField { // per-field header parsed by the constructor
+    FieldInfo fieldInfo; // looked up by field name in state.fieldInfos
+    long dataStartFilePointer; // file pointer just past this field's header lines
+    String pattern; // DecimalFormat pattern: numeric deltas, or value lengths for binary/sorted
+    String ordPattern; // DecimalFormat pattern for ord lines (set for sorted fields only)
+    int maxLength; // "maxlength" header value (set for binary and sorted fields)
+    boolean fixedLength; // NOTE(review): never read or written in this class
+    long minValue; // "minvalue" header value; numeric entries are deltas from it
+    int numValues; // "numvalues" header value (set for sorted fields only)
+  };
+
+  final int maxDoc; // state.segmentInfo.getDocCount(); upper bound for every docID lookup
+  final IndexInput data; // the opened doc values file; cloned by each getNumeric/getBinary/getSorted call
+  final BytesRef scratch = new BytesRef(); // line buffer used while the ctor parses headers
+  final Map<String,OneField> fields = new HashMap<String,OneField>(); // field name -> parsed header
+  
+  public SimpleTextDocValuesReader(SegmentReadState state, String ext) throws IOException {
+    // Opens the segment's doc values file and records each field's header; the values themselves are skipped over here.
+    data = state.directory.openInput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext), state.context);
+    maxDoc = state.segmentInfo.getDocCount();
+    while(true) {
+      readLine();
+      // the END line terminates the list of fields:
+      if (scratch.equals(END)) {
+        break;
+      }
+      assert startsWith(FIELD) : scratch.utf8ToString();
+      String fieldName = stripPrefix(FIELD);
+      //System.out.println("  field=" + fieldName);
+      FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldName);
+      assert fieldInfo != null;
+
+      OneField field = new OneField();
+      fields.put(fieldName, field);
+
+      field.fieldInfo = fieldInfo;
+      // the skip distances below are computed as long: an int product overflows on large segments
+
+      // nocommit hack hack hack!!:
+      DocValuesType dvType = ext.equals("len") ? DocValuesType.NUMERIC : fieldInfo.getDocValuesType();
+      assert dvType != null;
+      if (dvType == DocValuesType.NUMERIC) {
+        readLine();
+        assert startsWith(MINVALUE): "got " + scratch.utf8ToString() + " field=" + fieldName + " ext=" + ext;
+        field.minValue = Long.parseLong(stripPrefix(MINVALUE));
+        readLine();
+        assert startsWith(PATTERN);
+        field.pattern = stripPrefix(PATTERN);
+        field.dataStartFilePointer = data.getFilePointer();
+        data.seek(data.getFilePointer() + (1+field.pattern.length()) * (long) maxDoc);
+      } else if (dvType == DocValuesType.BINARY) {
+        readLine();
+        assert startsWith(MAXLENGTH);
+        field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
+        readLine();
+        assert startsWith(PATTERN);
+        field.pattern = stripPrefix(PATTERN);
+        field.dataStartFilePointer = data.getFilePointer();
+        data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * (long) maxDoc);
+      } else if (dvType == DocValuesType.SORTED) {
+        readLine();
+        assert startsWith(NUMVALUES);
+        field.numValues = Integer.parseInt(stripPrefix(NUMVALUES));
+        readLine();
+        assert startsWith(MAXLENGTH);
+        field.maxLength = Integer.parseInt(stripPrefix(MAXLENGTH));
+        readLine();
+        assert startsWith(PATTERN);
+        field.pattern = stripPrefix(PATTERN);
+        readLine();
+        assert startsWith(ORDPATTERN);
+        field.ordPattern = stripPrefix(ORDPATTERN);
+        field.dataStartFilePointer = data.getFilePointer();
+        data.seek(data.getFilePointer() + (9+field.pattern.length()+field.maxLength) * (long) field.numValues + (1+field.ordPattern.length()) * (long) maxDoc);
+      } else {
+        throw new AssertionError();
+      }
+    }
+
+    // We should only be called from above if at least one
+    // field has DVs:
+    assert !fields.isEmpty();
+  }
+
+  /** Returns a view that decodes each document's fixed-width line as {@code minValue} plus the stored delta. */
+  @Override
+  public NumericDocValues getNumeric(FieldInfo fieldInfo) throws IOException {
+    final OneField field = fields.get(fieldInfo.name);
+
+    // SegmentCoreReaders already verifies this field is
+    // valid:
+    assert field != null: "field=" + fieldInfo.name + " fields=" + fields;
+
+    final IndexInput in = data.clone();
+    final BytesRef scratch = new BytesRef();
+    final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
+
+    decoder.setParseBigDecimal(true);
+
+    return new NumericDocValues() {
+      @Override
+      public long get(int docID) {
+        try {
+          if (docID < 0 || docID >= maxDoc) {
+            throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
+          }
+          // entries are fixed width (pattern length + 1); multiply as long so large docIDs cannot overflow int
+          in.seek(field.dataStartFilePointer + (1+field.pattern.length())*(long)docID);
+          SimpleTextUtil.readLine(in, scratch);
+          // the line holds the delta from field.minValue, parsed as a BigDecimal:
+          BigDecimal bd;
+          try {
+            bd = (BigDecimal) decoder.parse(scratch.utf8ToString());
+          } catch (ParseException pe) {
+            CorruptIndexException e = new CorruptIndexException("failed to parse BigDecimal value");
+            e.initCause(pe);
+            throw e;
+          }
+          return BigInteger.valueOf(field.minValue).add(bd.toBigIntegerExact()).longValue();
+        } catch (IOException ioe) {
+          throw new RuntimeException(ioe);
+        }
+      }
+    };
+  }
+
+  /** Returns a view that reads each document's length line and bytes on demand from a private clone of the file. */
+  @Override
+  public BinaryDocValues getBinary(FieldInfo fieldInfo) throws IOException {
+    final OneField field = fields.get(fieldInfo.name);
+    // SegmentCoreReaders already verifies this field is
+    // valid:
+    assert field != null;
+    final IndexInput in = data.clone();
+    final BytesRef scratch = new BytesRef();
+    final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
+
+    return new BinaryDocValues() {
+      @Override
+      public void get(int docID, BytesRef result) {
+        try {
+          if (docID < 0 || docID >= maxDoc) {
+            throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
+          }
+          // records are fixed width (9 + pattern length + maxLength); multiply as long so the offset cannot overflow int
+          in.seek(field.dataStartFilePointer + (9+field.pattern.length() + field.maxLength)*(long)docID);
+          SimpleTextUtil.readLine(in, scratch);
+          assert StringHelper.startsWith(scratch, LENGTH);
+          int len;
+          try {
+            len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, "UTF-8")).intValue();
+          } catch (ParseException pe) {
+            CorruptIndexException e = new CorruptIndexException("failed to parse int length");
+            e.initCause(pe);
+            throw e;
+          }
+          result.bytes = new byte[len];
+          result.offset = 0;
+          result.length = len;
+          in.readBytes(result.bytes, 0, len);
+        } catch (IOException ioe) {
+          throw new RuntimeException(ioe);
+        }
+      }
+    };
+  }
+
+  /** Returns a view that reads ords and values on demand from a private clone of the file. */
+  @Override
+  public SortedDocValues getSorted(FieldInfo fieldInfo) throws IOException {
+    final OneField field = fields.get(fieldInfo.name);
+    // SegmentCoreReaders already verifies this field is
+    // valid:
+    assert field != null;
+    final IndexInput in = data.clone();
+    final BytesRef scratch = new BytesRef();
+    final DecimalFormat decoder = new DecimalFormat(field.pattern, new DecimalFormatSymbols(Locale.ROOT));
+    final DecimalFormat ordDecoder = new DecimalFormat(field.ordPattern, new DecimalFormatSymbols(Locale.ROOT));
+
+    return new SortedDocValues() {
+      @Override
+      public int getOrd(int docID) {
+        if (docID < 0 || docID >= maxDoc) {
+          throw new IndexOutOfBoundsException("docID must be 0 .. " + (maxDoc-1) + "; got " + docID);
+        }
+        try {
+          // ord lines follow the numValues value records; long math keeps the offset from overflowing int
+          in.seek(field.dataStartFilePointer + field.numValues * (long) (9 + field.pattern.length() + field.maxLength) + docID * (long) (1 + field.ordPattern.length()));
+          SimpleTextUtil.readLine(in, scratch);
+          try {
+            return ordDecoder.parse(scratch.utf8ToString()).intValue();
+          } catch (ParseException pe) {
+            CorruptIndexException e = new CorruptIndexException("failed to parse ord");
+            e.initCause(pe);
+            throw e;
+          }
+        } catch (IOException ioe) {
+          throw new RuntimeException(ioe);
+        }
+      }
+
+      @Override
+      public void lookupOrd(int ord, BytesRef result) {
+        try {
+          if (ord < 0 || ord >= field.numValues) {
+            throw new IndexOutOfBoundsException("ord must be 0 .. " + (field.numValues-1) + "; got " + ord);
+          }
+          in.seek(field.dataStartFilePointer + ord * (long) (9 + field.pattern.length() + field.maxLength));
+          SimpleTextUtil.readLine(in, scratch);
+          assert StringHelper.startsWith(scratch, LENGTH): "got " + scratch.utf8ToString() + " in=" + in;
+          int len;
+          try {
+            len = decoder.parse(new String(scratch.bytes, scratch.offset + LENGTH.length, scratch.length - LENGTH.length, "UTF-8")).intValue();
+          } catch (ParseException pe) {
+            CorruptIndexException e = new CorruptIndexException("failed to parse int length");
+            e.initCause(pe);
+            throw e;
+          }
+          result.bytes = new byte[len];
+          result.offset = 0;
+          result.length = len;
+          in.readBytes(result.bytes, 0, len);
+        } catch (IOException ioe) {
+          throw new RuntimeException(ioe);
+        }
+      }
+
+      @Override
+      public int getValueCount() {
+        return field.numValues;
+      }
+    };
+  }
+
+  @Override
+  public void close() throws IOException {
+    data.close(); // closes the input opened by the constructor
+  }
+
+  /** Reads the next line of {@code data} into {@code scratch}. Used only in ctor: */
+  private void readLine() throws IOException {
+    SimpleTextUtil.readLine(data, scratch);
+    //System.out.println("line: " + scratch.utf8ToString());
+  }
+
+  /** Returns true if the current {@code scratch} line begins with {@code prefix}. Used only in ctor: */
+  private boolean startsWith(BytesRef prefix) {
+    return StringHelper.startsWith(scratch, prefix);
+  }
+
+  /** Decodes the current {@code scratch} line as UTF-8, skipping its first {@code prefix.length} bytes. Used only in ctor: */
+  private String stripPrefix(BytesRef prefix) throws IOException {
+    return new String(scratch.bytes, scratch.offset + prefix.length, scratch.length - prefix.length, "UTF-8");
+  }
+}

Added: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java?rev=1436640&view=auto
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java (added)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesWriter.java Mon Jan 21 21:50:20 2013
@@ -0,0 +1,282 @@
+package org.apache.lucene.codecs.simpletext;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import org.apache.lucene.codecs.DocValuesConsumer;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+
+class SimpleTextDocValuesWriter extends DocValuesConsumer {
+  final static BytesRef END     = new BytesRef("END"); // last line of the file, written by close()
+  final static BytesRef FIELD   = new BytesRef("field "); // prefix of the line that starts each field
+  // used for numerics (PATTERN is also written for bytes and sorted bytes)
+  final static BytesRef MINVALUE = new BytesRef("  minvalue ");
+  final static BytesRef PATTERN  = new BytesRef("  pattern ");
+  // used for bytes and sorted bytes (LENGTH prefixes the length line of every value record)
+  final static BytesRef LENGTH = new BytesRef("length ");
+  final static BytesRef MAXLENGTH = new BytesRef("  maxlength ");
+  // used for sorted bytes only
+  final static BytesRef NUMVALUES = new BytesRef("  numvalues ");
+  final static BytesRef ORDPATTERN = new BytesRef("  ordpattern ");
+  
+  final IndexOutput data;
+  final BytesRef scratch = new BytesRef();
+  final int numDocs;
+  // nocommit
+  final boolean isNorms;
+  private final Set<String> fieldsSeen = new HashSet<String>(); // for asserting
+  
+  public SimpleTextDocValuesWriter(SegmentWriteState state, String ext) throws IOException {
+    // Creates the segment file with extension ext; the "len" extension marks this writer as writing norms.
+    data = state.directory.createOutput(IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, ext), state.context);
+    numDocs = state.segmentInfo.getDocCount();
+    isNorms = ext.equals("len");
+  }
+
+  // for asserting: remembers the field and trips an assert if it was already added; always returns true
+  private boolean fieldSeen(String field) {
+    assert !fieldsSeen.contains(field): "field \"" + field + "\" was added more than once during flush";
+    fieldsSeen.add(field);
+    return true;
+  }
+
+  /**
+   * Writes one numeric field: a minvalue line, a pattern line, then one
+   * zero-padded delta line per value of {@code values}.
+   */
+  @Override
+  public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+    assert fieldSeen(field.name);
+    // nocommit: this must be multiple asserts
+    //assert (field.getDocValuesType() != null && (DocValues.isNumber(field.getDocValuesType()) || DocValues.isFloat(field.getDocValuesType()))) ||
+    //  (field.getNormType() != null && (DocValues.isNumber(field.getNormType()) || DocValues.isFloat(field.getNormType()))): "field=" + field.name;
+    writeFieldEntry(field);
+
+    // Pass 1: scan all values for the smallest and the largest.
+    long lowest = Long.MAX_VALUE;
+    long highest = Long.MIN_VALUE;
+    for (Number number : values) {
+      final long current = number.longValue();
+      lowest = current < lowest ? current : lowest;
+      highest = current > highest ? current : highest;
+    }
+
+    // Header line: the minimum, which every stored entry is a delta from.
+    SimpleTextUtil.write(data, MINVALUE);
+    SimpleTextUtil.write(data, Long.toString(lowest), scratch);
+    SimpleTextUtil.writeNewline(data);
+
+    // The widest possible delta (max - min, computed with BigInteger) fixes
+    // how many digits every zero-padded entry gets.
+    final BigInteger widestDelta = BigInteger.valueOf(highest).subtract(BigInteger.valueOf(lowest));
+    final int width = widestDelta.toString().length();
+    final StringBuilder zeros = new StringBuilder();
+    while (zeros.length() < width) {
+      zeros.append('0');
+    }
+    final String patternString = zeros.toString();
+
+    // Header line: the all-zeros DecimalFormat pattern.
+    SimpleTextUtil.write(data, PATTERN);
+    SimpleTextUtil.write(data, patternString, scratch);
+    SimpleTextUtil.writeNewline(data);
+
+    final DecimalFormat encoder = new DecimalFormat(patternString, new DecimalFormatSymbols(Locale.ROOT));
+
+    // Pass 2: one fixed-width line per value.
+    int numDocsWritten = 0;
+    for (Number number : values) {
+      final long current = number.longValue();
+      assert current >= lowest;
+      final Number delta = BigInteger.valueOf(current).subtract(BigInteger.valueOf(lowest));
+      final String formatted = encoder.format(delta);
+      assert formatted.length() == patternString.length();
+      SimpleTextUtil.write(data, formatted, scratch);
+      SimpleTextUtil.writeNewline(data);
+      numDocsWritten++;
+      assert numDocsWritten <= numDocs;
+    }
+
+    assert numDocs == numDocsWritten: "numDocs=" + numDocs + " numDocsWritten=" + numDocsWritten;
+  }
+
+  /** Writes one binary field: maxlength and pattern lines, then a length line plus space-padded bytes per value. */
+  @Override
+  public void addBinaryField(FieldInfo field, Iterable<BytesRef> values) throws IOException {
+    assert fieldSeen(field.name);
+    assert field.getDocValuesType() == DocValuesType.BINARY;
+    assert !isNorms;
+    // Pass 1: find the longest value; every record is padded to that length.
+    int longest = 0;
+    for (BytesRef bytes : values) {
+      longest = bytes.length > longest ? bytes.length : longest;
+    }
+    writeFieldEntry(field);
+
+    // Header line: the maximum value length.
+    SimpleTextUtil.write(data, MAXLENGTH);
+    SimpleTextUtil.write(data, Integer.toString(longest), scratch);
+    SimpleTextUtil.writeNewline(data);
+
+    // Header line: an all-zeros pattern as wide as the decimal form of the maximum length.
+    final int width = Long.toString(longest).length();
+    final StringBuilder zeros = new StringBuilder();
+    while (zeros.length() < width) {
+      zeros.append('0');
+    }
+    final String lengthPattern = zeros.toString();
+    SimpleTextUtil.write(data, PATTERN);
+    SimpleTextUtil.write(data, lengthPattern, scratch);
+    SimpleTextUtil.writeNewline(data);
+    final DecimalFormat encoder = new DecimalFormat(lengthPattern, new DecimalFormatSymbols(Locale.ROOT));
+
+    // Pass 2: one fixed-width record per value.
+    int numDocsWritten = 0;
+    for (BytesRef bytes : values) {
+      SimpleTextUtil.write(data, LENGTH);
+      SimpleTextUtil.write(data, encoder.format(bytes.length), scratch);
+      SimpleTextUtil.writeNewline(data);
+      // Raw bytes: SimpleTextUtil.write is avoided because it escapes.
+      data.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+      // Pad with spaces up to the longest value.
+      for (int pad = longest - bytes.length; pad > 0; pad--) {
+        data.writeByte((byte)' ');
+      }
+      SimpleTextUtil.writeNewline(data);
+      numDocsWritten++;
+    }
+
+    assert numDocs == numDocsWritten;
+  }
+  
+  /** Writes one sorted field: header lines, one padded record per unique value, then one ord line per document. */
+  @Override
+  public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+    assert fieldSeen(field.name);
+    assert field.getDocValuesType() == DocValuesType.SORTED;
+    assert !isNorms;
+    writeFieldEntry(field);
+
+    int valueCount = 0;
+    int maxLength = -1; // stays -1 when there are no values
+    for(BytesRef value : values) {
+      maxLength = Math.max(maxLength, value.length);
+      valueCount++;
+    }
+
+    // write numValues
+    SimpleTextUtil.write(data, NUMVALUES);
+    SimpleTextUtil.write(data, Integer.toString(valueCount), scratch);
+    SimpleTextUtil.writeNewline(data);
+    // write maxLength
+    SimpleTextUtil.write(data, MAXLENGTH);
+    SimpleTextUtil.write(data, Integer.toString(maxLength), scratch);
+    SimpleTextUtil.writeNewline(data);
+    int maxBytesLength = Integer.toString(maxLength).length();
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < maxBytesLength; i++) {
+      sb.append('0');
+    }
+
+    // write our pattern for encoding lengths
+    SimpleTextUtil.write(data, PATTERN);
+    SimpleTextUtil.write(data, sb.toString(), scratch);
+    SimpleTextUtil.writeNewline(data);
+    final DecimalFormat encoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
+
+    int maxOrdBytes = Integer.toString(valueCount).length();
+    sb.setLength(0);
+    for (int i = 0; i < maxOrdBytes; i++) {
+      sb.append('0');
+    }
+
+    // write our pattern for ords
+    SimpleTextUtil.write(data, ORDPATTERN);
+    SimpleTextUtil.write(data, sb.toString(), scratch);
+    SimpleTextUtil.writeNewline(data);
+    final DecimalFormat ordEncoder = new DecimalFormat(sb.toString(), new DecimalFormatSymbols(Locale.ROOT));
+
+    // for asserts:
+    int valuesSeen = 0;
+    for(BytesRef value : values) {
+      // write length
+      SimpleTextUtil.write(data, LENGTH);
+      SimpleTextUtil.write(data, encoder.format(value.length), scratch);
+      SimpleTextUtil.writeNewline(data);
+      // write bytes -- don't use SimpleText.write
+      // because it escapes:
+      data.writeBytes(value.bytes, value.offset, value.length);
+      // pad to fit
+      for (int i = value.length; i < maxLength; i++) {
+        data.writeByte((byte)' ');
+      }
+      SimpleTextUtil.writeNewline(data);
+      valuesSeen++;
+      assert valuesSeen <= valueCount;
+    }
+
+    assert valuesSeen == valueCount;
+
+    // for asserts: the reader skips exactly one ord line per document
+    int ordsWritten = 0;
+    for(Number ord : docToOrd) {
+      SimpleTextUtil.write(data, ordEncoder.format(ord.intValue()), scratch);
+      SimpleTextUtil.writeNewline(data);
+      ordsWritten++;
+    }
+    assert numDocs == ordsWritten: "numDocs=" + numDocs + " ordsWritten=" + ordsWritten;
+  }
+
+  /** Writes the {@code "field <name>"} line that starts this field's section of the file. */
+  private void writeFieldEntry(FieldInfo field) throws IOException {
+    SimpleTextUtil.write(data, FIELD);
+    SimpleTextUtil.write(data, field.name, scratch);
+    SimpleTextUtil.writeNewline(data);
+  }
+  
+  @Override
+  public void close() throws IOException {
+    boolean success = false; // becomes true once the END line has been written
+    try {
+      assert !fieldsSeen.isEmpty();
+      // TODO: sheisty to do this here?
+      SimpleTextUtil.write(data, END);
+      SimpleTextUtil.writeNewline(data);
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(data);
+      } else {
+        IOUtils.closeWhileHandlingException(data); // presumably suppresses close errors so the original failure propagates
+      }
+    }
+  }
+}

Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java?rev=1436640&r1=1436639&r2=1436640&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java Mon Jan 21 21:50:20 2013
@@ -22,8 +22,6 @@ import java.io.IOException;
 import org.apache.lucene.codecs.DocValuesConsumer;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.NormsFormat;
-import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat.SimpleTextDocValuesReader;
-import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat.SimpleTextDocValuesWriter;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;