You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@lucene.apache.org by rm...@apache.org on 2012/11/09 21:04:41 UTC

svn commit: r1407612 - /lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java

Author: rmuir
Date: Fri Nov  9 20:04:40 2012
New Revision: 1407612

URL: http://svn.apache.org/viewvc?rev=1407612&view=rev
Log:
sorted bytes file format

Modified:
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java

Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java?rev=1407612&r1=1407611&r2=1407612&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java Fri Nov  9 20:04:40 2012
@@ -62,6 +62,9 @@ public class SimpleTextSimpleDocValuesFo
   // used for bytes
   final static BytesRef MAXLENGTH = new BytesRef("  maxlength ");
   final static BytesRef LENGTH = new BytesRef("length ");
+  // used for sorted bytes (trailing space separates key from value, as in MAXLENGTH/LENGTH)
+  final static BytesRef NUMVALUES = new BytesRef("  numvalues ");
+  final static BytesRef ORDPATTERN = new BytesRef("  ordpattern ");
 
   @Override
   public SimpleDVConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
@@ -100,6 +103,28 @@ public class SimpleTextSimpleDocValuesFo
    *  </pre>
    *  so a document's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*docid
    *  the extra 9 is 2 newlines, plus "length " itself.
+   *  
+   *  for sorted bytes this is a fixed-width file, for example:
+   *  <pre>
+   *  field myField
+   *    numvalues 10
+   *    maxlength 8
+   *    pattern 0
+   *    ordpattern 00
+   *  length 6
+   *  foobar[space][space]
+   *  length 3
+   *  baz[space][space][space][space][space]
+   *  ...
+   *  03
+   *  06
+   *  01
+   *  09
+   *  ...
+   *  </pre>
+   *  so the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numvalues.
+   *  a document's ord can be retrieved by seeking to "ord section" + (1+ordpattern.length)*docid (the extra 1 is the newline).
+   *  an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord.
    *   
    *  the reader can just scan this file when it opens, skipping over the data blocks
    *  and saving the offset/etc for each field.