You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/11/09 21:04:41 UTC
svn commit: r1407612 -
/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
Author: rmuir
Date: Fri Nov 9 20:04:40 2012
New Revision: 1407612
URL: http://svn.apache.org/viewvc?rev=1407612&view=rev
Log:
sorted bytes file format
Modified:
lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java?rev=1407612&r1=1407611&r2=1407612&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSimpleDocValuesFormat.java Fri Nov 9 20:04:40 2012
@@ -62,6 +62,9 @@ public class SimpleTextSimpleDocValuesFo
// used for bytes
final static BytesRef MAXLENGTH = new BytesRef(" maxlength ");
final static BytesRef LENGTH = new BytesRef("length ");
+ // used for sorted bytes
+ final static BytesRef NUMVALUES = new BytesRef(" numvalues");
+ final static BytesRef ORDPATTERN = new BytesRef(" ordpattern");
@Override
public SimpleDVConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
@@ -100,6 +103,28 @@ public class SimpleTextSimpleDocValuesFo
* </pre>
* so a document's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*docid
* the extra 9 is 2 newlines, plus "length " itself.
+ *
+ * for sorted bytes this is a fixed-width file, for example:
+ * <pre>
+ * field myField
+ * numvalues 10
+ * maxlength 8
+ * pattern 0
+ * ordpattern 00
+ * length 6
+ * foobar[space][space]
+ * length 3
+ * baz[space][space][space][space][space]
+ * ...
+ * 03
+ * 06
+ * 01
+ * 10
+ * ...
+ * </pre>
+ * so the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numValues.
+ * a document's ord can be retrieved by seeking to "ord section" + (1+ordpattern.length)*docid
+ * (the extra 1 is the newline that follows each ord), and
+ * an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord.
*
* the reader can just scan this file when it opens, skipping over the data blocks
* and saving the offset/etc for each field.