You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/15 01:51:19 UTC

svn commit: r1433250 - in /lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene: codecs/lucene41/ util/fst/

Author: rmuir
Date: Tue Jan 15 00:51:19 2013
New Revision: 1433250

URL: http://svn.apache.org/viewvc?rev=1433250&view=rev
Log:
add in-ram sortedbytes

Modified:
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesConsumer.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesProducer.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Util.java

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java?rev=1433250&r1=1433249&r2=1433250&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java Tue Jan 15 00:51:19 2013
@@ -144,7 +144,7 @@ public class Lucene41Codec extends Codec
 
   private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
   // nocommit
-  private final SimpleDocValuesFormat defaultDVFormat = SimpleDocValuesFormat.forName("Disk");
+  private final SimpleDocValuesFormat defaultDVFormat = SimpleDocValuesFormat.forName("Lucene41");
 
   private final SimpleNormsFormat simpleNormsFormat = new Lucene41SimpleNormsFormat();
 

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesConsumer.java?rev=1433250&r1=1433249&r2=1433250&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesConsumer.java Tue Jan 15 00:51:19 2013
@@ -30,6 +30,12 @@ import org.apache.lucene.index.SegmentWr
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FST.INPUT_TYPE;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
 
@@ -50,7 +56,6 @@ class Lucene41SimpleDocValuesConsumer ex
   static final byte FST = 2;
   
   final IndexOutput data, meta;
-  final int maxDoc;
   
   Lucene41SimpleDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
     boolean success = false;
@@ -61,7 +66,6 @@ class Lucene41SimpleDocValuesConsumer ex
       String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
       meta = state.directory.createOutput(metaName, state.context);
       CodecUtil.writeHeader(meta, metaCodec, VERSION_CURRENT);
-      maxDoc = state.segmentInfo.getDocCount();
       success = true;
     } finally {
       if (!success) {
@@ -217,7 +221,24 @@ class Lucene41SimpleDocValuesConsumer ex
 
   @Override
   public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
-    throw new AssertionError();
+    // write the ordinals as numerics
+    addNumericField(field, docToOrd);
+    
+    // write the values as FST
+    meta.writeVInt(field.number);
+    meta.writeByte(FST);
+    meta.writeLong(data.getFilePointer());
+    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+    Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
+    IntsRef scratch = new IntsRef();
+    long ord = 0;
+    for (BytesRef v : values) {
+      builder.add(Util.toIntsRef(v, scratch), ord);
+      ord++;
+    }
+    FST<Long> fst = builder.finish();
+    fst.save(data);
+    meta.writeVInt((int)ord);
   }
   
   // nocommit: can/should we make override merge + make it smarter to pull the values 

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesProducer.java?rev=1433250&r1=1433249&r2=1433250&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41SimpleDocValuesProducer.java Tue Jan 15 00:51:19 2013
@@ -24,6 +24,7 @@ import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.SimpleDVProducer;
 import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
@@ -33,6 +34,12 @@ import org.apache.lucene.index.SortedDoc
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FST.Arc;
+import org.apache.lucene.util.fst.FST.BytesReader;
+import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util;
 import org.apache.lucene.util.packed.PackedInts;
 
 class Lucene41SimpleDocValuesProducer extends SimpleDVProducer {
@@ -50,6 +57,9 @@ class Lucene41SimpleDocValuesProducer ex
   // else in this map.
   private final Map<Integer,BinaryDocValues> binaryInstances =
       new HashMap<Integer,BinaryDocValues>();
+  
+  private final Map<Integer,FST<Long>> fstInstances =
+      new HashMap<Integer,FST<Long>>();
     
   Lucene41SimpleDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
     String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
@@ -96,6 +106,13 @@ class Lucene41SimpleDocValuesProducer ex
         entry.minLength = meta.readVInt();
         entry.maxLength = meta.readVInt();
         binaries.put(fieldNumber, entry);
+      } else if (fieldType == Lucene41SimpleDocValuesConsumer.FST) {
+        FSTEntry entry = new FSTEntry();
+        entry.offset = meta.readLong();
+        entry.numOrds = meta.readVInt();
+        fsts.put(fieldNumber, entry);
+      } else {
+        throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
       }
       fieldNumber = meta.readVInt();
     }
@@ -113,7 +130,6 @@ class Lucene41SimpleDocValuesProducer ex
   
   private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
     NumericEntry entry = numerics.get(field.number);
-    final IndexInput data = this.data.clone();
     data.seek(entry.offset);
     if (entry.tableized) {
       int size = data.readVInt();
@@ -154,7 +170,6 @@ class Lucene41SimpleDocValuesProducer ex
   
   private BinaryDocValues loadBinary(FieldInfo field) throws IOException {
     BinaryEntry entry = binaries.get(field.number);
-    final IndexInput data = this.data.clone();
     data.seek(entry.offset);
     assert entry.numBytes < Integer.MAX_VALUE; // nocommit
     final byte[] bytes = new byte[(int)entry.numBytes];
@@ -184,13 +199,51 @@ class Lucene41SimpleDocValuesProducer ex
     }
   }
   
-  
-
   @Override
   public SortedDocValues getSorted(FieldInfo field) throws IOException {
-    throw new AssertionError();
-  }
+    final FSTEntry entry = fsts.get(field.number);
+    FST<Long> instance;
+    synchronized(this) {
+      instance = fstInstances.get(field.number);
+      if (instance == null) {
+        data.seek(entry.offset);
+        instance = new FST<Long>(data, PositiveIntOutputs.getSingleton(true));
+        fstInstances.put(field.number, instance);
+      }
+    }
+    final NumericDocValues docToOrd = getNumeric(field);
+    final FST<Long> fst = instance;
+    
+    // per-thread resources
+    final BytesReader in = fst.getBytesReader(0);
+    final Arc<Long> firstArc = new Arc<Long>();
+    final Arc<Long> scratchArc = new Arc<Long>();
+    final IntsRef scratchInts = new IntsRef();
+    
+    return new SortedDocValues() {
+      @Override
+      public int getOrd(int docID) {
+        return (int) docToOrd.get(docID);
+      }
+
+      @Override
+      public void lookupOrd(int ord, BytesRef result) {
+        try {
+          in.setPosition(0);
+          fst.getFirstArc(firstArc);
+          Util.toBytesRef(Util.getByOutput(fst, ord, in, firstArc, scratchArc, scratchInts), result);
+        } catch (IOException bogus) {
+          throw new RuntimeException(bogus);
+        }
+      }
 
+      @Override
+      public int getValueCount() {
+        return entry.numOrds;
+      }
+    };
+  }
+  
   @Override
   public void close() throws IOException {
     data.close();
@@ -212,5 +265,4 @@ class Lucene41SimpleDocValuesProducer ex
     long offset;
     int numOrds;
   }
-
 }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Util.java?rev=1433250&r1=1433249&r2=1433250&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Util.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Util.java Tue Jan 15 00:51:19 2013
@@ -109,7 +109,13 @@ public final class Util {
     FST.Arc<Long> scratchArc = new FST.Arc<Long>();
 
     final IntsRef result = new IntsRef();
-
+    
+    return getByOutput(fst, targetOutput, in, arc, scratchArc, result);
+  }
+    
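+  /** Expert: like {@link Util#getByOutput(FST, long)} except reusing the caller-supplied BytesReader, Arc, scratch Arc and IntsRef result instead of allocating new ones */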
+  // nocommit
+  public static IntsRef getByOutput(FST<Long> fst, long targetOutput, BytesReader in, Arc<Long> arc, Arc<Long> scratchArc, IntsRef result) throws IOException {
     long output = arc.output;
     int upto = 0;