You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/24 18:01:10 UTC

svn commit: r1438072 - in /lucene/dev/branches/lucene4547/lucene: core/src/java/org/apache/lucene/codecs/lucene40/ core/src/java/org/apache/lucene/codecs/lucene41/ test-framework/src/java/org/apache/lucene/codecs/lucene40/ test-framework/src/java/org/a...

Author: rmuir
Date: Thu Jan 24 17:01:10 2013
New Revision: 1438072

URL: http://svn.apache.org/viewvc?rev=1438072&view=rev
Log:
4.0 sortedbytes

Removed:
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40LyingDocValuesFormat.java
    lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40LyingRWDocValuesFormat.java
Modified:
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java
    lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java
    lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java
    lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40Codec.java Thu Jan 24 17:01:10 2013
@@ -87,7 +87,7 @@ public class Lucene40Codec extends Codec
     return infosFormat;
   }
   
-  private final DocValuesFormat defaultDVFormat = new Lucene40LyingDocValuesFormat();
+  private final DocValuesFormat defaultDVFormat = new Lucene40DocValuesFormat();
 
   @Override
   public DocValuesFormat docValuesFormat() {

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java Thu Jan 24 17:01:10 2013
@@ -51,7 +51,6 @@ public class Lucene40DocValuesFormat ext
   static final String VAR_INTS_CODEC_NAME = "PackedInts";
   static final int VAR_INTS_VERSION_START = 0;
   static final int VAR_INTS_VERSION_CURRENT = VAR_INTS_VERSION_START;
-  
   static final byte VAR_INTS_PACKED = 0x00;
   static final byte VAR_INTS_FIXED_64 = 0x01;
   
@@ -75,4 +74,17 @@ public class Lucene40DocValuesFormat ext
   static final String BYTES_VAR_STRAIGHT_CODEC_NAME_DAT = "VarStraightBytesDat";
   static final int BYTES_VAR_STRAIGHT_VERSION_START = 0;
   static final int BYTES_VAR_STRAIGHT_VERSION_CURRENT = BYTES_VAR_STRAIGHT_VERSION_START;
+  
+  // constants for BYTES_FIXED_SORTED
+  static final String BYTES_FIXED_SORTED_CODEC_NAME_IDX = "FixedSortedBytesIdx";
+  static final String BYTES_FIXED_SORTED_CODEC_NAME_DAT = "FixedSortedBytesDat";
+  static final int BYTES_FIXED_SORTED_VERSION_START = 0;
+  static final int BYTES_FIXED_SORTED_VERSION_CURRENT = BYTES_FIXED_SORTED_VERSION_START;
+  
+  // constants for BYTES_VAR_SORTED
+  // NOTE THIS IS NOT A BUG! 4.0 actually screwed this up (VAR_SORTED and VAR_DEREF have same codec header)
+  static final String BYTES_VAR_SORTED_CODEC_NAME_IDX = "VarDerefBytesIdx";
+  static final String BYTES_VAR_SORTED_CODEC_NAME_DAT = "VarDerefBytesDat";
+  static final int BYTES_VAR_SORTED_VERSION_START = 0;
+  static final int BYTES_VAR_SORTED_VERSION_CURRENT = BYTES_VAR_SORTED_VERSION_START;
 }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java Thu Jan 24 17:01:10 2013
@@ -276,7 +276,7 @@ class Lucene40DocValuesReader extends Do
           instance = loadBytesVarStraight(field);
           break;
         default:
-          throw new AssertionError();
+          throw new AssertionError(); // nocommit
       }
       binaryInstances.put(field.number, instance);
     }
@@ -355,7 +355,113 @@ class Lucene40DocValuesReader extends Do
 
   @Override
   public synchronized SortedDocValues getSorted(FieldInfo field) throws IOException {
-    throw new AssertionError();
+    SortedDocValues instance = sortedInstances.get(field.number);
+    if (instance == null) {
+      String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, Integer.toString(field.number), "dat");
+      String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, Integer.toString(field.number), "idx");
+      IndexInput data = null;
+      IndexInput index = null;
+      boolean success = false;
+      try {
+        data = dir.openInput(dataName, state.context);
+        index = dir.openInput(indexName, state.context);
+        switch(LegacyDocValuesType.valueOf(field.getAttribute(legacyKey))) {
+          case BYTES_FIXED_SORTED:
+            instance = loadBytesFixedSorted(field, data, index);
+            break;
+          case BYTES_VAR_SORTED:
+            instance = loadBytesVarSorted(field, data, index);
+            break;
+          default:
+            throw new AssertionError();
+        }
+        success = true;
+      } finally {
+        if (success) {
+          IOUtils.close(data, index);
+        } else {
+          IOUtils.closeWhileHandlingException(data, index);
+        }
+      }
+      sortedInstances.put(field.number, instance);
+    }
+    return instance;
+  }
+  
+  private SortedDocValues loadBytesFixedSorted(FieldInfo field, IndexInput data, IndexInput index) throws IOException {
+    CodecUtil.checkHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, 
+                                Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, 
+                                Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
+    CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, 
+                                 Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, 
+                                 Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
+    
+    final int fixedLength = data.readInt();
+    final int valueCount = index.readInt();
+    
+    // nocommit? can the current impl even handle > 2G?
+    final byte[] bytes = new byte[fixedLength*valueCount];
+    data.readBytes(bytes, 0, bytes.length);
+    final PackedInts.Reader reader = PackedInts.getReader(index);
+    
+    return new SortedDocValues() {
+      @Override
+      public int getOrd(int docID) {
+        return (int) reader.get(docID);
+      }
+
+      @Override
+      public void lookupOrd(int ord, BytesRef result) {
+        result.bytes = bytes;
+        result.offset = ord * fixedLength;
+        result.length = fixedLength;
+      }
+
+      @Override
+      public int getValueCount() {
+        return valueCount;
+      }
+    };
+  }
+  
+  private SortedDocValues loadBytesVarSorted(FieldInfo field, IndexInput data, IndexInput index) throws IOException {
+    CodecUtil.checkHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, 
+                                Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, 
+                                Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
+    CodecUtil.checkHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, 
+                                 Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_START, 
+                                 Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
+  
+    long maxAddress = index.readLong();
+    // nocommit? can the current impl even handle > 2G?
+    final byte[] bytes = new byte[(int)maxAddress];
+    data.readBytes(bytes, 0, bytes.length);
+    
+    final PackedInts.Reader addressReader = PackedInts.getReader(index);
+    final PackedInts.Reader ordsReader = PackedInts.getReader(index);
+    
+    final int valueCount = addressReader.size() - 1;
+    
+    return new SortedDocValues() {
+      @Override
+      public int getOrd(int docID) {
+        return (int)ordsReader.get(docID);
+      }
+
+      @Override
+      public void lookupOrd(int ord, BytesRef result) {
+        long startAddress = addressReader.get(ord);
+        long endAddress = addressReader.get(ord+1);
+        result.bytes = bytes;
+        result.offset = (int)startAddress;
+        result.length = (int)(endAddress - startAddress);
+      }
+
+      @Override
+      public int getValueCount() {
+        return valueCount;
+      }
+    };
   }
   
   @Override

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41Codec.java Thu Jan 24 17:01:10 2013
@@ -32,7 +32,7 @@ import org.apache.lucene.codecs.StoredFi
 import org.apache.lucene.codecs.TermVectorsFormat;
 import org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat;
 import org.apache.lucene.codecs.compressing.CompressionMode;
-import org.apache.lucene.codecs.lucene40.Lucene40LyingDocValuesFormat;
+import org.apache.lucene.codecs.lucene40.Lucene40DocValuesFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40LiveDocsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat;
@@ -125,7 +125,7 @@ public class Lucene41Codec extends Codec
   }
 
   private final PostingsFormat defaultFormat = PostingsFormat.forName("Lucene41");
-  private final DocValuesFormat dvFormat = new Lucene40LyingDocValuesFormat();
+  private final DocValuesFormat dvFormat = new Lucene40DocValuesFormat();
   private final NormsFormat normsFormat = new Lucene40NormsFormat();
 
   @Override

Modified: lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java Thu Jan 24 17:01:10 2013
@@ -219,12 +219,16 @@ class Lucene40DocValuesWriter extends Do
                           Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_CODEC_NAME_IDX,
                           Lucene40DocValuesFormat.BYTES_VAR_STRAIGHT_VERSION_CURRENT);
     
+    /* values */
+    
     final long startPos = data.getFilePointer();
     
     for (BytesRef v : values) {
       data.writeBytes(v.bytes, v.offset, v.length);
     }
     
+    /* addresses */
+    
     final long maxAddress = data.getFilePointer() - startPos;
     index.writeVLong(maxAddress);
     
@@ -245,8 +249,121 @@ class Lucene40DocValuesWriter extends Do
 
   @Override
   public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
-    assert false;
-  }  
+    // examine the values to determine best type to use
+    int minLength = Integer.MAX_VALUE;
+    int maxLength = Integer.MIN_VALUE;
+    for (BytesRef b : values) {
+      minLength = Math.min(minLength, b.length);
+      maxLength = Math.max(maxLength, b.length);
+    }
+    
+    boolean success = false;
+    IndexOutput data = null;
+    IndexOutput index = null;
+    String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, Integer.toString(field.number), "dat");
+    String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, Integer.toString(field.number), "idx");
+    
+    try {
+      data = dir.createOutput(dataName, state.context);
+      index = dir.createOutput(indexName, state.context);
+      if (minLength == maxLength) {
+        // fixed byte[]
+        addFixedSortedBytesField(field, data, index, values, docToOrd, minLength);
+      } else {
+        // var byte[]
+        addVarSortedBytesField(field, data, index, values, docToOrd);
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(data, index);
+      } else {
+        IOUtils.closeWhileHandlingException(data, index);
+      }
+    }
+  }
+  
+  private void addFixedSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, Iterable<BytesRef> values, Iterable<Number> docToOrd, int length) throws IOException {
+    field.putAttribute(legacyKey, LegacyDocValuesType.BYTES_FIXED_SORTED.name());
+
+    CodecUtil.writeHeader(data, 
+                          Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT,
+                          Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
+    
+    CodecUtil.writeHeader(index, 
+                          Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX,
+                          Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT);
+    
+    /* values */
+    
+    data.writeInt(length);
+    int valueCount = 0;
+    for (BytesRef v : values) {
+      data.writeBytes(v.bytes, v.offset, v.length);
+      valueCount++;
+    }
+    
+    /* ordinals */
+    
+    index.writeInt(valueCount);
+    int maxDoc = state.segmentInfo.getDocCount();
+    assert valueCount > 0;
+    final PackedInts.Writer w = PackedInts.getWriter(index, maxDoc, PackedInts.bitsRequired(valueCount-1), PackedInts.DEFAULT);
+    for (Number n : docToOrd) {
+      w.add(n.longValue());
+    }
+    w.finish();
+  }
+  
+  private void addVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
+    field.putAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.name());
+    
+    CodecUtil.writeHeader(data, 
+                          Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT,
+                          Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
+
+    CodecUtil.writeHeader(index, 
+                          Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX,
+                          Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
+
+    /* values */
+    
+    final long startPos = data.getFilePointer();
+    
+    int valueCount = 0;
+    for (BytesRef v : values) {
+      data.writeBytes(v.bytes, v.offset, v.length);
+      valueCount++;
+    }
+    
+    /* addresses */
+    
+    final long maxAddress = data.getFilePointer() - startPos;
+    index.writeLong(maxAddress);
+    
+    assert valueCount != Integer.MAX_VALUE; // unsupported by the 4.0 impl
+    
+    final PackedInts.Writer w = PackedInts.getWriter(index, valueCount+1, PackedInts.bitsRequired(maxAddress), PackedInts.DEFAULT);
+    long currentPosition = 0;
+    for (BytesRef v : values) {
+      w.add(currentPosition);
+      currentPosition += v.length;
+    }
+    // write sentinel
+    assert currentPosition == maxAddress;
+    w.add(currentPosition);
+    w.finish();
+    
+    /* ordinals */
+    
+    final int maxDoc = state.segmentInfo.getDocCount();
+    assert valueCount > 0;
+    final PackedInts.Writer ords = PackedInts.getWriter(index, maxDoc, PackedInts.bitsRequired(valueCount-1), PackedInts.DEFAULT);
+    for (Number n : docToOrd) {
+      ords.add(n.longValue());
+    }
+    ords.finish();
+  }
   
   @Override
   public void close() throws IOException {

Modified: lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java (original)
+++ lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40/Lucene40RWCodec.java Thu Jan 24 17:01:10 2013
@@ -33,8 +33,7 @@ public final class Lucene40RWCodec exten
     }
   };
   
-  //private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
-  private final DocValuesFormat docValues = new Lucene40LyingRWDocValuesFormat();
+  private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
   private final NormsFormat norms = new Lucene40RWNormsFormat();
   
   @Override

Modified: lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java?rev=1438072&r1=1438071&r2=1438072&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java (original)
+++ lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/lucene41/Lucene41RWCodec.java Thu Jan 24 17:01:10 2013
@@ -9,7 +9,7 @@ import org.apache.lucene.codecs.NormsFor
 import org.apache.lucene.codecs.StoredFieldsFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosWriter;
-import org.apache.lucene.codecs.lucene40.Lucene40LyingRWDocValuesFormat;
+import org.apache.lucene.codecs.lucene40.Lucene40RWDocValuesFormat;
 import org.apache.lucene.codecs.lucene40.Lucene40RWNormsFormat;
 
 /*
@@ -41,7 +41,7 @@ public class Lucene41RWCodec extends Luc
     }
   };
   
-  private final DocValuesFormat docValues = new Lucene40LyingRWDocValuesFormat();
+  private final DocValuesFormat docValues = new Lucene40RWDocValuesFormat();
   private final NormsFormat norms = new Lucene40RWNormsFormat();
   
   @Override